diff --git a/packages/kokkos/.github/ISSUE_TEMPLATE/bug_report.md b/packages/kokkos/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000000000000000000000000000000000..5a259e3a58c9660e01c2981228ac816525aeea63
--- /dev/null
+++ b/packages/kokkos/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,21 @@
+---
+name: Bug report
+about: Create a report to correct failures and improve our code
+title: ''
+labels: ''
+assignees: ''
+---
+**Describe the bug**
+Please provide a concise, clear description of the bug, as well as any available error logs.
+
+**Please also include the following items to support reproducing the bug**
+1. compilers (with versions)
+2. Kokkos release or commit used (i.e., the sha1 number)
+3. platform and backend
+4. cmake configure command
+5. output from cmake command
+6. code needed to reproduce the bug
+7. command line needed to reproduce the bug
+7. please also attach the `KokkosCore_config.h` header file (generated during the build);
+**Any additional info**
+Please provide any additional context about the issue here.
diff --git a/packages/kokkos/.github/workflows/cancelling.yml b/packages/kokkos/.github/workflows/cancelling.yml
deleted file mode 100644
index fa30adf956e1c272c1b8d29d131f225b1ff94919..0000000000000000000000000000000000000000
--- a/packages/kokkos/.github/workflows/cancelling.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-name: cancel-builds-on-update
-on:
-  workflow_run:
-    workflows: ['github-Linux', 'github-OSX']
-    types: ['requested']
-
-jobs:
-  cancel-duplicate-workflow-runs:
-    name: "Cancel duplicate workflow runs"
-    runs-on: ubuntu-latest
-    steps:
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel duplicate workflow runs"
-        with:
-          cancelMode: duplicates
-          cancelFutureDuplicates: true
-          token: ${{ secrets.GITHUB_TOKEN }}
-          sourceRunId: ${{ github.event.workflow_run.id }}
-          notifyPRCancel: true
-          skipEventTypes: '["push", "schedule"]'
diff --git a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml
index b76167f330a87eaf79af25f706c33d3e910865d1..f6a8f22308c6bdfc12fb79582755e7f0bf59337a 100644
--- a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml
+++ b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml
@@ -1,6 +1,10 @@
 name: github-Linux
 on: [push, pull_request]
 
+concurrency:
+  group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{github.event_name == 'pull_request'}}
+
 jobs:
   CI:
     continue-on-error: true
@@ -43,6 +47,9 @@ jobs:
       - name: maybe_disable_death_tests
         if: ${{ matrix.distro == 'fedora:rawhide' }}
         run: echo "GTEST_FILTER=-*DeathTest*" >> $GITHUB_ENV
+      - name: maybe_use_external_gtest
+        if: ${{ matrix.distro == 'ubuntu:latest' }}
+        run: sudo apt-get update && sudo apt-get install -y libgtest-dev
       - name: CMake
         run: |
           cmake -B builddir \
diff --git a/packages/kokkos/.github/workflows/osx.yml b/packages/kokkos/.github/workflows/osx.yml
index 178af12405cc2f6fc24a5ae46adf034b1c73a94e..69a09adf89d1496a91858284633db1bcb4a7a81c 100644
--- a/packages/kokkos/.github/workflows/osx.yml
+++ b/packages/kokkos/.github/workflows/osx.yml
@@ -2,6 +2,10 @@ name: github-OSX
 
 on: [push, pull_request]
 
+concurrency:
+  group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{github.event_name == 'pull_request'}}
+
 jobs:
   osxci:
     name: osx-ci
@@ -12,7 +16,7 @@ jobs:
         include:
           - backend: "SERIAL"
             cmake_build_type: "RelWithDebInfo"
-          - backend: "PTHREAD"
+          - backend: "THREADS"
             cmake_build_type: "RelWithDebInfo"
           - backend: "SERIAL"
             cmake_build_type: "Debug"
diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo
index bfbe5e6fd3ec3ae381fe5adbd8b39d0797bff2fa..6b9388486be0998ff3de0c89d365e3f245d0231a 100644
--- a/packages/kokkos/.gitrepo
+++ b/packages/kokkos/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = git@github.com:kokkos/kokkos.git
 	branch = master
-	commit = 2879e23507bcb21adb739d6317b3430f665de4a6
-	parent = 36833c0c0fc1a841eaed63df6b7d34609307f2a5
+	commit = b52f8c835f4df003954dad66d9761094f8baa66c
+	parent = 1994bb4f069142aa3f886d30aa0585ed9117eed1
 	method = merge
 	cmdver = 0.4.3
diff --git a/packages/kokkos/.jenkins b/packages/kokkos/.jenkins
index 09e8515e96f2bb8255bcaba7304780b484f303ea..b5d7fc3071d286694c40a1ce111a8fb04a1134a5 100644
--- a/packages/kokkos/.jenkins
+++ b/packages/kokkos/.jenkins
@@ -28,6 +28,32 @@ pipeline {
 
         stage('Build') {
             parallel {
+                stage('CUDA-11.4-NVHPC') {
+                    agent {
+                        dockerfile {
+                            filename 'Dockerfile.nvhpc'
+                            dir 'scripts/docker'
+                            label 'nvidia-docker && volta && large_images'
+                            args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES'
+                        }
+                    }
+                    steps {
+                        sh '''rm -rf build && mkdir -p build && cd build && \
+                              /opt/cmake/bin/cmake \
+                                -DCMAKE_BUILD_TYPE=Debug \
+                                -DCMAKE_CXX_COMPILER=nvc++ \
+                                -DCMAKE_CXX_FLAGS=-Werror \
+                                -DCMAKE_CXX_STANDARD=17 \
+                                -DKokkos_ENABLE_COMPILER_WARNINGS=ON \
+                                -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \
+                                -DKokkos_ENABLE_TESTS=ON \
+                                -DKokkos_ENABLE_CUDA=ON \
+                                -DKokkos_ENABLE_CUDA_LAMBDA=ON \
+                                -DKokkos_ENABLE_OPENMP=ON \
+                              .. && \
+                              make -j8 && ctest --verbose'''
+                    }
+                }
                 stage('SYCL-OneAPI') {
                     agent {
                         dockerfile {
@@ -44,10 +70,10 @@ pipeline {
                                 -DCMAKE_BUILD_TYPE=Release \
                                 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
                                 -DCMAKE_CXX_COMPILER=clang++ \
-                                -DCMAKE_CXX_FLAGS="-Werror -Wno-unknown-cuda-version -Wno-gnu-zero-variadic-macro-arguments" \
+                                -DCMAKE_CXX_FLAGS="-Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-linker-warnings" \
                                 -DKokkos_ARCH_VOLTA70=ON \
                                 -DKokkos_ENABLE_COMPILER_WARNINGS=ON \
-                                -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \
+                                -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \
                                 -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \
                                 -DKokkos_ENABLE_EXAMPLES=ON \
                                 -DKokkos_ENABLE_TESTS=ON \
@@ -63,12 +89,12 @@ pipeline {
                         }
                     }
                 }
-                stage('HIP-ROCm-4.2-C++14') {
+                stage('HIP-ROCm-4.5-C++14') {
                     agent {
                         dockerfile {
                             filename 'Dockerfile.hipcc'
                             dir 'scripts/docker'
-                            additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:4.2'
+                            additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:4.5'
                             label 'rocm-docker && vega'
                             args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES'
                         }
@@ -92,6 +118,7 @@ pipeline {
                                 -DKokkos_ENABLE_TESTS=ON \
                                 -DKokkos_ENABLE_HIP=ON \
                                 -DKokkos_ENABLE_OPENMP=ON \
+                                -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON \
                               .. && \
                               make -j8 && ctest --verbose'''
                     }
@@ -101,12 +128,12 @@ pipeline {
                         }
                     }
                 }
-                stage('HIP-ROCm-4.2-C++17') {
+                stage('HIP-ROCm-4.5-C++17') {
                     agent {
                         dockerfile {
                             filename 'Dockerfile.hipcc'
                             dir 'scripts/docker'
-                            additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:4.2'
+                            additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:4.5'
                             label 'rocm-docker && vega'
                             args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES'
                         }
@@ -133,12 +160,12 @@ pipeline {
                         }
                     }
                 }
-                stage('OPENMPTARGET-ROCm-4.2') {
+                stage('OPENMPTARGET-ROCm-4.5') {
                     agent {
                         dockerfile {
                             filename 'Dockerfile.hipcc'
                             dir 'scripts/docker'
-                            additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:4.2'
+                            additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:4.5'
                             label 'rocm-docker && vega && AMD_Radeon_Instinct_MI60'
                             args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES'
                         }
@@ -156,7 +183,7 @@ pipeline {
                               cmake \
                                 -Bbuild \
                                 -DCMAKE_BUILD_TYPE=Debug \
-                                -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
+                                -DCMAKE_CXX_COMPILER=amdclang++ \
                                 -DCMAKE_CXX_STANDARD=17 \
                                 -DKokkos_ENABLE_COMPILER_WARNINGS=ON \
                                 -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \
@@ -379,7 +406,7 @@ pipeline {
                         }
                     }
                 }
-                stage('GCC-5.3.0') {
+                stage('GCC-5.3.1') {
                     agent {
                         dockerfile {
                             filename 'Dockerfile.gcc'
@@ -404,7 +431,7 @@ pipeline {
                                 -DKokkos_ENABLE_OPENMP=ON \
                                 -DKokkos_ENABLE_LIBDL=OFF \
                                 -DKokkos_ENABLE_LIBQUADMATH=ON \
-                                -DCMAKE_PREFIX_PATH=/usr/local/lib/gcc/x86_64-unknown-linux-gnu/5.3.0 \
+                                -DCMAKE_PREFIX_PATH=/usr/lib/gcc/x86_64-linux-gnu/5.3.1 \
                               .. && \
                               make -j8 && ctest --verbose && gcc -I$PWD/../core/src/ ../core/unit_test/tools/TestCInterface.c'''
                     }
diff --git a/packages/kokkos/BUILD.md b/packages/kokkos/BUILD.md
index bb1a31f266ec487c27daced9971d481e41d81c2d..114baf99f1dd32937e35147065f04a512f0fd257 100644
--- a/packages/kokkos/BUILD.md
+++ b/packages/kokkos/BUILD.md
@@ -119,8 +119,8 @@ Device backends can be enabled by specifying `-DKokkos_ENABLE_X`.
 * Kokkos_ENABLE_OPENMP
     * Whether to build OpenMP backend
     * BOOL Default: OFF
-* Kokkos_ENABLE_PTHREAD
-    * Whether to build Pthread backend
+* Kokkos_ENABLE_THREADS
+    * Whether to build C++ thread backend
     * BOOL Default: OFF
 * Kokkos_ENABLE_SERIAL
     * Whether to build serial backend
@@ -178,7 +178,7 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
     * Whether to print information about which profiling tools gotloaded
     * BOOL Default: OFF
 * Kokkos_ENABLE_TESTS
-    * Whether to build serial  backend
+    * Whether to enable test suite
     * BOOL Default: OFF
 
 ## Other Options
diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md
index 2e779791dde2a83394662f640f90626f66696f28..dfbe22eddefef9d634aeb21ccde99d3f0d1a7628 100644
--- a/packages/kokkos/CHANGELOG.md
+++ b/packages/kokkos/CHANGELOG.md
@@ -1,5 +1,120 @@
 # Change Log
 
+## [3.6.00](https://github.com/kokkos/kokkos/tree/3.6.00) (2022-02-18)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/3.5.00...3.6.00)
+
+### Features:
+- Add C++ standard algorithms [\#4315](https://github.com/kokkos/kokkos/pull/4315)
+- Implement `fill_random` for `DynRankView` [\#4763](https://github.com/kokkos/kokkos/pull/4763)
+- Add `bhalf_t` [\#4543](https://github.com/kokkos/kokkos/pull/4543) [\#4653](https://github.com/kokkos/kokkos/pull/4653)
+- Add mathematical constants [\#4519](https://github.com/kokkos/kokkos/pull/4519)
+- Allow `Kokkos::{create_mirror*,resize,realloc}` to be used with `WithoutInitializing` [\#4486](https://github.com/kokkos/kokkos/pull/4486) [\#4337](https://github.com/kokkos/kokkos/pull/4337)
+- Implement `KOKKOS_IF_ON_{HOST,DEVICE}` macros [\#4660](https://github.com/kokkos/kokkos/pull/4660)
+- Allow setting the CMake language for Kokkos [\#4323](https://github.com/kokkos/kokkos/pull/4323)
+
+#### Perf bug fix
+- Desul: Add ScopeCaller [\#4690](https://github.com/kokkos/kokkos/pull/4690)
+- Enable Desul atomics by default when using Makefiles [\#4606](https://github.com/kokkos/kokkos/pull/4606)
+- Unique token improvement [\#4741](https://github.com/kokkos/kokkos/pull/4741) [\#4748](https://github.com/kokkos/kokkos/pull/4748)
+
+#### Other improvements:
+- Add math function long double overload on the host side [\#4712](https://github.com/kokkos/kokkos/pull/4712)
+
+### Deprecations:
+- Array reductions with pointer return types [\#4756](https://github.com/kokkos/kokkos/pull/4756)
+- Deprecate `partition_master`, `validate_partition` [\#4737](https://github.com/kokkos/kokkos/pull/4737)
+- Deprecate `Kokkos_ENABLE_PTHREAD` in favor of `Kokkos_ENABLE_THREADS` [\#4619](https://github.com/kokkos/kokkos/pull/4619) ** pair with use std::threads **
+- Deprecate `log2(unsigned) -> int` (removing in next release) [\#4595](https://github.com/kokkos/kokkos/pull/4595)
+- Deprecate `Kokkos::Impl::is_view` [\#4592](https://github.com/kokkos/kokkos/pull/4592)
+- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros and the `ActiveExecutionMemorySpace` alias [\#4668](https://github.com/kokkos/kokkos/issues/4668)
+
+### Backends and Archs Enhancements:
+
+#### SYCL:
+- Update required SYCL compiler version [\#4749](https://github.com/kokkos/kokkos/pull/4749)
+- Cap vector size to kernel maximum for SYCL [\#4704](https://github.com/kokkos/kokkos/pull/4704)
+- Improve check for compatibility of vector size and subgroup size in SYCL [\#4579](https://github.com/kokkos/kokkos/pull/4579)
+- Provide `chunk_size` for SYCL [\#4635](https://github.com/kokkos/kokkos/pull/4635)
+- Use host-pinned memory for SYCL kernel memory [\#4627](https://github.com/kokkos/kokkos/pull/4627)
+- Use shuffle-based algorithm for scalar reduction [\#4608](https://github.com/kokkos/kokkos/pull/4608)
+- Implement pool of USM IndirectKernelMemory [\#4596](https://github.com/kokkos/kokkos/pull/4596)
+- Provide valid default team size for SYCL [\#4481](https://github.com/kokkos/kokkos/pull/4481)
+
+#### CUDA:
+- Add checks for shmem usage in `parallel_reduce` [\#4548](https://github.com/kokkos/kokkos/pull/4548)
+
+#### HIP:
+- Add support for fp16 in the HIP backend [\#4688](https://github.com/kokkos/kokkos/pull/4688)
+- Disable multiple kernel instantiations when using HIP (configure with `-DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON` to use) [\#4644](https://github.com/kokkos/kokkos/pull/4644)
+- Fix HIP scratch use per instance [\#4439](https://github.com/kokkos/kokkos/pull/4439)
+- Change allocation header to 256B alignment for AMD VEGA architecture [\#4753](https://github.com/kokkos/kokkos/pull/4753)
+- Add generic `KOKKOS_ARCH_VEGA` macro [\#4782](https://github.com/kokkos/kokkos/pull/4782)
+- Require ROCm 4.5 [\#4689](https://github.com/kokkos/kokkos/pull/4689)
+
+### HPX:
+- Adapt to HPX 1.7.0 which is now required [\#4241](https://github.com/kokkos/kokkos/pull/4241)
+
+#### OpenMP:
+- Fix thread deduction for OpenMP for `thread_count==0` [\#4541](https://github.com/kokkos/kokkos/pull/4541)
+
+#### OpenMPTarget:
+- Update memory space `size_type` to improve performance (`size_t -> unsigned`) [\#4779](https://github.com/kokkos/kokkos/pull/4779)
+
+#### Other Improvements:
+- Improve NVHPC support [\#4599](https://github.com/kokkos/kokkos/pull/4599)
+- Add `Kokkos::Experimental::{min,max,minmax,clamp}` [\#4629](https://github.com/kokkos/kokkos/pull/4629) [\#4506](https://github.com/kokkos/kokkos/pull/4506)
+- Use device type as template argument in Containers and Algorithms [\#4724](https://github.com/kokkos/kokkos/pull/4724) [\#4675](https://github.com/kokkos/kokkos/pull/4675)
+- Implement `Kokkos::sort` with execution space [\#4490](https://github.com/kokkos/kokkos/pull/4490)
+- `Kokkos::resize` always error out for mismatch in runtime rank [\#4681](https://github.com/kokkos/kokkos/pull/4681)
+- Print current call stack when calling `Kokkos::abort()` from the host [\#4672](https://github.com/kokkos/kokkos/pull/4672) [\#4671](https://github.com/kokkos/kokkos/pull/4671)
+- Detect mismatch of execution spaces in functors [\#4655](https://github.com/kokkos/kokkos/pull/4655)
+- Improve view label access on host [\#4647](https://github.com/kokkos/kokkos/pull/4647)
+- Error out for `const` scalar return type in reduction [\#4645](https://github.com/kokkos/kokkos/pull/4645)
+- Don't allow calling `UnorderdMap::value_at` for a set [\#4639](https://github.com/kokkos/kokkos/pull/4639)
+- Add `KOKKOS_COMPILER_NVHPC` macro, disable `quiet_NaN` and `signaling_NaN` [\#4586](https://github.com/kokkos/kokkos/pull/4586)
+- Improve performance of `local_deep_copy` [\#4511](https://github.com/kokkos/kokkos/pull/4511)
+- Improve performance when sorting integers [\#4464](https://github.com/kokkos/kokkos/pull/4464)
+- Add missing numeric traits (`denorm_min`, `reciprocal_overflow_threshold`, `{quiet,silent}_NaN}`) and make them work on cv-qualified types [\#4466](https://github.com/kokkos/kokkos/pull/4466) [\#4415](https://github.com/kokkos/kokkos/pull/4415) [\#4473](https://github.com/kokkos/kokkos/pull/4473) [\#4443](https://github.com/kokkos/kokkos/pull/4443)
+
+### Implemented enhancements BuildSystem
+- Manually compute IntelLLVM compiler version for older CMake versions [\#4760](https://github.com/kokkos/kokkos/pull/4760)
+- Add Xptxas without = to `nvcc_wrapper` [\#4646](https://github.com/kokkos/kokkos/pull/4646)
+- Use external GoogleTest optionally [\#4563](https://github.com/kokkos/kokkos/pull/4563)
+- Silent warnings about multiple optimization flags with `nvcc_wrapper` [\#4502](https://github.com/kokkos/kokkos/pull/4502)
+- Use the same flags in Makefile.kokkos for POWER7/8/9 as for CMake [\#4483](https://github.com/kokkos/kokkos/pull/4483)
+- Fix support for A64FX architecture [\#4745](https://github.com/kokkos/kokkos/pull/4745)
+
+### Incompatibilities:
+- Drop `KOKKOS_ARCH_HIP` macro when using generated GNU makefiles [\#4786](https://github.com/kokkos/kokkos/pull/4786)
+- Remove gcc-toolchain auto add for clang in Makefile.kokkos [\#4762](https://github.com/kokkos/kokkos/pull/4762)
+
+### Bug Fixes:
+- Lock constant memory in Cuda/HIP kernel launch with a mutex (thread safety) [\#4525](https://github.com/kokkos/kokkos/pull/4525)
+- Fix overflow for large requested scratch allocation [\#4551](https://github.com/kokkos/kokkos/pull/4551)
+- Fix Windows build in mingw [\#4564](https://github.com/kokkos/kokkos/pull/4564)
+- Fix `kokkos_launch_compiler`: escape `$` character [\#4769](https://github.com/kokkos/kokkos/pull/4769) [\#4703](https://github.com/kokkos/kokkos/pull/4703)
+- Fix math functions with NVCC and GCC 5 as host compiler [\#4733](https://github.com/kokkos/kokkos/pull/4733)
+- Fix shared build with Intel19 [\#4725](https://github.com/kokkos/kokkos/pull/4725)
+- Do not install empty `desul/src/` directory [\#4714](https://github.com/kokkos/kokkos/pull/4714)
+- Fix wrong `device_id` computation in `identifier_from_devid` (Profiling Interface) [\#4694](https://github.com/kokkos/kokkos/pull/4694)
+- Fix a bug in CUDA scratch memory pool (abnormally high memory consumption) [\#4673](https://github.com/kokkos/kokkos/pull/4673)
+- Remove eval of command args in `hpcbind` [\#4630](https://github.com/kokkos/kokkos/pull/4630)
+- SYCL fix to run when no GPU is detected [\#4623](https://github.com/kokkos/kokkos/pull/4623)
+- Fix `layout_strides::span` for rank-0 views [\#4605](https://github.com/kokkos/kokkos/pull/4605)
+- Fix SYCL atomics for local memory [\#4585](https://github.com/kokkos/kokkos/pull/4585)
+- Hotfix `mdrange_large_deep_copy` for SYCL [\#4581](https://github.com/kokkos/kokkos/pull/4581)
+- Fix bug when sorting integer using the HIP backend [\#4570](https://github.com/kokkos/kokkos/pull/4570)
+- Fix compilation error when using HIP with RDC [\#4553](https://github.com/kokkos/kokkos/pull/4553)
+- `DynamicView`: Fix deallocation extent [\#4533](https://github.com/kokkos/kokkos/pull/4533)
+- SYCL fix running parallel_reduce with TeamPolicy for large ranges [\#4532](https://github.com/kokkos/kokkos/pull/4532)
+- Fix bash syntax error in `nvcc_wrapper` [\#4524](https://github.com/kokkos/kokkos/pull/4524)
+- OpenMPTarget `team_policy` reduce fixes for `init/join` reductions [\#4521](https://github.com/kokkos/kokkos/pull/4521)
+- Avoid hangs in the Threads backend [\#4499](https://github.com/kokkos/kokkos/pull/4499)
+- OpenMPTarget fix reduction bug in `parallel_reduce` for `TeamPolicy` [\#4491](https://github.com/kokkos/kokkos/pull/4491)
+- HIP fix scratch space per instance [\#4439](https://github.com/kokkos/kokkos/pull/4439)
+- OpenMPTarget fix team scratch allocation [\#4431](https://github.com/kokkos/kokkos/pull/4431)
+
+
 ## [3.5.00](https://github.com/kokkos/kokkos/tree/3.5.00) (2021-10-19)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.01...3.5.00)
 
diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt
index 1b6753f983db34e64bc1e10bfc3f008c6fec5ede..e1c6893725eab7cd75ca388863abf6a146d07f93 100644
--- a/packages/kokkos/CMakeLists.txt
+++ b/packages/kokkos/CMakeLists.txt
@@ -1,7 +1,7 @@
 
 # Disable in-source builds to prevent source tree corruption.
 if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" )
-  message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files." )
+  message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." )
 endif()
 
 # We want to determine if options are given with the wrong case
@@ -75,7 +75,31 @@ IF(NOT KOKKOS_HAS_TRILINOS)
   cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
   set(CMAKE_DISABLE_SOURCE_CHANGES ON)
   set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
+
+  # What language are we compiling Kokkos as
+  # downstream dependencies need to match this!
+  SET(KOKKOS_COMPILE_LANGUAGE CXX)
+  # use lower case here since we didn't parse options yet
+  IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
+
+    # Without this as a language for the package we would get a C++ compiler enabled.
+    # but we still need a C++ compiler even if we build all our cpp files as CUDA only
+    # because otherwise the C++ features don't work etc.
+    # This is just the rather odd way CMake does this, since CUDA doesn't imply C++ even
+    # though it is a C++ extension ... (but I guess it didn't use to be back in CUDA 4 or 5
+    # days.
+    SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX)
+
+    IF (Kokkos_ENABLE_CUDA)
+      SET(KOKKOS_COMPILE_LANGUAGE CUDA)
+    ENDIF()
+  ENDIF()
+
   IF (Spack_WORKAROUND)
+    IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
+      MESSAGE(FATAL_ERROR "Can't currently use Kokkos_ENABLE_COMPILER_AS_CMAKE_LANGUAGE in a spack installation!")
+    ENDIF()
+
     #if we are explicitly using Spack for development,
     #nuke the Spack compiler
     SET(SPACK_CXX $ENV{SPACK_CXX})
@@ -86,7 +110,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
   ENDIF()
   # Always call the project command to define Kokkos_ variables
   # and to make sure that C++ is an enabled language
-  PROJECT(Kokkos CXX)
+  PROJECT(Kokkos ${KOKKOS_COMPILE_LANGUAGE} ${KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE})
   IF(NOT HAS_PARENT)
     IF (NOT CMAKE_BUILD_TYPE)
       SET(DEFAULT_BUILD_TYPE "RelWithDebInfo")
@@ -111,7 +135,7 @@ ENDIF()
 
 
 set(Kokkos_VERSION_MAJOR 3)
-set(Kokkos_VERSION_MINOR 5)
+set(Kokkos_VERSION_MINOR 6)
 set(Kokkos_VERSION_PATCH 00)
 set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
 math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@@ -151,6 +175,10 @@ IF(NOT MSVC)
     GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -DKOKKOS_DEPENDENCE)
 ENDIF()
 
+IF(Kokkos_ENABLE_TESTS AND NOT KOKKOS_HAS_TRILINOS)
+  find_package(GTest)
+ENDIF()
+
 # Include a set of Kokkos-specific wrapper functions that
 # will either call raw CMake or TriBITS
 # These are functions like KOKKOS_INCLUDE_DIRECTORIES
@@ -174,10 +202,6 @@ KOKKOS_SETUP_BUILD_ENVIRONMENT()
 # 7) Export and install targets
 
 OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF)
-# Workaround for building position independent code.
-IF(BUILD_SHARED_LIBS)
-  SET(CMAKE_POSITION_INDEPENDENT_CODE ON)
-ENDIF()
 
 SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms)
 SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms)
diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos
index 7ab18f5894e8880bd0584a4815f2260e07772cfb..b1afed5d0663a9f70a0896236dd08b71fdf90c2e 100644
--- a/packages/kokkos/Makefile.kokkos
+++ b/packages/kokkos/Makefile.kokkos
@@ -1,13 +1,13 @@
 # Default settings common options.
 
 KOKKOS_VERSION_MAJOR = 3
-KOKKOS_VERSION_MINOR = 5
+KOKKOS_VERSION_MINOR = 6
 KOKKOS_VERSION_PATCH = 00
 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
 
-# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Pthread,Serial
+# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
 #KOKKOS_DEVICES ?= "OpenMP"
-KOKKOS_DEVICES ?= "Pthread"
+KOKKOS_DEVICES ?= "Threads"
 # Options:
 # Intel:    KNC,KNL,SNB,HSW,BDW,SKX
 # NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86
@@ -15,6 +15,7 @@ KOKKOS_DEVICES ?= "Pthread"
 # IBM:      BGQ,Power7,Power8,Power9
 # AMD-GPUS: Vega900,Vega906,Vega908,Vega90A
 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
+# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP
 KOKKOS_ARCH ?= ""
 # Options: yes,no
 KOKKOS_DEBUG ?= "no"
@@ -22,7 +23,7 @@ KOKKOS_DEBUG ?= "no"
 KOKKOS_USE_TPLS ?= ""
 # Options: c++14,c++1y,c++17,c++1z,c++2a
 KOKKOS_CXX_STANDARD ?= "c++14"
-# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings
+# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings,disable_desul_atomics
 KOKKOS_OPTIONS ?= ""
 KOKKOS_CMAKE ?= "no"
 KOKKOS_TRIBITS ?= "no"
@@ -82,7 +83,9 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS
 KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
 KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
 KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
+# deprecated
 KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics)
+KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_desul_atomics)
 KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
 KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings)
 
@@ -90,12 +93,18 @@ KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),
 
 # Check for Kokkos Host Execution Spaces one of which must be on.
 KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP)
-KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread)
+KOKKOS_INTERNAL_USE_THREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Threads)
+# deprecated
+KOKKOS_INTERNAL_USE_PTHREAD := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread)
 KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX)
 KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial)
+ifeq ($(KOKKOS_INTERNAL_USE_PTHREAD), 1)
+  KOKKOS_INTERNAL_USE_THREADS := 1
+  $(warning Warning: Pthread is deprecated. Use Threads instead! KOKKOS_DEVICES=$(KOKKOS_DEVICES))
+endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
-  ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
+  ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 0)
     ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
       KOKKOS_INTERNAL_USE_SERIAL := 1
     endif
@@ -115,7 +124,7 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
   KOKKOS_DEVICELIST += OpenMP
 endif
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
   KOKKOS_DEVICELIST += Threads
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
@@ -461,7 +470,7 @@ ifneq ($(KOKKOS_CMAKE), yes)
   # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command
   KOKKOS_CXXLDFLAGS = -L$(shell pwd)
 endif
-KOKKOS_LINK_FLAGS = 
+KOKKOS_LINK_FLAGS =
 KOKKOS_SRC =
 KOKKOS_HEADERS =
 
@@ -493,7 +502,7 @@ tmp := $(call kokkos_append_header,'$H''endif')
 tmp := $(call kokkos_append_header,"")
 tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)")
 tmp := $(call kokkos_append_header,"")
-	
+
 tmp := $(call kokkos_append_header,"/* Execution Spaces */")
 
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@@ -520,7 +529,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
   tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP')
 endif
 
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS")
 endif
 
@@ -941,18 +950,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
   ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
 
   else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) 
-        KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
-        KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-
-      else 
-        # Assume that this is a really a GNU compiler on P8.
-        KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
-        KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
-      endif
-    endif
+    KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
+    KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
   endif
 endif
 
@@ -962,18 +961,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
   ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
 
   else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) 
-        KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
-        KOKKOS_LDFLAGS  += -mcpu=power9 -mtune=power9
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-
-      else 
-        # Assume that this is a really a GNU compiler on P9
-        KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
-        KOKKOS_LDFLAGS  += -mcpu=power9 -mtune=power9
-      endif
-    endif
+    KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
+    KOKKOS_LDFLAGS  += -mcpu=power9 -mtune=power9
   endif
 endif
 
@@ -1191,29 +1180,32 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
   # Lets start with adding architecture defines
   ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
-    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 900")
     tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
     KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
   endif
   ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
-    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 906")
     tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
     KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
   endif
   ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1)
-    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 908")
     tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
     KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx908
   endif
   ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1)
-    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 90A")
     tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
     KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx90a
   endif
 
 
   KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
   KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)
+  ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
+    KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp
+  endif
 
   KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
   KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
@@ -1271,8 +1263,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
   KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG)
 endif
 
-ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
+ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS")
+else
+  ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
+    $(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) )
+  endif
 endif
 
 KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
@@ -1313,7 +1309,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
   ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
     tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SYCL.hpp>","KokkosCore_Config_FwdBackend.hpp")
     tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SYCL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
-    tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_SYCL.hpp>","KokkosCore_Config_SetupBackend.hpp")
   endif
   ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
     tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HIP.hpp>","KokkosCore_Config_FwdBackend.hpp")
@@ -1324,7 +1319,7 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
     tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMP.hpp>","KokkosCore_Config_FwdBackend.hpp")
     tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
   endif
-  ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+  ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
     tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_THREADS.hpp>","KokkosCore_Config_FwdBackend.hpp")
     tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_THREADS.hpp>","KokkosCore_Config_DeclareBackend.hpp")
   endif
@@ -1353,7 +1348,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
 
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
   KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
-  ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
+  ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
     KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp
   endif
   KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
@@ -1405,7 +1400,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
   KOKKOS_LINK_FLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
 endif
 
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
   KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
   KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
   KOKKOS_LIBS += -lpthread
@@ -1439,14 +1434,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
   KOKKOS_TPL_LIBRARY_NAMES += hpx
 endif
 
-# Explicitly set the GCC Toolchain for Clang.
-ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-  KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
-  KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
-  KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
-  KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
-endif
-
 # Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning.
 ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
   KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
@@ -1493,7 +1480,7 @@ libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
 	ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
 	ranlib libkokkos.a
 
-print-cxx-flags: 
+print-cxx-flags:
 	echo "$(KOKKOS_CXXFLAGS)"
 
 KOKKOS_LINK_DEPENDS=libkokkos.a
diff --git a/packages/kokkos/Makefile.targets b/packages/kokkos/Makefile.targets
index 93854d0cf150c97d5058422b7ca9ff28ce2ba8b6..a9cb12e1b46f3e8baa443576528b3eee07c9fded 100644
--- a/packages/kokkos/Makefile.targets
+++ b/packages/kokkos/Makefile.targets
@@ -10,6 +10,8 @@ Kokkos_Stacktrace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_S
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp
 Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp
+Kokkos_Command_Line_Parsing.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp
 Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
 Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
@@ -68,9 +70,11 @@ Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
 Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
+Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp
 endif
 
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
 Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
 Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
diff --git a/packages/kokkos/README.md b/packages/kokkos/README.md
index 673f4627125223f0d09f1e184d9942c0e6a0b7ff..f6c500cc1a73ee937064b848d4edd1633d6c596c 100644
--- a/packages/kokkos/README.md
+++ b/packages/kokkos/README.md
@@ -48,14 +48,14 @@ For specifics see the LICENSE file contained in the repository or distribution.
 Generally Kokkos should work with all compiler versions newer than the minimum.
 However as in all sufficiently complex enough code, we have to work around compiler
 bugs with almost all compilers. So compiler versions we don't test may have issues
-we are unaware off.
+we are unaware of.
 
 * GCC: 5.3.0
 * Clang: 4.0.0
 * Intel: 17.0.1
 * NVCC: 9.2.88
 * NVC++: 21.5
-* ROCM: 4.3
+* ROCm: 4.3
 * MSVC: 19.29
 * IBM XL: 16.1.1
 * Fujitsu: 4.5.0
@@ -70,7 +70,7 @@ we are unaware off.
 * MSVC: 19.29
 * ARM/Clang: 20.1
 * IBM XL: 16.1.1
-* ROCM: 4.3.0
+* ROCm: 4.3.0
 
 ### Build system:
 
@@ -80,7 +80,7 @@ we are unaware off.
 
 Primary tested compiler are passing in release mode
 with warnings as errors. They also are tested with a comprehensive set of
-backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...).
+backend combinations (i.e. OpenMP, Threads, Serial, OpenMP+Serial, ...).
 We are using the following set of flags:
 * GCC:
    ````
@@ -193,7 +193,7 @@ The main reason is that you may otherwise need many different
 configurations of Kokkos installed depending on the required compile time
 features an application needs. For example there is only one default
 execution space, which means you need different installations to have OpenMP
-or Pthreads as the default space. Also for the CUDA backend there are certain
+or C++ threads as the default space. Also for the CUDA backend there are certain
 choices, such as allowing relocatable device code, which must be made at
 installation time. Building Kokkos inline uses largely the same process
 as compiling an application against an installed Kokkos library.
diff --git a/packages/kokkos/Spack.md b/packages/kokkos/Spack.md
index 31a07deb56a0c9dc09e4453e196a8c8302634b19..79606c259d5b5e840ff40240de4fc1087bad2c4d 100644
--- a/packages/kokkos/Spack.md
+++ b/packages/kokkos/Spack.md
@@ -24,20 +24,22 @@ By default, Spack doesn't 'see' anything on your system - including things like
 This can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable).  For example, your `packages.yaml` file could be:
 ````yaml
 packages:
- cuda:
-  modules:
-   cuda@10.1.243: [cuda/10.1.243]
-  paths:
-   cuda@10.1.243:
-    /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243
-  buildable: false
- cmake:
-  modules:
-   cmake: [cmake/3.16.8]
-  paths:
-   cmake:
-    /opt/local/ppc64le/cmake/3.16.8
-  buildable: false
+  cuda:
+    buildable: false
+    externals:
+    - prefix: /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243
+      spec: cuda@10.1.243
+    - modules:
+      - cuda/10.1.243
+      spec: cuda@10.1.243
+  cmake:
+    buildable: false
+    externals:
+    - prefix: /opt/local/ppc64le/cmake/3.16.8
+      spec: cmake@3.16.8
+    - modules:
+      - cmake/3.16.8
+      spec: cmake@3.16.8
 ````
 The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems).
 The `buildable` flag is useful to make sure Spack crashes if there is a path error,
diff --git a/packages/kokkos/algorithms/src/CMakeLists.txt b/packages/kokkos/algorithms/src/CMakeLists.txt
index cf5564032c20bdae4593f44fc66c8b1e39e0833f..4b60d887f79bd7737a42554808ac1de0f4bb5ac7 100644
--- a/packages/kokkos/algorithms/src/CMakeLists.txt
+++ b/packages/kokkos/algorithms/src/CMakeLists.txt
@@ -10,6 +10,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 FILE(GLOB ALGO_HEADERS *.hpp)
 FILE(GLOB ALGO_SOURCES *.cpp)
 LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
+APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp)
 
 INSTALL (
   DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
diff --git a/packages/kokkos/algorithms/src/Kokkos_Random.hpp b/packages/kokkos/algorithms/src/Kokkos_Random.hpp
index 46b8ab87fabfbeabda12beb3ddabf0eb6aab3482..59c11afd9a24b146792e384d831a896ec17b1ba3 100644
--- a/packages/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/packages/kokkos/algorithms/src/Kokkos_Random.hpp
@@ -466,6 +466,25 @@ struct rand<Generator, Kokkos::Experimental::half_t> {
 };
 #endif  // defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
 
+#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+template <class Generator>
+struct rand<Generator, Kokkos::Experimental::bhalf_t> {
+  using bhalf = Kokkos::Experimental::bhalf_t;
+  KOKKOS_INLINE_FUNCTION
+  static bhalf max() { return bhalf(1.0); }
+  KOKKOS_INLINE_FUNCTION
+  static bhalf draw(Generator& gen) { return bhalf(gen.frand()); }
+  KOKKOS_INLINE_FUNCTION
+  static bhalf draw(Generator& gen, const bhalf& range) {
+    return bhalf(gen.frand(float(range)));
+  }
+  KOKKOS_INLINE_FUNCTION
+  static bhalf draw(Generator& gen, const bhalf& start, const bhalf& end) {
+    return bhalf(gen.frand(float(start), float(end)));
+  }
+};
+#endif  // defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+
 template <class Generator>
 struct rand<Generator, float> {
   KOKKOS_INLINE_FUNCTION
@@ -499,7 +518,7 @@ struct rand<Generator, double> {
 };
 
 template <class Generator>
-struct rand<Generator, Kokkos::complex<float> > {
+struct rand<Generator, Kokkos::complex<float>> {
   KOKKOS_INLINE_FUNCTION
   static Kokkos::complex<float> max() {
     return Kokkos::complex<float>(1.0, 1.0);
@@ -528,7 +547,7 @@ struct rand<Generator, Kokkos::complex<float> > {
 };
 
 template <class Generator>
-struct rand<Generator, Kokkos::complex<double> > {
+struct rand<Generator, Kokkos::complex<double>> {
   KOKKOS_INLINE_FUNCTION
   static Kokkos::complex<double> max() {
     return Kokkos::complex<double>(1.0, 1.0);
@@ -617,24 +636,23 @@ struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::OpenMPTarget>
     : std::false_type {};
 #endif
 
-template <class ExecutionSpace>
+template <class DeviceType>
 struct Random_UniqueIndex {
-  using locks_view_type = View<int**, ExecutionSpace>;
+  using locks_view_type = View<int**, DeviceType>;
   KOKKOS_FUNCTION
   static int get_state_idx(const locks_view_type) {
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-    const int i = ExecutionSpace::impl_hardware_thread_id();
-    return i;
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST(
+        (return DeviceType::execution_space::impl_hardware_thread_id();))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 };
 
 #ifdef KOKKOS_ENABLE_CUDA
-template <>
-struct Random_UniqueIndex<Kokkos::Cuda> {
-  using locks_view_type = View<int**, Kokkos::Cuda>;
+template <class MemorySpace>
+struct Random_UniqueIndex<Kokkos::Device<Kokkos::Cuda, MemorySpace>> {
+  using locks_view_type =
+      View<int**, Kokkos::Device<Kokkos::Cuda, MemorySpace>>;
   KOKKOS_FUNCTION
   static int get_state_idx(const locks_view_type& locks_) {
 #ifdef __CUDA_ARCH__
@@ -660,9 +678,11 @@ struct Random_UniqueIndex<Kokkos::Cuda> {
 #endif
 
 #ifdef KOKKOS_ENABLE_HIP
-template <>
-struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
-  using locks_view_type = View<int**, Kokkos::Experimental::HIP>;
+template <class MemorySpace>
+struct Random_UniqueIndex<
+    Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>> {
+  using locks_view_type =
+      View<int**, Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>>;
   KOKKOS_FUNCTION
   static int get_state_idx(const locks_view_type& locks_) {
 #ifdef __HIP_DEVICE_COMPILE__
@@ -688,18 +708,37 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
 #endif
 
 #ifdef KOKKOS_ENABLE_SYCL
-template <>
-struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
-  using locks_view_type = View<int**, Kokkos::Experimental::SYCL>;
+template <class MemorySpace>
+struct Random_UniqueIndex<
+    Kokkos::Device<Kokkos::Experimental::SYCL, MemorySpace>> {
+  using locks_view_type =
+      View<int**, Kokkos::Device<Kokkos::Experimental::SYCL, MemorySpace>>;
   KOKKOS_FUNCTION
   static int get_state_idx(const locks_view_type& locks_) {
-#ifdef KOKKOS_ARCH_INTEL_GPU
-    int i = Kokkos::Impl::clock_tic() % locks_.extent(0);
-#else
-    int i = 0;
-#endif
+    auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
+    std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
+                                item.get_local_id(0)};
+    std::size_t blockIdx[3]  = {item.get_group(2), item.get_group(1),
+                               item.get_group(0)};
+    std::size_t blockDim[3] = {item.get_local_range(2), item.get_local_range(1),
+                               item.get_local_range(0)};
+    std::size_t gridDim[3]  = {
+        item.get_global_range(2) / item.get_local_range(2),
+        item.get_global_range(1) / item.get_local_range(1),
+        item.get_global_range(0) / item.get_local_range(0)};
+    const int i_offset =
+        (threadIdx[0] * blockDim[1] + threadIdx[1]) * blockDim[2] +
+        threadIdx[2];
+    int i =
+        (((blockIdx[0] * gridDim[1] + blockIdx[1]) * gridDim[2] + blockIdx[2]) *
+             blockDim[0] * blockDim[1] * blockDim[2] +
+         i_offset) %
+        locks_.extent(0);
     while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
-      i = (i + 1) % static_cast<int>(locks_.extent(0));
+      i += blockDim[0] * blockDim[1] * blockDim[2];
+      if (i >= static_cast<int>(locks_.extent(0))) {
+        i = i_offset;
+      }
     }
     return i;
   }
@@ -707,9 +746,12 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
 #endif
 
 #ifdef KOKKOS_ENABLE_OPENMPTARGET
-template <>
-struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> {
-  using locks_view_type = View<int**, Kokkos::Experimental::OpenMPTarget>;
+template <class MemorySpace>
+struct Random_UniqueIndex<
+    Kokkos::Device<Kokkos::Experimental::OpenMPTarget, MemorySpace>> {
+  using locks_view_type =
+      View<int**,
+           Kokkos::Device<Kokkos::Experimental::OpenMPTarget, MemorySpace>>;
   KOKKOS_FUNCTION
   static int get_state_idx(const locks_view_type& locks) {
     const int team_size = omp_get_num_threads();
@@ -873,10 +915,13 @@ class Random_XorShift64 {
 
 template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift64_Pool {
+ public:
+  using device_type = typename DeviceType::device_type;
+
  private:
-  using execution_space = typename DeviceType::execution_space;
-  using locks_type      = View<int**, execution_space>;
-  using state_data_type = View<uint64_t**, DeviceType>;
+  using execution_space = typename device_type::execution_space;
+  using locks_type      = View<int**, device_type>;
+  using state_data_type = View<uint64_t**, device_type>;
   locks_type locks_;
   state_data_type state_;
   int num_states_;
@@ -884,7 +929,6 @@ class Random_XorShift64_Pool {
 
  public:
   using generator_type = Random_XorShift64<DeviceType>;
-  using device_type    = DeviceType;
 
   KOKKOS_INLINE_FUNCTION
   Random_XorShift64_Pool() {
@@ -923,8 +967,10 @@ class Random_XorShift64_Pool {
     state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_,
                              padding_);
 
-    typename state_data_type::HostMirror h_state = create_mirror_view(state_);
-    typename locks_type::HostMirror h_lock       = create_mirror_view(locks_);
+    typename state_data_type::HostMirror h_state =
+        Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_);
+    typename locks_type::HostMirror h_lock =
+        Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_);
 
     // Execute on the HostMirror's default execution space.
     Random_XorShift64<typename state_data_type::HostMirror::execution_space>
@@ -947,8 +993,7 @@ class Random_XorShift64_Pool {
 
   KOKKOS_INLINE_FUNCTION
   Random_XorShift64<DeviceType> get_state() const {
-    const int i =
-        Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
+    const int i = Impl::Random_UniqueIndex<device_type>::get_state_idx(locks_);
     return Random_XorShift64<DeviceType>(state_(i, 0), i);
   }
 
@@ -1119,11 +1164,14 @@ class Random_XorShift1024 {
 
 template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift1024_Pool {
+ public:
+  using device_type = typename DeviceType::device_type;
+
  private:
-  using execution_space = typename DeviceType::execution_space;
-  using locks_type      = View<int**, execution_space>;
-  using int_view_type   = View<int**, DeviceType>;
-  using state_data_type = View<uint64_t * [16], DeviceType>;
+  using execution_space = typename device_type::execution_space;
+  using locks_type      = View<int**, device_type>;
+  using int_view_type   = View<int**, device_type>;
+  using state_data_type = View<uint64_t * [16], device_type>;
 
   locks_type locks_;
   state_data_type state_;
@@ -1135,8 +1183,6 @@ class Random_XorShift1024_Pool {
  public:
   using generator_type = Random_XorShift1024<DeviceType>;
 
-  using device_type = DeviceType;
-
   KOKKOS_INLINE_FUNCTION
   Random_XorShift1024_Pool() { num_states_ = 0; }
 
@@ -1175,9 +1221,12 @@ class Random_XorShift1024_Pool {
     state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_);
     p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_, padding_);
 
-    typename state_data_type::HostMirror h_state = create_mirror_view(state_);
-    typename locks_type::HostMirror h_lock       = create_mirror_view(locks_);
-    typename int_view_type::HostMirror h_p       = create_mirror_view(p_);
+    typename state_data_type::HostMirror h_state =
+        Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_);
+    typename locks_type::HostMirror h_lock =
+        Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_);
+    typename int_view_type::HostMirror h_p =
+        Kokkos::create_mirror_view(Kokkos::WithoutInitializing, p_);
 
     // Execute on the HostMirror's default execution space.
     Random_XorShift64<typename state_data_type::HostMirror::execution_space>
@@ -1203,8 +1252,7 @@ class Random_XorShift1024_Pool {
 
   KOKKOS_INLINE_FUNCTION
   Random_XorShift1024<DeviceType> get_state() const {
-    const int i =
-        Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
+    const int i = Impl::Random_UniqueIndex<device_type>::get_state_idx(locks_);
     return Random_XorShift1024<DeviceType>(state_, p_(i, 0), i);
   };
 
@@ -1224,265 +1272,34 @@ class Random_XorShift1024_Pool {
 
 namespace Impl {
 
-template <class ViewType, class RandomPool, int loops, int rank,
-          class IndexType>
-struct fill_random_functor_range;
 template <class ViewType, class RandomPool, int loops, int rank,
           class IndexType>
 struct fill_random_functor_begin_end;
 
 template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
-  using execution_space = typename ViewType::execution_space;
-  ViewType a;
-  RandomPool rand_pool;
-  typename ViewType::const_value_type range;
-
-  using Rand = rand<typename RandomPool::generator_type,
-                    typename ViewType::non_const_value_type>;
-
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(const IndexType& i) const {
-    typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0)))
-        a(idx) = Rand::draw(gen, range);
-    }
-    rand_pool.free_state(gen);
-  }
-};
-
-template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
-  using execution_space = typename ViewType::execution_space;
-  ViewType a;
-  RandomPool rand_pool;
-  typename ViewType::const_value_type range;
-
-  using Rand = rand<typename RandomPool::generator_type,
-                    typename ViewType::non_const_value_type>;
-
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(IndexType i) const {
-    typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0))) {
-        for (IndexType k = 0; k < static_cast<IndexType>(a.extent(1)); k++)
-          a(idx, k) = Rand::draw(gen, range);
-      }
-    }
-    rand_pool.free_state(gen);
-  }
-};
-
-template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
-  using execution_space = typename ViewType::execution_space;
-  ViewType a;
-  RandomPool rand_pool;
-  typename ViewType::const_value_type range;
-
-  using Rand = rand<typename RandomPool::generator_type,
-                    typename ViewType::non_const_value_type>;
-
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(IndexType i) const {
-    typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0))) {
-        for (IndexType k = 0; k < static_cast<IndexType>(a.extent(1)); k++)
-          for (IndexType l = 0; l < static_cast<IndexType>(a.extent(2)); l++)
-            a(idx, k, l) = Rand::draw(gen, range);
-      }
-    }
-    rand_pool.free_state(gen);
-  }
-};
-
-template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
-  using execution_space = typename ViewType::execution_space;
-  ViewType a;
-  RandomPool rand_pool;
-  typename ViewType::const_value_type range;
-
-  using Rand = rand<typename RandomPool::generator_type,
-                    typename ViewType::non_const_value_type>;
-
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(IndexType i) const {
-    typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0))) {
-        for (IndexType k = 0; k < static_cast<IndexType>(a.extent(1)); k++)
-          for (IndexType l = 0; l < static_cast<IndexType>(a.extent(2)); l++)
-            for (IndexType m = 0; m < static_cast<IndexType>(a.extent(3)); m++)
-              a(idx, k, l, m) = Rand::draw(gen, range);
-      }
-    }
-    rand_pool.free_state(gen);
-  }
-};
-
-template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
-  using execution_space = typename ViewType::execution_space;
-  ViewType a;
-  RandomPool rand_pool;
-  typename ViewType::const_value_type range;
-
-  using Rand = rand<typename RandomPool::generator_type,
-                    typename ViewType::non_const_value_type>;
-
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(IndexType i) const {
-    typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0))) {
-        for (IndexType k = 0; k < static_cast<IndexType>(a.extent(1)); k++)
-          for (IndexType l = 0; l < static_cast<IndexType>(a.extent(2)); l++)
-            for (IndexType m = 0; m < static_cast<IndexType>(a.extent(3)); m++)
-              for (IndexType n = 0; n < static_cast<IndexType>(a.extent(4));
-                   n++)
-                a(idx, k, l, m, n) = Rand::draw(gen, range);
-      }
-    }
-    rand_pool.free_state(gen);
-  }
-};
-
-template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
-  using execution_space = typename ViewType::execution_space;
-  ViewType a;
-  RandomPool rand_pool;
-  typename ViewType::const_value_type range;
-
-  using Rand = rand<typename RandomPool::generator_type,
-                    typename ViewType::non_const_value_type>;
-
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(IndexType i) const {
-    typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0))) {
-        for (IndexType k = 0; k < static_cast<IndexType>(a.extent(1)); k++)
-          for (IndexType l = 0; l < static_cast<IndexType>(a.extent(2)); l++)
-            for (IndexType m = 0; m < static_cast<IndexType>(a.extent(3)); m++)
-              for (IndexType n = 0; n < static_cast<IndexType>(a.extent(4));
-                   n++)
-                for (IndexType o = 0; o < static_cast<IndexType>(a.extent(5));
-                     o++)
-                  a(idx, k, l, m, n, o) = Rand::draw(gen, range);
-      }
-    }
-    rand_pool.free_state(gen);
-  }
-};
-
-template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
+struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0,
+                                     IndexType> {
   using execution_space = typename ViewType::execution_space;
   ViewType a;
   RandomPool rand_pool;
-  typename ViewType::const_value_type range;
+  typename ViewType::const_value_type begin, end;
 
   using Rand = rand<typename RandomPool::generator_type,
                     typename ViewType::non_const_value_type>;
 
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
+  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
+                                typename ViewType::const_value_type begin_,
+                                typename ViewType::const_value_type end_)
+      : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {}
 
   KOKKOS_INLINE_FUNCTION
-  void operator()(IndexType i) const {
+  void operator()(IndexType) const {
     typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0))) {
-        for (IndexType k = 0; k < static_cast<IndexType>(a.extent(1)); k++)
-          for (IndexType l = 0; l < static_cast<IndexType>(a.extent(2)); l++)
-            for (IndexType m = 0; m < static_cast<IndexType>(a.extent(3)); m++)
-              for (IndexType n = 0; n < static_cast<IndexType>(a.extent(4));
-                   n++)
-                for (IndexType o = 0; o < static_cast<IndexType>(a.extent(5));
-                     o++)
-                  for (IndexType p = 0; p < static_cast<IndexType>(a.extent(6));
-                       p++)
-                    a(idx, k, l, m, n, o, p) = Rand::draw(gen, range);
-      }
-    }
+    a()                                     = Rand::draw(gen, begin, end);
     rand_pool.free_state(gen);
   }
 };
 
-template <class ViewType, class RandomPool, int loops, class IndexType>
-struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
-  using execution_space = typename ViewType::execution_space;
-  ViewType a;
-  RandomPool rand_pool;
-  typename ViewType::const_value_type range;
-
-  using Rand = rand<typename RandomPool::generator_type,
-                    typename ViewType::non_const_value_type>;
-
-  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
-                            typename ViewType::const_value_type range_)
-      : a(a_), rand_pool(rand_pool_), range(range_) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(IndexType i) const {
-    typename RandomPool::generator_type gen = rand_pool.get_state();
-    for (IndexType j = 0; j < loops; j++) {
-      const IndexType idx = i * loops + j;
-      if (idx < static_cast<IndexType>(a.extent(0))) {
-        for (IndexType k = 0; k < static_cast<IndexType>(a.extent(1)); k++)
-          for (IndexType l = 0; l < static_cast<IndexType>(a.extent(2)); l++)
-            for (IndexType m = 0; m < static_cast<IndexType>(a.extent(3)); m++)
-              for (IndexType n = 0; n < static_cast<IndexType>(a.extent(4));
-                   n++)
-                for (IndexType o = 0; o < static_cast<IndexType>(a.extent(5));
-                     o++)
-                  for (IndexType p = 0; p < static_cast<IndexType>(a.extent(6));
-                       p++)
-                    for (IndexType q = 0;
-                         q < static_cast<IndexType>(a.extent(7)); q++)
-                      a(idx, k, l, m, n, o, p, q) = Rand::draw(gen, range);
-      }
-    }
-    rand_pool.free_state(gen);
-  }
-};
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
                                      IndexType> {
@@ -1752,30 +1569,34 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
   }
 };
 
-}  // namespace Impl
-
 template <class ViewType, class RandomPool, class IndexType = int64_t>
 void fill_random(ViewType a, RandomPool g,
-                 typename ViewType::const_value_type range) {
+                 typename ViewType::const_value_type begin,
+                 typename ViewType::const_value_type end) {
   int64_t LDA = a.extent(0);
   if (LDA > 0)
     parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
-                 Impl::fill_random_functor_range<ViewType, RandomPool, 128,
-                                                 ViewType::Rank, IndexType>(
-                     a, g, range));
+                 Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
+                                                     ViewType::Rank, IndexType>(
+                     a, g, begin, end));
 }
 
+}  // namespace Impl
+
 template <class ViewType, class RandomPool, class IndexType = int64_t>
 void fill_random(ViewType a, RandomPool g,
                  typename ViewType::const_value_type begin,
                  typename ViewType::const_value_type end) {
-  int64_t LDA = a.extent(0);
-  if (LDA > 0)
-    parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
-                 Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
-                                                     ViewType::Rank, IndexType>(
-                     a, g, begin, end));
+  Impl::apply_to_view_of_static_rank(
+      [&](auto dst) { Kokkos::Impl::fill_random(dst, g, begin, end); }, a);
 }
+
+template <class ViewType, class RandomPool, class IndexType = int64_t>
+void fill_random(ViewType a, RandomPool g,
+                 typename ViewType::const_value_type range) {
+  fill_random(a, g, 0, range);
+}
+
 }  // namespace Kokkos
 
 #endif
diff --git a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp
index 7c1ce4c4cd8e757f3018da3989660d3b4c5e4cff..cde5e6857e59e2b14a33d4b30dd6c8650f1143d8 100644
--- a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp
+++ b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp
@@ -206,8 +206,10 @@ class BinSort {
   //----------------------------------------
   // Constructor: takes the keys, the binning_operator and optionally whether to
   // sort within bins (default false)
-  BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
-          BinSortOp bin_op_, bool sort_within_bins_ = false)
+  template <typename ExecutionSpace>
+  BinSort(const ExecutionSpace& exec, const_key_view_type keys_,
+          int range_begin_, int range_end_, BinSortOp bin_op_,
+          bool sort_within_bins_ = false)
       : keys(keys_),
         keys_rnd(keys_),
         bin_op(bin_op_),
@@ -222,50 +224,63 @@ class BinSort {
         "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins());
     bin_count_const = bin_count_atomic;
     bin_offsets =
-        offset_type(view_alloc(WithoutInitializing,
+        offset_type(view_alloc(exec, WithoutInitializing,
                                "Kokkos::SortImpl::BinSortFunctor::bin_offsets"),
                     bin_op.max_bins());
     sort_order =
-        offset_type(view_alloc(WithoutInitializing,
+        offset_type(view_alloc(exec, WithoutInitializing,
                                "Kokkos::SortImpl::BinSortFunctor::sort_order"),
                     range_end - range_begin);
   }
 
+  BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
+          BinSortOp bin_op_, bool sort_within_bins_ = false)
+      : BinSort(execution_space{}, keys_, range_begin_, range_end_, bin_op_,
+                sort_within_bins_) {}
+
+  template <typename ExecutionSpace>
+  BinSort(const ExecutionSpace& exec, const_key_view_type keys_,
+          BinSortOp bin_op_, bool sort_within_bins_ = false)
+      : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}
+
   BinSort(const_key_view_type keys_, BinSortOp bin_op_,
           bool sort_within_bins_ = false)
-      : BinSort(keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}
+      : BinSort(execution_space{}, keys_, bin_op_, sort_within_bins_) {}
 
   //----------------------------------------
   // Create the permutation vector, the bin_offset array and the bin_count
   // array. Can be called again if keys changed
-  void create_permute_vector() {
+  template <class ExecutionSpace = execution_space>
+  void create_permute_vector(const ExecutionSpace& exec = execution_space{}) {
     const size_t len = range_end - range_begin;
     Kokkos::parallel_for(
         "Kokkos::Sort::BinCount",
-        Kokkos::RangePolicy<execution_space, bin_count_tag>(0, len), *this);
+        Kokkos::RangePolicy<ExecutionSpace, bin_count_tag>(exec, 0, len),
+        *this);
     Kokkos::parallel_scan("Kokkos::Sort::BinOffset",
-                          Kokkos::RangePolicy<execution_space, bin_offset_tag>(
-                              0, bin_op.max_bins()),
+                          Kokkos::RangePolicy<ExecutionSpace, bin_offset_tag>(
+                              exec, 0, bin_op.max_bins()),
                           *this);
 
-    Kokkos::deep_copy(bin_count_atomic, 0);
+    Kokkos::deep_copy(exec, bin_count_atomic, 0);
     Kokkos::parallel_for(
         "Kokkos::Sort::BinBinning",
-        Kokkos::RangePolicy<execution_space, bin_binning_tag>(0, len), *this);
+        Kokkos::RangePolicy<ExecutionSpace, bin_binning_tag>(exec, 0, len),
+        *this);
 
     if (sort_within_bins)
       Kokkos::parallel_for(
           "Kokkos::Sort::BinSort",
-          Kokkos::RangePolicy<execution_space, bin_sort_bins_tag>(
-              0, bin_op.max_bins()),
+          Kokkos::RangePolicy<ExecutionSpace, bin_sort_bins_tag>(
+              exec, 0, bin_op.max_bins()),
           *this);
   }
 
   // Sort a subset of a view with respect to the first dimension using the
   // permutation array
-  template <class ValuesViewType>
-  void sort(ValuesViewType const& values, int values_range_begin,
-            int values_range_end) const {
+  template <class ExecutionSpace, class ValuesViewType>
+  void sort(const ExecutionSpace& exec, ValuesViewType const& values,
+            int values_range_begin, int values_range_end) const {
     using scratch_view_type =
         Kokkos::View<typename ValuesViewType::data_type,
                      typename ValuesViewType::array_layout,
@@ -279,7 +294,7 @@ class BinSort {
     }
 
     scratch_view_type sorted_values(
-        view_alloc(WithoutInitializing,
+        view_alloc(exec, WithoutInitializing,
                    "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
         values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
         values.rank_dynamic > 1 ? values.extent(1)
@@ -308,7 +323,7 @@ class BinSort {
                   values_range_begin - range_begin);
 
       parallel_for("Kokkos::Sort::CopyPermute",
-                   Kokkos::RangePolicy<execution_space>(0, len), functor);
+                   Kokkos::RangePolicy<ExecutionSpace>(exec, 0, len), functor);
     }
 
     {
@@ -316,10 +331,23 @@ class BinSort {
           values, range_begin, sorted_values);
 
       parallel_for("Kokkos::Sort::Copy",
-                   Kokkos::RangePolicy<execution_space>(0, len), functor);
+                   Kokkos::RangePolicy<ExecutionSpace>(exec, 0, len), functor);
     }
+  }
+
+  // Sort a subset of a view with respect to the first dimension using the
+  // permutation array
+  template <class ValuesViewType>
+  void sort(ValuesViewType const& values, int values_range_begin,
+            int values_range_end) const {
+    execution_space exec;
+    sort(exec, values, values_range_begin, values_range_end);
+    exec.fence("Kokkos::Sort: fence after sorting");
+  }
 
-    execution_space().fence("Kokkos::Sort: fence after sorting");
+  template <class ExecutionSpace, class ValuesViewType>
+  void sort(ExecutionSpace const& exec, ValuesViewType const& values) const {
+    this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin);
   }
 
   template <class ValuesViewType>
@@ -409,16 +437,41 @@ struct BinOp1D {
   BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
           typename KeyViewType::const_value_type max)
       : max_bins_(max_bins__ + 1),
-        mul_(1.0 * max_bins__ / (max - min)),
+        // Cast to int64_t to avoid possible overflow when using integer
+        mul_(std::is_integral<typename KeyViewType::const_value_type>::value
+                 ? 1.0 * max_bins__ / (int64_t(max) - int64_t(min))
+                 : 1.0 * max_bins__ / (max - min)),
         range_(max - min),
-        min_(min) {}
+        min_(min) {
+    // For integral types the number of bins may be larger than the range
+    // in which case we can exactly have one unique value per bin
+    // and then don't need to sort bins.
+    if (std::is_integral<typename KeyViewType::const_value_type>::value &&
+        static_cast<uint64_t>(range_) <= static_cast<uint64_t>(max_bins__)) {
+      mul_ = 1.;
+    }
+  }
 
   // Determine bin index from key value
-  template <class ViewType>
+  template <
+      class ViewType,
+      std::enable_if_t<!std::is_integral<typename ViewType::value_type>::value,
+                       bool> = true>
   KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
     return int(mul_ * (keys(i) - min_));
   }
 
+  // Determine bin index from key value
+  template <
+      class ViewType,
+      std::enable_if_t<std::is_integral<typename ViewType::value_type>::value,
+                       bool> = true>
+  KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
+    // The cast to int64_t is necessary because otherwise HIP returns the wrong
+    // result.
+    return int(mul_ * (int64_t(keys(i)) - int64_t(min_)));
+  }
+
   // Return maximum bin index + 1
   KOKKOS_INLINE_FUNCTION
   int max_bins() const { return max_bins_; }
@@ -485,8 +538,8 @@ struct BinOp3D {
 
 namespace Impl {
 
-template <class ViewType>
-bool try_std_sort(ViewType view) {
+template <class ViewType, class ExecutionSpace>
+bool try_std_sort(ViewType view, const ExecutionSpace& exec) {
   bool possible    = true;
   size_t stride[8] = {view.stride_0(), view.stride_1(), view.stride_2(),
                       view.stride_3(), view.stride_4(), view.stride_5(),
@@ -497,6 +550,7 @@ bool try_std_sort(ViewType view) {
   possible = possible && (ViewType::Rank == 1);
   possible = possible && (stride[0] == 1);
   if (possible) {
+    exec.fence("Kokkos::sort: Fence before sorting on the host");
     std::sort(view.data(), view.data() + view.extent(0));
   }
   return possible;
@@ -519,10 +573,12 @@ struct min_max_functor {
 
 }  // namespace Impl
 
-template <class ViewType>
-void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
+template <class ExecutionSpace, class ViewType>
+std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
+    const ExecutionSpace& exec, ViewType const& view,
+    bool const always_use_kokkos_sort = false) {
   if (!always_use_kokkos_sort) {
-    if (Impl::try_std_sort(view)) return;
+    if (Impl::try_std_sort(view, exec)) return;
   }
   using CompType = BinOp1D<ViewType>;
 
@@ -530,34 +586,68 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
   Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
   parallel_reduce("Kokkos::Sort::FindExtent",
                   Kokkos::RangePolicy<typename ViewType::execution_space>(
-                      0, view.extent(0)),
+                      exec, 0, view.extent(0)),
                   Impl::min_max_functor<ViewType>(view), reducer);
   if (result.min_val == result.max_val) return;
+  // For integral types the number of bins may be larger than the range
+  // in which case we can exactly have one unique value per bin
+  // and then don't need to sort bins.
+  bool sort_in_bins = true;
+  // TODO: figure out better max_bins then this ...
+  int64_t max_bins = view.extent(0) / 2;
+  if (std::is_integral<typename ViewType::non_const_value_type>::value) {
+    // Cast to int64_t to avoid possible overflow when using integer
+    int64_t const max_val = result.max_val;
+    int64_t const min_val = result.min_val;
+    // using 10M as the cutoff for special behavior (roughly 40MB for the count
+    // array)
+    if ((max_val - min_val) < 10000000) {
+      max_bins     = max_val - min_val + 1;
+      sort_in_bins = false;
+    }
+  }
+
   BinSort<ViewType, CompType> bin_sort(
-      view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true);
-  bin_sort.create_permute_vector();
-  bin_sort.sort(view);
+      view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins);
+  bin_sort.create_permute_vector(exec);
+  bin_sort.sort(exec, view);
 }
 
 template <class ViewType>
-void sort(ViewType view, size_t const begin, size_t const end) {
+void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
+  typename ViewType::execution_space exec;
+  sort(exec, view, always_use_kokkos_sort);
+  exec.fence("Kokkos::Sort: fence after sorting");
+}
+
+template <class ExecutionSpace, class ViewType>
+std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
+    const ExecutionSpace& exec, ViewType view, size_t const begin,
+    size_t const end) {
   using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
   using CompType     = BinOp1D<ViewType>;
 
   Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
   Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
 
-  parallel_reduce("Kokkos::Sort::FindExtent", range_policy(begin, end),
+  parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end),
                   Impl::min_max_functor<ViewType>(view), reducer);
 
   if (result.min_val == result.max_val) return;
 
   BinSort<ViewType, CompType> bin_sort(
-      view, begin, end,
+      exec, view, begin, end,
       CompType((end - begin) / 2, result.min_val, result.max_val), true);
 
-  bin_sort.create_permute_vector();
-  bin_sort.sort(view, begin, end);
+  bin_sort.create_permute_vector(exec);
+  bin_sort.sort(exec, view, begin, end);
+}
+
+template <class ViewType>
+void sort(ViewType view, size_t const begin, size_t const end) {
+  typename ViewType::execution_space exec;
+  sort(exec, view, begin, end);
+  exec.fence("Kokkos::Sort: fence after sorting");
 }
 
 }  // namespace Kokkos
diff --git a/packages/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp b/packages/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2e3babbcf0af85e854dd896d52cec8c661171d53
--- /dev/null
+++ b/packages/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp
@@ -0,0 +1,102 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_HPP
+#define KOKKOS_STD_ALGORITHMS_HPP
+
+/// \file Kokkos_StdAlgorithms.hpp
+/// \brief Kokkos counterparts for Standard C++ Library algorithms
+
+#include <std_algorithms/Kokkos_Constraints.hpp>
+#include <std_algorithms/Kokkos_RandomAccessIterator.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+
+// distance
+#include <std_algorithms/Kokkos_Distance.hpp>
+
+// move, swap, iter_swap
+#include "std_algorithms/Kokkos_ModifyingOperations.hpp"
+
+// find, find_if, find_if_not
+// for_each, for_each_n
+// mismatch
+// equal
+// count_if, count
+// all_of, any_of, none_of
+// adjacent_find
+// lexicographical_compare
+// search, search_n
+// find_first_of, find_end
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+
+// replace, replace_copy_if, replace_copy, replace_if
+// copy, copy_n, copy_backward, copy_if
+// fill, fill_n
+// transform
+// generate, generate_n
+// reverse, reverse_copy
+// move, move_backward
+// swap_ranges
+// unique, unique_copy
+// rotate, rotate_copy
+// remove, remove_if, remove_copy, remove_copy_if
+// shift_left, shift_right
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+
+// is_sorted_until, is_sorted
+#include <std_algorithms/Kokkos_SortingOperations.hpp>
+
+// min_element, max_element, minmax_element
+#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp>
+
+// is_partitioned, partition_copy, partition_point
+#include <std_algorithms/Kokkos_PartitioningOperations.hpp>
+
+// adjacent_difference
+// reduce, transform_reduce
+// exclusive_scan, transform_exclusive_scan
+// inclusive_scan, transform_inclusive_scan
+#include <std_algorithms/Kokkos_Numeric.hpp>
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..beb53fdd70c31c9fd02ba2cffd822ee2567fdd09
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp
@@ -0,0 +1,105 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_BEGIN_END_HPP
+#define KOKKOS_BEGIN_END_HPP
+
+#include <Kokkos_View.hpp>
+#include "Kokkos_RandomAccessIterator.hpp"
+#include "Kokkos_Constraints.hpp"
+
+/// \file Kokkos_BeginEnd.hpp
+/// \brief Kokkos begin, end, cbegin, cend
+
+namespace Kokkos {
+namespace Experimental {
+
+template <class DataType, class... Properties>
+KOKKOS_INLINE_FUNCTION auto begin(
+    const Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  using it_t =
+      Impl::RandomAccessIterator<Kokkos::View<DataType, Properties...>>;
+  return it_t(v);
+}
+
+template <class DataType, class... Properties>
+KOKKOS_INLINE_FUNCTION auto end(
+    const Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  using it_t =
+      Impl::RandomAccessIterator<Kokkos::View<DataType, Properties...>>;
+  return it_t(v, v.extent(0));
+}
+
+template <class DataType, class... Properties>
+KOKKOS_INLINE_FUNCTION auto cbegin(
+    const Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  using ViewConstType =
+      typename Kokkos::View<DataType, Properties...>::const_type;
+  const ViewConstType cv = v;
+  using it_t             = Impl::RandomAccessIterator<ViewConstType>;
+  return it_t(cv);
+}
+
+template <class DataType, class... Properties>
+KOKKOS_INLINE_FUNCTION auto cend(
+    const Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  using ViewConstType =
+      typename Kokkos::View<DataType, Properties...>::const_type;
+  const ViewConstType cv = v;
+  using it_t             = Impl::RandomAccessIterator<ViewConstType>;
+  return it_t(cv, cv.extent(0));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ec54cd1900d4edc2c347035dc7feeb1568df32f7
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp
@@ -0,0 +1,237 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_
+#define KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_
+
+#include <Kokkos_DetectionIdiom.hpp>
+#include <Kokkos_View.hpp>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template <typename T, typename enable = void>
+struct is_admissible_to_kokkos_std_algorithms : std::false_type {};
+
+template <typename T>
+struct is_admissible_to_kokkos_std_algorithms<
+    T, std::enable_if_t< ::Kokkos::is_view<T>::value && T::rank == 1 &&
+                         (std::is_same<typename T::traits::array_layout,
+                                       Kokkos::LayoutLeft>::value ||
+                          std::is_same<typename T::traits::array_layout,
+                                       Kokkos::LayoutRight>::value ||
+                          std::is_same<typename T::traits::array_layout,
+                                       Kokkos::LayoutStride>::value)> >
+    : std::true_type {};
+
+template <class ViewType>
+KOKKOS_INLINE_FUNCTION constexpr void
+static_assert_is_admissible_to_kokkos_std_algorithms(
+    const ViewType& /* view */) {
+  static_assert(is_admissible_to_kokkos_std_algorithms<ViewType>::value,
+                "Currently, Kokkos standard algorithms only accept 1D Views.");
+}
+
+//
+// is_iterator
+//
+template <class T>
+using iterator_category_t = typename T::iterator_category;
+
+template <class T>
+using is_iterator = Kokkos::is_detected<iterator_category_t, T>;
+
+//
+// are_iterators
+//
+template <class... Args>
+struct are_iterators;
+
+template <class T>
+struct are_iterators<T> {
+  static constexpr bool value = is_iterator<T>::value;
+};
+
+template <class Head, class... Tail>
+struct are_iterators<Head, Tail...> {
+  static constexpr bool value =
+      are_iterators<Head>::value && are_iterators<Tail...>::value;
+};
+
+//
+// are_random_access_iterators
+//
+template <class... Args>
+struct are_random_access_iterators;
+
+template <class T>
+struct are_random_access_iterators<T> {
+  static constexpr bool value =
+      is_iterator<T>::value &&
+      std::is_base_of<std::random_access_iterator_tag,
+                      typename T::iterator_category>::value;
+};
+
+template <class Head, class... Tail>
+struct are_random_access_iterators<Head, Tail...> {
+  static constexpr bool value = are_random_access_iterators<Head>::value &&
+                                are_random_access_iterators<Tail...>::value;
+};
+
+//
+// iterators_are_accessible_from
+//
+template <class... Args>
+struct iterators_are_accessible_from;
+
+template <class ExeSpace, class IteratorType>
+struct iterators_are_accessible_from<ExeSpace, IteratorType> {
+  using view_type = typename IteratorType::view_type;
+  static constexpr bool value =
+      SpaceAccessibility<ExeSpace,
+                         typename view_type::memory_space>::accessible;
+};
+
+template <class ExeSpace, class Head, class... Tail>
+struct iterators_are_accessible_from<ExeSpace, Head, Tail...> {
+  static constexpr bool value =
+      iterators_are_accessible_from<ExeSpace, Head>::value &&
+      iterators_are_accessible_from<ExeSpace, Tail...>::value;
+};
+
+template <class ExecutionSpace, class... IteratorTypes>
+KOKKOS_INLINE_FUNCTION constexpr void
+static_assert_random_access_and_accessible(const ExecutionSpace& /* ex */,
+                                           IteratorTypes... /* iterators */) {
+  static_assert(
+      are_random_access_iterators<IteratorTypes...>::value,
+      "Currently, Kokkos standard algorithms require random access iterators.");
+  static_assert(
+      iterators_are_accessible_from<ExecutionSpace, IteratorTypes...>::value,
+      "Incompatible view/iterator and execution space");
+}
+
+//
+// have matching difference_type
+//
+template <class... Args>
+struct iterators_have_matching_difference_type;
+
+template <class T>
+struct iterators_have_matching_difference_type<T> {
+  static constexpr bool value = true;
+};
+
+template <class T1, class T2>
+struct iterators_have_matching_difference_type<T1, T2> {
+  static constexpr bool value =
+      std::is_same<typename T1::difference_type,
+                   typename T2::difference_type>::value;
+};
+
+template <class T1, class T2, class... Tail>
+struct iterators_have_matching_difference_type<T1, T2, Tail...> {
+  static constexpr bool value =
+      iterators_have_matching_difference_type<T1, T2>::value &&
+      iterators_have_matching_difference_type<T2, Tail...>::value;
+};
+
+template <class IteratorType1, class IteratorType2>
+KOKKOS_INLINE_FUNCTION constexpr void
+static_assert_iterators_have_matching_difference_type(IteratorType1 /* it1 */,
+                                                      IteratorType2 /* it2 */) {
+  static_assert(iterators_have_matching_difference_type<IteratorType1,
+                                                        IteratorType2>::value,
+                "Iterators do not have matching difference_type");
+}
+
+template <class IteratorType1, class IteratorType2, class IteratorType3>
+KOKKOS_INLINE_FUNCTION constexpr void
+static_assert_iterators_have_matching_difference_type(IteratorType1 it1,
+                                                      IteratorType2 it2,
+                                                      IteratorType3 it3) {
+  static_assert_iterators_have_matching_difference_type(it1, it2);
+  static_assert_iterators_have_matching_difference_type(it2, it3);
+}
+
+//
+// not_openmptarget
+//
+template <class ExeSpace>
+struct not_openmptarget {
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  static constexpr bool value = true;
+#else
+  static constexpr bool value =
+      !std::is_same<std::decay_t<ExeSpace>,
+                    ::Kokkos::Experimental::OpenMPTarget>::value;
+#endif
+};
+
+template <class ExecutionSpace>
+KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget(
+    const ExecutionSpace&) {
+  static_assert(not_openmptarget<ExecutionSpace>::value,
+                "Currently, Kokkos standard algorithms do not support custom "
+                "comparators in OpenMPTarget");
+}
+
+//
+// valid range
+//
+template <class IteratorType>
+void expect_valid_range(IteratorType first, IteratorType last) {
+  // this is a no-op for release
+  KOKKOS_EXPECTS(last >= first);
+  // avoid compiler complaining when KOKKOS_EXPECTS is no-op
+  (void)first;
+  (void)last;
+}
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ced4370472a714fd9416836048fc6055532d77ea
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp
@@ -0,0 +1,69 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP
+#define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP
+
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_RandomAccessIterator.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+template <class IteratorType>
+KOKKOS_INLINE_FUNCTION constexpr typename IteratorType::difference_type
+distance(IteratorType first, IteratorType last) {
+  static_assert(
+      ::Kokkos::Experimental::Impl::are_random_access_iterators<
+          IteratorType>::value,
+      "Kokkos::Experimental::distance: only implemented for random access "
+      "iterators.");
+
+  return last - first;
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..18d5dadd539e1d19897c2268954637542b17eea5
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp
@@ -0,0 +1,120 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_HELPER_PREDICATES_HPP
+#define KOKKOS_STD_HELPER_PREDICATES_HPP
+
+#include <Kokkos_Macros.hpp>
+
+// naming convetion:
+// StdAlgoSomeExpressiveNameUnaryPredicate
+// StdAlgoSomeExpressiveNameBinaryPredicate
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// ------------------
+// UNARY PREDICATES
+// ------------------
+template <class T>
+struct StdAlgoEqualsValUnaryPredicate {
+  T m_value;
+
+  KOKKOS_FUNCTION
+  constexpr bool operator()(const T& val) const { return val == m_value; }
+
+  KOKKOS_FUNCTION
+  constexpr explicit StdAlgoEqualsValUnaryPredicate(const T& _value)
+      : m_value(_value) {}
+};
+
+template <class T>
+struct StdAlgoNotEqualsValUnaryPredicate {
+  T m_value;
+
+  KOKKOS_FUNCTION
+  constexpr bool operator()(const T& val) const { return !(val == m_value); }
+
+  KOKKOS_FUNCTION
+  constexpr explicit StdAlgoNotEqualsValUnaryPredicate(const T& _value)
+      : m_value(_value) {}
+};
+
+template <class ValueType, class PredicateType>
+struct StdAlgoNegateUnaryPredicateWrapper {
+  PredicateType m_pred;
+
+  KOKKOS_FUNCTION
+  constexpr bool operator()(const ValueType& val) const { return !m_pred(val); }
+
+  KOKKOS_FUNCTION
+  constexpr explicit StdAlgoNegateUnaryPredicateWrapper(
+      const PredicateType& pred)
+      : m_pred(pred) {}
+};
+
+// ------------------
+// BINARY PREDICATES
+// ------------------
+template <class ValueType1, class ValueType2 = ValueType1>
+struct StdAlgoEqualBinaryPredicate {
+  KOKKOS_FUNCTION
+  constexpr bool operator()(const ValueType1& a, const ValueType2& b) const {
+    return a == b;
+  }
+};
+
+template <class ValueType1, class ValueType2 = ValueType1>
+struct StdAlgoLessThanBinaryPredicate {
+  KOKKOS_FUNCTION
+  constexpr bool operator()(const ValueType1& a, const ValueType2& b) const {
+    return a < b;
+  }
+};
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..aa8f5ba3760c7569fb7cb31fd80fd1fe76eda197
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp
@@ -0,0 +1,409 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP
+#define KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_BeginEnd.hpp"
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_Distance.hpp"
+#include "Kokkos_ModifyingOperations.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template <class IteratorType, class ReducerType>
+struct StdMinOrMaxElemFunctor {
+  using index_type     = typename IteratorType::difference_type;
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType m_first;
+  ReducerType m_reducer;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, red_value_type& red_value) const {
+    m_reducer.join(red_value, red_value_type{m_first[i], i});
+  }
+
+  KOKKOS_FUNCTION
+  StdMinOrMaxElemFunctor(IteratorType first, ReducerType reducer)
+      : m_first(std::move(first)), m_reducer(std::move(reducer)) {}
+};
+
+template <class IteratorType, class ReducerType>
+struct StdMinMaxElemFunctor {
+  using index_type     = typename IteratorType::difference_type;
+  using red_value_type = typename ReducerType::value_type;
+  IteratorType m_first;
+  ReducerType m_reducer;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, red_value_type& red_value) const {
+    const auto& my_value = m_first[i];
+    m_reducer.join(red_value, red_value_type{my_value, my_value, i, i});
+  }
+
+  KOKKOS_FUNCTION
+  StdMinMaxElemFunctor(IteratorType first, ReducerType reducer)
+      : m_first(std::move(first)), m_reducer(std::move(reducer)) {}
+};
+
+// ------------------------------------------
+// min_or_max_element_impl
+// ------------------------------------------
+template <template <class... Args> class ReducerType, class ExecutionSpace,
+          class IteratorType, class... Args>
+IteratorType min_or_max_element_impl(const std::string& label,
+                                     const ExecutionSpace& ex,
+                                     IteratorType first, IteratorType last,
+                                     Args&&... args) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    return last;
+  }
+
+  // aliases
+  using index_type           = typename IteratorType::difference_type;
+  using value_type           = typename IteratorType::value_type;
+  using reducer_type         = ReducerType<value_type, index_type, Args...>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t = StdMinOrMaxElemFunctor<IteratorType, reducer_type>;
+
+  // run
+  reduction_value_type red_result;
+  reducer_type reducer(red_result, std::forward<Args>(args)...);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_t(first, reducer), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // return
+  return first + red_result.loc;
+}
+
+// ------------------------------------------
+// minmax_element_impl
+// ------------------------------------------
+template <template <class... Args> class ReducerType, class ExecutionSpace,
+          class IteratorType, class... Args>
+::Kokkos::pair<IteratorType, IteratorType> minmax_element_impl(
+    const std::string& label, const ExecutionSpace& ex, IteratorType first,
+    IteratorType last, Args&&... args) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    return {first, first};
+  }
+
+  // aliases
+  using index_type           = typename IteratorType::difference_type;
+  using value_type           = typename IteratorType::value_type;
+  using reducer_type         = ReducerType<value_type, index_type, Args...>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t               = StdMinMaxElemFunctor<IteratorType, reducer_type>;
+
+  // run
+  reduction_value_type red_result;
+  reducer_type reducer(red_result, std::forward<Args>(args)...);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_t(first, reducer), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // return
+  return {first + red_result.min_loc, first + red_result.max_loc};
+}
+
+}  // end namespace Impl
+
+// ----------------------
+// min_element public API
+// ----------------------
+template <class ExecutionSpace, class IteratorType>
+auto min_element(const ExecutionSpace& ex, IteratorType first,
+                 IteratorType last) {
+  return Impl::min_or_max_element_impl<MinFirstLoc>(
+      "Kokkos::min_element_iterator_api_default", ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+auto min_element(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first, IteratorType last) {
+  return Impl::min_or_max_element_impl<MinFirstLoc>(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+auto min_element(const ExecutionSpace& ex, IteratorType first,
+                 IteratorType last, ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>(
+      "Kokkos::min_element_iterator_api_default", ex, first, last,
+      std::move(comp));
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+auto min_element(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first, IteratorType last, ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>(
+      label, ex, first, last, std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto min_element(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::min_or_max_element_impl<MinFirstLoc>(
+      "Kokkos::min_element_view_api_default", ex, begin(v), end(v));
+}
+
+template <class ExecutionSpace, class DataType, class ComparatorType,
+          class... Properties>
+auto min_element(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>(
+      "Kokkos::min_element_view_api_default", ex, begin(v), end(v),
+      std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto min_element(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::min_or_max_element_impl<MinFirstLoc>(label, ex, begin(v),
+                                                    end(v));
+}
+
+template <class ExecutionSpace, class DataType, class ComparatorType,
+          class... Properties>
+auto min_element(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>(
+      label, ex, begin(v), end(v), std::move(comp));
+}
+
+// ----------------------
+// max_element public API
+// ----------------------
+template <class ExecutionSpace, class IteratorType>
+auto max_element(const ExecutionSpace& ex, IteratorType first,
+                 IteratorType last) {
+  return Impl::min_or_max_element_impl<MaxFirstLoc>(
+      "Kokkos::max_element_iterator_api_default", ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+auto max_element(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first, IteratorType last) {
+  return Impl::min_or_max_element_impl<MaxFirstLoc>(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+auto max_element(const ExecutionSpace& ex, IteratorType first,
+                 IteratorType last, ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>(
+      "Kokkos::max_element_iterator_api_default", ex, first, last,
+      std::move(comp));
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+auto max_element(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first, IteratorType last, ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>(
+      label, ex, first, last, std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto max_element(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::min_or_max_element_impl<MaxFirstLoc>(
+      "Kokkos::max_element_view_api_default", ex, begin(v), end(v));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto max_element(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::min_or_max_element_impl<MaxFirstLoc>(label, ex, begin(v),
+                                                    end(v));
+}
+
+template <class ExecutionSpace, class DataType, class ComparatorType,
+          class... Properties>
+auto max_element(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>(
+      "Kokkos::max_element_view_api_default", ex, begin(v), end(v),
+      std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class ComparatorType,
+          class... Properties>
+auto max_element(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>(
+      label, ex, begin(v), end(v), std::move(comp));
+}
+
+// -------------------------
+// minmax_element public API
+// -------------------------
+template <class ExecutionSpace, class IteratorType>
+auto minmax_element(const ExecutionSpace& ex, IteratorType first,
+                    IteratorType last) {
+  return Impl::minmax_element_impl<MinMaxFirstLastLoc>(
+      "Kokkos::minmax_element_iterator_api_default", ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+auto minmax_element(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType last) {
+  return Impl::minmax_element_impl<MinMaxFirstLastLoc>(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+auto minmax_element(const ExecutionSpace& ex, IteratorType first,
+                    IteratorType last, ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>(
+      "Kokkos::minmax_element_iterator_api_default", ex, first, last,
+      std::move(comp));
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+auto minmax_element(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType last,
+                    ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>(
+      label, ex, first, last, std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto minmax_element(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::minmax_element_impl<MinMaxFirstLastLoc>(
+      "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto minmax_element(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::minmax_element_impl<MinMaxFirstLastLoc>(label, ex, begin(v),
+                                                       end(v));
+}
+
+template <class ExecutionSpace, class DataType, class ComparatorType,
+          class... Properties>
+auto minmax_element(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v,
+                    ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>(
+      "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v),
+      std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class ComparatorType,
+          class... Properties>
+auto minmax_element(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v,
+                    ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>(
+      label, ex, begin(v), end(v), std::move(comp));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f8ca3456e5060cdc370fa2720936fdb3136b8738
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp
@@ -0,0 +1,113 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_MODIFYING_OPERATIONS_HPP
+#define KOKKOS_MODIFYING_OPERATIONS_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_BeginEnd.hpp"
+#include "Kokkos_Constraints.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// move
+template <typename T>
+KOKKOS_INLINE_FUNCTION std::remove_reference_t<T>&& move(T&& t) {
+  return static_cast<std::remove_reference_t<T>&&>(t);
+}
+
+// swap
+template <class T>
+KOKKOS_INLINE_FUNCTION void swap(T& a, T& b) noexcept {
+  static_assert(
+      std::is_move_assignable<T>::value && std::is_move_constructible<T>::value,
+      "Kokkos::Experimental::swap arguments must be move assignable "
+      "and move constructible");
+
+  T tmp = std::move(a);
+  a     = std::move(b);
+  b     = std::move(tmp);
+}
+
+//----------------------------------------------------------------------------
+// this is here because we use the swap function above
+namespace Impl {
+template <class IteratorType1, class IteratorType2>
+struct StdIterSwapFunctor {
+  IteratorType1 m_a;
+  IteratorType2 m_b;
+
+  KOKKOS_FUNCTION
+  void operator()(int i) const {
+    (void)i;
+    ::Kokkos::Experimental::swap(*m_a, *m_b);
+  }
+
+  KOKKOS_FUNCTION
+  StdIterSwapFunctor(IteratorType1 _a, IteratorType2 _b)
+      : m_a(std::move(_a)), m_b(std::move(_b)) {}
+};
+
+template <class IteratorType1, class IteratorType2>
+void iter_swap_impl(IteratorType1 a, IteratorType2 b) {
+  // is there a better way to do this maybe?
+  ::Kokkos::parallel_for(
+      1, StdIterSwapFunctor<IteratorType1, IteratorType2>(a, b));
+  Kokkos::DefaultExecutionSpace().fence(
+      "Kokkos::iter_swap: fence after operation");
+}
+}  // namespace Impl
+//----------------------------------------------------------------------------
+
+// iter_swap
+template <class IteratorType1, class IteratorType2>
+void iter_swap(IteratorType1 a, IteratorType2 b) {
+  Impl::iter_swap_impl(a, b);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingSequenceOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingSequenceOperations.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..dacb82bfc2a6028157c877de9c42c3efd2ca5e85
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingSequenceOperations.hpp
@@ -0,0 +1,51 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_MOD_SEQ_OPS_INC_ALL_HPP
+#define KOKKOS_STD_MOD_SEQ_OPS_INC_ALL_HPP
+
+#include "./modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp"
+#include "./modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp"
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d273f092a7413d50d021e4b8da6682aacbd1216e
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp
@@ -0,0 +1,2406 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_NON_MODIFYING_SEQUENCE_OPERATIONS_HPP
+#define KOKKOS_NON_MODIFYING_SEQUENCE_OPERATIONS_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_BeginEnd.hpp"
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_ModifyingOperations.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include "Kokkos_RandomAccessIterator.hpp"
+#include "Kokkos_Distance.hpp"
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// ------------------------------------------
+//
+// functors
+//
+// ------------------------------------------
+
+template <bool is_find_if, class IndexType, class IteratorType,
+          class ReducerType, class PredicateType>
+struct StdFindIfOrNotFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType m_first;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    const auto& my_value = m_first[i];
+
+    // if doing find_if, look for when predicate is true
+    // if doing find_if_not, look for when predicate is false
+    const bool found_condition = is_find_if ? m_p(my_value) : !m_p(my_value);
+
+    auto rv =
+        found_condition
+            ? red_value_type{i}
+            : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdFindIfOrNotFunctor(IteratorType first, ReducerType reducer,
+                        PredicateType p)
+      : m_first(std::move(first)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+template <class IteratorType, class UnaryFunctorType>
+struct StdForEachFunctor {
+  using index_type = typename IteratorType::difference_type;
+  IteratorType m_first;
+  UnaryFunctorType m_functor;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i) const { m_functor(m_first[i]); }
+
+  KOKKOS_FUNCTION
+  StdForEachFunctor(IteratorType _first, UnaryFunctorType _functor)
+      : m_first(std::move(_first)), m_functor(std::move(_functor)) {}
+};
+
+template <class IteratorType, class Predicate>
+struct StdCountIfFunctor {
+  using index_type = typename IteratorType::difference_type;
+  IteratorType m_first;
+  Predicate m_predicate;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i, index_type& lsum) const {
+    if (m_predicate(m_first[i])) {
+      lsum++;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdCountIfFunctor(IteratorType _first, Predicate _predicate)
+      : m_first(std::move(_first)), m_predicate(std::move(_predicate)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ReducerType, class BinaryPredicateType>
+struct StdMismatchRedFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType1 m_first1;
+  IteratorType2 m_first2;
+  ReducerType m_reducer;
+  BinaryPredicateType m_predicate;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    const auto& my_value1 = m_first1[i];
+    const auto& my_value2 = m_first2[i];
+
+    auto rv =
+        !m_predicate(my_value1, my_value2)
+            ? red_value_type{i}
+            : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdMismatchRedFunctor(IteratorType1 first1, IteratorType2 first2,
+                        ReducerType reducer, BinaryPredicateType predicate)
+      : m_first1(std::move(first1)),
+        m_first2(std::move(first2)),
+        m_reducer(std::move(reducer)),
+        m_predicate(std::move(predicate)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+struct StdEqualFunctor {
+  IteratorType1 m_first1;
+  IteratorType2 m_first2;
+  BinaryPredicateType m_predicate;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i, std::size_t& lsum) const {
+    if (!m_predicate(m_first1[i], m_first2[i])) {
+      lsum = 1;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdEqualFunctor(IteratorType1 _first1, IteratorType2 _first2,
+                  BinaryPredicateType _predicate)
+      : m_first1(std::move(_first1)),
+        m_first2(std::move(_first2)),
+        m_predicate(std::move(_predicate)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ReducerType, class ComparatorType>
+struct StdLexicographicalCompareFunctor {
+  using red_value_type = typename ReducerType::value_type;
+  IteratorType1 m_first1;
+  IteratorType2 m_first2;
+  ReducerType m_reducer;
+  ComparatorType m_comparator;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    const auto& my_value1 = m_first1[i];
+    const auto& my_value2 = m_first2[i];
+
+    bool different = m_comparator(my_value1, my_value2) ||
+                     m_comparator(my_value2, my_value1);
+    auto rv =
+        different
+            ? red_value_type{i}
+            : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdLexicographicalCompareFunctor(IteratorType1 _first1, IteratorType2 _first2,
+                                   ReducerType _reducer, ComparatorType _comp)
+      : m_first1(std::move(_first1)),
+        m_first2(std::move(_first2)),
+        m_reducer(std::move(_reducer)),
+        m_comparator(std::move(_comp)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ComparatorType>
+struct StdCompareFunctor {
+  IteratorType1 m_it1;
+  IteratorType2 m_it2;
+  ComparatorType m_predicate;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType /* i is unused */, int& lsum) const {
+    if (m_predicate(*m_it1, *m_it2)) {
+      lsum = 1;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdCompareFunctor(IteratorType1 _it1, IteratorType2 _it2,
+                    ComparatorType _predicate)
+      : m_it1(std::move(_it1)),
+        m_it2(std::move(_it2)),
+        m_predicate(std::move(_predicate)) {}
+};
+
+template <class IndexType, class IteratorType, class ReducerType,
+          class PredicateType>
+struct StdAdjacentFindFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType m_first;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    const auto& my_value   = m_first[i];
+    const auto& next_value = m_first[i + 1];
+    const bool are_equal   = m_p(my_value, next_value);
+
+    auto rv =
+        are_equal
+            ? red_value_type{i}
+            : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdAdjacentFindFunctor(IteratorType first, ReducerType reducer,
+                         PredicateType p)
+      : m_first(std::move(first)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ReducerType, class PredicateType>
+struct StdSearchFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType1 m_first;
+  IteratorType1 m_last;
+  IteratorType2 m_s_first;
+  IteratorType2 m_s_last;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    namespace KE = ::Kokkos::Experimental;
+    auto myit    = m_first + i;
+    bool found   = true;
+
+    const auto search_count = KE::distance(m_s_first, m_s_last);
+    for (IndexType k = 0; k < search_count; ++k) {
+      // note that we add this EXPECT to check if we are in a valid range
+      // but I think we can remove this beceause the guarantee we don't go
+      // out of bounds is taken care of at the calling site
+      // where we launch the par-reduce.
+      KOKKOS_EXPECTS((myit + k) < m_last);
+
+      if (!m_p(myit[k], m_s_first[k])) {
+        found = false;
+        break;
+      }
+    }
+
+    const auto rv =
+        found ? red_value_type{i}
+              : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdSearchFunctor(IteratorType1 first, IteratorType1 last,
+                   IteratorType2 s_first, IteratorType2 s_last,
+                   ReducerType reducer, PredicateType p)
+      : m_first(std::move(first)),
+        m_last(std::move(last)),
+        m_s_first(std::move(s_first)),
+        m_s_last(std::move(s_last)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+template <class IndexType, class IteratorType, class SizeType, class ValueType,
+          class ReducerType, class PredicateType>
+struct StdSearchNFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType m_first;
+  IteratorType m_last;
+  SizeType m_count;
+  ValueType m_value;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    namespace KE = ::Kokkos::Experimental;
+    auto myit    = m_first + i;
+    bool found   = true;
+
+    for (SizeType k = 0; k < m_count; ++k) {
+      // note that we add this EXPECT to check if we are in a valid range
+      // but I think we can remove this beceause the guarantee we don't go
+      // out of bounds is taken care of at the calling site
+      // where we launch the par-reduce.
+      KOKKOS_EXPECTS((myit + k) < m_last);
+
+      if (!m_p(myit[k], m_value)) {
+        found = false;
+        break;
+      }
+    }
+
+    const auto rv =
+        found ? red_value_type{i}
+              : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdSearchNFunctor(IteratorType first, IteratorType last, SizeType count,
+                    ValueType value, ReducerType reducer, PredicateType p)
+      : m_first(std::move(first)),
+        m_last(std::move(last)),
+        m_count(std::move(count)),
+        m_value(std::move(value)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ReducerType, class PredicateType>
+struct StdFindFirstOfFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType1 m_first;
+  IteratorType2 m_s_first;
+  IteratorType2 m_s_last;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    namespace KE        = ::Kokkos::Experimental;
+    const auto& myvalue = m_first[i];
+    bool found          = false;
+
+    const auto search_count = KE::distance(m_s_first, m_s_last);
+    for (IndexType k = 0; k < search_count; ++k) {
+      if (m_p(myvalue, m_s_first[k])) {
+        found = true;
+        break;
+      }
+    }
+
+    const auto rv =
+        found ? red_value_type{i}
+              : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdFindFirstOfFunctor(IteratorType1 first, IteratorType2 s_first,
+                        IteratorType2 s_last, ReducerType reducer,
+                        PredicateType p)
+      : m_first(std::move(first)),
+        m_s_first(std::move(s_first)),
+        m_s_last(std::move(s_last)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ReducerType, class PredicateType>
+struct StdFindEndFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType1 m_first;
+  IteratorType1 m_last;
+  IteratorType2 m_s_first;
+  IteratorType2 m_s_last;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    namespace KE = ::Kokkos::Experimental;
+    auto myit    = m_first + i;
+    bool found   = true;
+
+    const auto search_count = KE::distance(m_s_first, m_s_last);
+    for (IndexType k = 0; k < search_count; ++k) {
+      // note that we add this EXPECT to check if we are in a valid range
+      // but I think we can remvoe this beceause the guarantee we don't go
+      // out of bounds is taken care of at the calling site
+      // where we launch the par-reduce.
+      KOKKOS_EXPECTS((myit + k) < m_last);
+
+      if (!m_p(myit[k], m_s_first[k])) {
+        found = false;
+        break;
+      }
+    }
+
+    const auto rv =
+        found ? red_value_type{i}
+              : red_value_type{::Kokkos::reduction_identity<IndexType>::max()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdFindEndFunctor(IteratorType1 first, IteratorType1 last,
+                    IteratorType2 s_first, IteratorType2 s_last,
+                    ReducerType reducer, PredicateType p)
+      : m_first(std::move(first)),
+        m_last(std::move(last)),
+        m_s_first(std::move(s_first)),
+        m_s_last(std::move(s_last)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+// ------------------------------------------
+// find_if_or_not_impl
+// ------------------------------------------
+template <bool is_find_if, class ExecutionSpace, class IteratorType,
+          class PredicateType>
+IteratorType find_if_or_not_impl(const std::string& label,
+                                 const ExecutionSpace& ex, IteratorType first,
+                                 IteratorType last, PredicateType pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(
+      ex, first);  // only need one It per type
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    return last;
+  }
+
+  // aliases
+  using index_type           = typename IteratorType::difference_type;
+  using reducer_type         = FirstLoc<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t = StdFindIfOrNotFunctor<is_find_if, index_type, IteratorType,
+                                       reducer_type, PredicateType>;
+
+  // run
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_t(first, reducer, pred), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // decide and return
+  if (red_result.min_loc_true ==
+      ::Kokkos::reduction_identity<index_type>::min()) {
+    // here, it means a valid loc has not been found,
+    return last;
+  } else {
+    // a location has been found
+    return first + red_result.min_loc_true;
+  }
+}
+
+// ------------------------------------------
+// find_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class T>
+InputIterator find_impl(const std::string& label, ExecutionSpace ex,
+                        InputIterator first, InputIterator last,
+                        const T& value) {
+  return find_if_or_not_impl<true>(
+      label, ex, first, last,
+      ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate<T>(value));
+}
+
+// ------------------------------------------
+// for_each_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class UnaryFunctorType>
+UnaryFunctorType for_each_impl(const std::string& label,
+                               const ExecutionSpace& ex, IteratorType first,
+                               IteratorType last, UnaryFunctorType functor) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor));
+  ex.fence("Kokkos::for_each: fence after operation");
+
+  return functor;
+}
+
+// ------------------------------------------
+// for_each_n_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class UnaryFunctorType>
+IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType first, SizeType n,
+                             UnaryFunctorType functor) {
+  auto last = first + n;
+  Impl::static_assert_random_access_and_accessible(ex, first, last);
+  Impl::expect_valid_range(first, last);
+
+  if (n == 0) {
+    return first;
+  }
+
+  for_each_impl(label, ex, first, last, std::move(functor));
+  // no neeed to fence since for_each_impl fences already
+
+  return last;
+}
+
+// ------------------------------------------
+// count_if_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class Predicate>
+typename IteratorType::difference_type count_if_impl(const std::string& label,
+                                                     const ExecutionSpace& ex,
+                                                     IteratorType first,
+                                                     IteratorType last,
+                                                     Predicate predicate) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using func_t = StdCountIfFunctor<IteratorType, Predicate>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  typename IteratorType::difference_type count = 0;
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_t(first, predicate), count);
+  ex.fence("Kokkos::count_if: fence after operation");
+
+  return count;
+}
+
+// ------------------------------------------
+// count_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class T>
+auto count_impl(const std::string& label, const ExecutionSpace& ex,
+                IteratorType first, IteratorType last, const T& value) {
+  return count_if_impl(
+      label, ex, first, last,
+      ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate<T>(value));
+}
+
+// ------------------------------------------
+// mismatch_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+::Kokkos::pair<IteratorType1, IteratorType2> mismatch_impl(
+    const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+    IteratorType1 last1, IteratorType2 first2, IteratorType2 last2,
+    BinaryPredicateType predicate) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, first2);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, first2);
+  Impl::expect_valid_range(first1, last1);
+  Impl::expect_valid_range(first2, last2);
+
+  // aliases
+  using return_type          = ::Kokkos::pair<IteratorType1, IteratorType2>;
+  using index_type           = typename IteratorType1::difference_type;
+  using reducer_type         = FirstLoc<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using functor_type =
+      StdMismatchRedFunctor<index_type, IteratorType1, IteratorType2,
+                            reducer_type, BinaryPredicateType>;
+
+  // trivial case: note that this is important,
+  // for OpenMPTarget, omitting special handling of
+  // the trivial case was giving all sorts of strange stuff.
+  const auto num_e1 = last1 - first1;
+  const auto num_e2 = last2 - first2;
+  if (num_e1 == 0 || num_e2 == 0) {
+    return return_type(first1, first2);
+  }
+
+  // run
+  const auto num_elemen_par_reduce = (num_e1 <= num_e2) ? num_e1 : num_e2;
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  ::Kokkos::parallel_reduce(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elemen_par_reduce),
+      functor_type(first1, first2, reducer, std::move(predicate)), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // decide and return
+  constexpr auto red_min = ::Kokkos::reduction_identity<index_type>::min();
+  if (red_result.min_loc_true == red_min) {
+    // in here means mismatch has not been found
+    if (num_e1 == num_e2) {
+      return return_type(last1, last2);
+    } else if (num_e1 < num_e2) {
+      return return_type(last1, first2 + num_e1);
+    } else {
+      return return_type(first1 + num_e2, last2);
+    }
+  } else {
+    // in here means mismatch has been found
+    return return_type(first1 + red_result.min_loc_true,
+                       first2 + red_result.min_loc_true);
+  }
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+::Kokkos::pair<IteratorType1, IteratorType2> mismatch_impl(
+    const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+    IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) {
+  using value_type1 = typename IteratorType1::value_type;
+  using value_type2 = typename IteratorType2::value_type;
+  using pred_t      = StdAlgoEqualBinaryPredicate<value_type1, value_type2>;
+  return mismatch_impl(label, ex, first1, last1, first2, last2, pred_t());
+}
+
+// ------------------------------------------
+// all_of_impl, any_of_impl, none_of_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class Predicate>
+bool all_of_impl(const std::string& label, const ExecutionSpace& ex,
+                 InputIterator first, InputIterator last, Predicate predicate) {
+  return (find_if_or_not_impl<false>(label, ex, first, last, predicate) ==
+          last);
+}
+
+template <class ExecutionSpace, class InputIterator, class Predicate>
+bool any_of_impl(const std::string& label, const ExecutionSpace& ex,
+                 InputIterator first, InputIterator last, Predicate predicate) {
+  return (find_if_or_not_impl<true>(label, ex, first, last, predicate) != last);
+}
+
+template <class ExecutionSpace, class IteratorType, class Predicate>
+bool none_of_impl(const std::string& label, const ExecutionSpace& ex,
+                  IteratorType first, IteratorType last, Predicate predicate) {
+  return (find_if_or_not_impl<true>(label, ex, first, last, predicate) == last);
+}
+
+// ------------------------------------------
+// equal_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+bool equal_impl(const std::string& label, const ExecutionSpace& ex,
+                IteratorType1 first1, IteratorType1 last1, IteratorType2 first2,
+                BinaryPredicateType predicate) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, first2);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, first2);
+  Impl::expect_valid_range(first1, last1);
+
+  // aliases
+  using index_type = typename IteratorType1::difference_type;
+  using func_t     = StdEqualFunctor<index_type, IteratorType1, IteratorType2,
+                                 BinaryPredicateType>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first1, last1);
+  std::size_t different   = 0;
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_t(first1, first2, predicate), different);
+  ex.fence("Kokkos::equal: fence after operation");
+
+  return !different;
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+bool equal_impl(const std::string& label, const ExecutionSpace& ex,
+                IteratorType1 first1, IteratorType1 last1,
+                IteratorType2 first2) {
+  using value_type1 = typename IteratorType1::value_type;
+  using value_type2 = typename IteratorType2::value_type;
+  using pred_t      = StdAlgoEqualBinaryPredicate<value_type1, value_type2>;
+  return equal_impl(label, ex, first1, last1, first2, pred_t());
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+bool equal_impl(const std::string& label, const ExecutionSpace& ex,
+                IteratorType1 first1, IteratorType1 last1, IteratorType2 first2,
+                IteratorType2 last2, BinaryPredicateType predicate) {
+  const auto d1 = ::Kokkos::Experimental::distance(first1, last1);
+  const auto d2 = ::Kokkos::Experimental::distance(first2, last2);
+  if (d1 != d2) {
+    return false;
+  }
+
+  return equal_impl(label, ex, first1, last1, first2, predicate);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+bool equal_impl(const std::string& label, const ExecutionSpace& ex,
+                IteratorType1 first1, IteratorType1 last1, IteratorType2 first2,
+                IteratorType2 last2) {
+  Impl::expect_valid_range(first1, last1);
+  Impl::expect_valid_range(first2, last2);
+
+  using value_type1 = typename IteratorType1::value_type;
+  using value_type2 = typename IteratorType2::value_type;
+  using pred_t      = StdAlgoEqualBinaryPredicate<value_type1, value_type2>;
+  return equal_impl(label, ex, first1, last1, first2, last2, pred_t());
+}
+
+// ------------------------------------------
+// lexicographical_compare_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ComparatorType>
+bool lexicographical_compare_impl(const std::string& label,
+                                  const ExecutionSpace& ex,
+                                  IteratorType1 first1, IteratorType1 last1,
+                                  IteratorType2 first2, IteratorType2 last2,
+                                  ComparatorType comp) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, first2);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, first2);
+  Impl::expect_valid_range(first1, last1);
+  Impl::expect_valid_range(first2, last2);
+
+  // aliases
+  using index_type           = typename IteratorType1::difference_type;
+  using reducer_type         = FirstLoc<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+
+  // run
+  const auto d1    = Kokkos::Experimental::distance(first1, last1);
+  const auto d2    = Kokkos::Experimental::distance(first2, last2);
+  const auto range = Kokkos::Experimental::min(d1, d2);
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  using func1_t =
+      StdLexicographicalCompareFunctor<index_type, IteratorType1, IteratorType2,
+                                       reducer_type, ComparatorType>;
+
+  ::Kokkos::parallel_reduce(label, RangePolicy<ExecutionSpace>(ex, 0, range),
+                            func1_t(first1, first2, reducer, comp), reducer);
+
+  // fence not needed because reducing into scalar
+  // no mismatch
+  if (red_result.min_loc_true ==
+      ::Kokkos::reduction_identity<index_type>::min()) {
+    auto new_last1 = first1 + range;
+    auto new_last2 = first2 + range;
+    bool is_prefix = (new_last1 == last1) && (new_last2 != last2);
+    return is_prefix;
+  }
+
+  // check mismatched
+  int less      = 0;
+  auto it1      = first1 + red_result.min_loc_true;
+  auto it2      = first2 + red_result.min_loc_true;
+  using func2_t = StdCompareFunctor<index_type, IteratorType1, IteratorType2,
+                                    ComparatorType>;
+  ::Kokkos::parallel_reduce(label, RangePolicy<ExecutionSpace>(ex, 0, 1),
+                            func2_t(it1, it2, comp), less);
+
+  // fence not needed because reducing into scalar
+  return static_cast<bool>(less);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+bool lexicographical_compare_impl(const std::string& label,
+                                  const ExecutionSpace& ex,
+                                  IteratorType1 first1, IteratorType1 last1,
+                                  IteratorType2 first2, IteratorType2 last2) {
+  using value_type_1 = typename IteratorType1::value_type;
+  using value_type_2 = typename IteratorType2::value_type;
+  using predicate_t =
+      Impl::StdAlgoLessThanBinaryPredicate<value_type_1, value_type_2>;
+  return lexicographical_compare_impl(label, ex, first1, last1, first2, last2,
+                                      predicate_t());
+}
+
+// ------------------------------------------
+// adjacent_find_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+IteratorType adjacent_find_impl(const std::string& label,
+                                const ExecutionSpace& ex, IteratorType first,
+                                IteratorType last, PredicateType pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+
+  if (num_elements <= 1) {
+    return last;
+  }
+
+  using index_type           = typename IteratorType::difference_type;
+  using reducer_type         = FirstLoc<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t = StdAdjacentFindFunctor<index_type, IteratorType, reducer_type,
+                                        PredicateType>;
+
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+
+  // note that we use below num_elements-1 because
+  // each index i in the reduction checks i and (i+1).
+  ::Kokkos::parallel_reduce(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements - 1),
+      func_t(first, reducer, pred), reducer);
+
+  // fence not needed because reducing into scalar
+  if (red_result.min_loc_true ==
+      ::Kokkos::reduction_identity<index_type>::min()) {
+    return last;
+  } else {
+    return first + red_result.min_loc_true;
+  }
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType adjacent_find_impl(const std::string& label,
+                                const ExecutionSpace& ex, IteratorType first,
+                                IteratorType last) {
+  using value_type     = typename IteratorType::value_type;
+  using default_pred_t = StdAlgoEqualBinaryPredicate<value_type>;
+  return adjacent_find_impl(label, ex, first, last, default_pred_t());
+}
+
+// ------------------------------------------
+// search_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 search_impl(const std::string& label, const ExecutionSpace& ex,
+                          IteratorType1 first, IteratorType1 last,
+                          IteratorType2 s_first, IteratorType2 s_last,
+                          const BinaryPredicateType& pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, s_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, s_first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(s_first, s_last);
+
+  // the target sequence should not be larger than the range [first, last)
+  namespace KE            = ::Kokkos::Experimental;
+  const auto num_elements = KE::distance(first, last);
+  const auto s_count      = KE::distance(s_first, s_last);
+  KOKKOS_EXPECTS(num_elements >= s_count);
+  (void)s_count;  // needed when macro above is a no-op
+
+  if (s_first == s_last) {
+    return first;
+  }
+
+  if (first == last) {
+    return last;
+  }
+
+  // special case where the two ranges have equal size
+  if (num_elements == s_count) {
+    const auto equal_result = equal_impl(label, ex, first, last, s_first, pred);
+    return (equal_result) ? first : last;
+  } else {
+    using index_type           = typename IteratorType1::difference_type;
+    using reducer_type         = FirstLoc<index_type>;
+    using reduction_value_type = typename reducer_type::value_type;
+    using func_t = StdSearchFunctor<index_type, IteratorType1, IteratorType2,
+                                    reducer_type, BinaryPredicateType>;
+
+    // run
+    reduction_value_type red_result;
+    reducer_type reducer(red_result);
+
+    // decide the size of the range policy of the par_red:
+    // note that the last feasible index to start looking is the index
+    // whose distance from the "last" is equal to the sequence count.
+    // the +1 is because we need to include that location too.
+    const auto range_size = num_elements - s_count + 1;
+
+    // run par reduce
+    ::Kokkos::parallel_reduce(
+        label, RangePolicy<ExecutionSpace>(ex, 0, range_size),
+        func_t(first, last, s_first, s_last, reducer, pred), reducer);
+
+    // fence not needed because reducing into scalar
+
+    // decide and return
+    if (red_result.min_loc_true ==
+        ::Kokkos::reduction_identity<index_type>::min()) {
+      // location has not been found
+      return last;
+    } else {
+      // location has been found
+      return first + red_result.min_loc_true;
+    }
+  }
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 search_impl(const std::string& label, const ExecutionSpace& ex,
+                          IteratorType1 first, IteratorType1 last,
+                          IteratorType2 s_first, IteratorType2 s_last) {
+  using value_type1    = typename IteratorType1::value_type;
+  using value_type2    = typename IteratorType2::value_type;
+  using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>;
+  return search_impl(label, ex, first, last, s_first, s_last, predicate_type());
+}
+
+// ------------------------------------------
+// search_n_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType, class BinaryPredicateType>
+IteratorType search_n_impl(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType first, IteratorType last,
+                           SizeType count, const ValueType& value,
+                           const BinaryPredicateType& pred) {
+  // checks
+  static_assert_random_access_and_accessible(ex, first);
+  expect_valid_range(first, last);
+  KOKKOS_EXPECTS((std::ptrdiff_t)count >= 0);
+
+  // count should not be larger than the range [first, last)
+  namespace KE            = ::Kokkos::Experimental;
+  const auto num_elements = KE::distance(first, last);
+  // cast things to avoid compiler warning
+  KOKKOS_EXPECTS((std::size_t)num_elements >= (std::size_t)count);
+
+  if (first == last) {
+    return first;
+  }
+
+  // special case where num elements in [first, last) == count
+  if ((std::size_t)num_elements == (std::size_t)count) {
+    using equal_to_value = StdAlgoEqualsValUnaryPredicate<ValueType>;
+    const auto satisfies =
+        all_of_impl(label, ex, first, last, equal_to_value(value));
+    return (satisfies) ? first : last;
+  } else {
+    // aliases
+    using index_type           = typename IteratorType::difference_type;
+    using reducer_type         = FirstLoc<index_type>;
+    using reduction_value_type = typename reducer_type::value_type;
+    using func_t =
+        StdSearchNFunctor<index_type, IteratorType, SizeType, ValueType,
+                          reducer_type, BinaryPredicateType>;
+
+    // run
+    reduction_value_type red_result;
+    reducer_type reducer(red_result);
+
+    // decide the size of the range policy of the par_red:
+    // the last feasible index to start looking is the index
+    // whose distance from the "last" is equal to count.
+    // the +1 is because we need to include that location too.
+    const auto range_size = num_elements - count + 1;
+
+    // run par reduce
+    ::Kokkos::parallel_reduce(
+        label, RangePolicy<ExecutionSpace>(ex, 0, range_size),
+        func_t(first, last, count, value, reducer, pred), reducer);
+
+    // fence not needed because reducing into scalar
+
+    // decide and return
+    if (red_result.min_loc_true ==
+        ::Kokkos::reduction_identity<index_type>::min()) {
+      // location has not been found
+      return last;
+    } else {
+      // location has been found
+      return first + red_result.min_loc_true;
+    }
+  }
+}
+
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType>
+IteratorType search_n_impl(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType first, IteratorType last,
+                           SizeType count, const ValueType& value) {
+  using iter_value_type = typename IteratorType::value_type;
+  using predicate_type =
+      StdAlgoEqualBinaryPredicate<iter_value_type, ValueType>;
+
+  /* above we use <iter_value_type, ValueType> for the predicate_type
+     to be consistent with the standard, which says:
+
+     "
+     The signature of the predicate function should be equivalent to:
+
+        bool pred(const Type1 &a, const Type2 &b);
+
+     The type Type1 must be such that an object of type ForwardIt can be
+     dereferenced and then implicitly converted to Type1. The type Type2 must be
+     such that an object of type T can be implicitly converted to Type2.
+     "
+
+     In our case, IteratorType = ForwardIt, and ValueType = T.
+   */
+
+  return search_n_impl(label, ex, first, last, count, value, predicate_type());
+}
+
+// ------------------------------------------
+// find_first_of_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_first_of_impl(const std::string& label,
+                                 const ExecutionSpace& ex, IteratorType1 first,
+                                 IteratorType1 last, IteratorType2 s_first,
+                                 IteratorType2 s_last,
+                                 const BinaryPredicateType& pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, s_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, s_first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(s_first, s_last);
+
+  if ((s_first == s_last) || (first == last)) {
+    return last;
+  }
+
+  using index_type           = typename IteratorType1::difference_type;
+  using reducer_type         = FirstLoc<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t = StdFindFirstOfFunctor<index_type, IteratorType1, IteratorType2,
+                                       reducer_type, BinaryPredicateType>;
+
+  // run
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_t(first, s_first, s_last, reducer, pred), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // decide and return
+  if (red_result.min_loc_true ==
+      ::Kokkos::reduction_identity<index_type>::min()) {
+    // if here, nothing found
+    return last;
+  } else {
+    // a location has been found
+    return first + red_result.min_loc_true;
+  }
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_first_of_impl(const std::string& label,
+                                 const ExecutionSpace& ex, IteratorType1 first,
+                                 IteratorType1 last, IteratorType2 s_first,
+                                 IteratorType2 s_last) {
+  using value_type1    = typename IteratorType1::value_type;
+  using value_type2    = typename IteratorType2::value_type;
+  using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>;
+  return find_first_of_impl(label, ex, first, last, s_first, s_last,
+                            predicate_type());
+}
+
+// ------------------------------------------
+// find_end_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 s_first, IteratorType2 s_last,
+                            const BinaryPredicateType& pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, s_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, s_first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(s_first, s_last);
+
+  // the target sequence should not be larger than the range [first, last)
+  namespace KE            = ::Kokkos::Experimental;
+  const auto num_elements = KE::distance(first, last);
+  const auto s_count      = KE::distance(s_first, s_last);
+  KOKKOS_EXPECTS(num_elements >= s_count);
+  (void)s_count;  // needed when macro above is a no-op
+
+  if (s_first == s_last) {
+    return last;
+  }
+
+  if (first == last) {
+    return last;
+  }
+
+  // special case where the two ranges have equal size
+  if (num_elements == s_count) {
+    const auto equal_result = equal_impl(label, ex, first, last, s_first, pred);
+    return (equal_result) ? first : last;
+  } else {
+    using index_type           = typename IteratorType1::difference_type;
+    using reducer_type         = LastLoc<index_type>;
+    using reduction_value_type = typename reducer_type::value_type;
+    using func_t = StdFindEndFunctor<index_type, IteratorType1, IteratorType2,
+                                     reducer_type, BinaryPredicateType>;
+
+    // run
+    reduction_value_type red_result;
+    reducer_type reducer(red_result);
+
+    // decide the size of the range policy of the par_red:
+    // note that the last feasible index to start looking is the index
+    // whose distance from the "last" is equal to the sequence count.
+    // the +1 is because we need to include that location too.
+    const auto range_size = num_elements - s_count + 1;
+
+    // run par reduce
+    ::Kokkos::parallel_reduce(
+        label, RangePolicy<ExecutionSpace>(ex, 0, range_size),
+        func_t(first, last, s_first, s_last, reducer, pred), reducer);
+
+    // fence not needed because reducing into scalar
+
+    // decide and return
+    if (red_result.max_loc_true ==
+        ::Kokkos::reduction_identity<index_type>::max()) {
+      // if here, a subrange has not been found
+      return last;
+    } else {
+      // a location has been found
+      return first + red_result.max_loc_true;
+    }
+  }
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 s_first, IteratorType2 s_last) {
+  using value_type1    = typename IteratorType1::value_type;
+  using value_type2    = typename IteratorType2::value_type;
+  using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>;
+  return find_end_impl(label, ex, first, last, s_first, s_last,
+                       predicate_type());
+}
+
+}  // namespace Impl
+
+// ----------------------------------
+// find public API
+// ----------------------------------
+template <class ExecutionSpace, class InputIterator, class T>
+InputIterator find(const ExecutionSpace& ex, InputIterator first,
+                   InputIterator last, const T& value) {
+  return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last,
+                         value);
+}
+
+template <class ExecutionSpace, class InputIterator, class T>
+InputIterator find(const std::string& label, const ExecutionSpace& ex,
+                   InputIterator first, InputIterator last, const T& value) {
+  return Impl::find_impl(label, ex, first, last, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class T>
+auto find(const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType, Properties...>& view, const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view),
+                         KE::end(view), value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class T>
+auto find(const std::string& label, const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType, Properties...>& view, const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value);
+}
+
+// -------------------
+// find_if public API
+// -------------------
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+IteratorType find_if(const ExecutionSpace& ex, IteratorType first,
+                     IteratorType last, PredicateType predicate) {
+  return Impl::find_if_or_not_impl<true>("Kokkos::find_if_iterator_api_default",
+                                         ex, first, last, std::move(predicate));
+}
+
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+IteratorType find_if(const std::string& label, const ExecutionSpace& ex,
+                     IteratorType first, IteratorType last,
+                     PredicateType predicate) {
+  return Impl::find_if_or_not_impl<true>(label, ex, first, last,
+                                         std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if(const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& v,
+             Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<true>("Kokkos::find_if_view_api_default", ex,
+                                         KE::begin(v), KE::end(v),
+                                         std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if(const std::string& label, const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& v,
+             Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<true>(label, ex, KE::begin(v), KE::end(v),
+                                         std::move(predicate));
+}
+
+// ----------------------------------
+// find_if_not public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType, class Predicate>
+IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first,
+                         IteratorType last, Predicate predicate) {
+  return Impl::find_if_or_not_impl<false>(
+      "Kokkos::find_if_not_iterator_api_default", ex, first, last,
+      std::move(predicate));
+}
+
+template <class ExecutionSpace, class IteratorType, class Predicate>
+IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType last,
+                         Predicate predicate) {
+  return Impl::find_if_or_not_impl<false>(label, ex, first, last,
+                                          std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if_not(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<false>(
+      "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v),
+      std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if_not(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<false>(label, ex, KE::begin(v), KE::end(v),
+                                          std::move(predicate));
+}
+
+// ----------------------------------
+// for_each public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType, class UnaryFunctorType>
+UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
+                          IteratorType first, IteratorType last,
+                          UnaryFunctorType functor) {
+  return Impl::for_each_impl(label, ex, first, last, std::move(functor));
+}
+
+template <class ExecutionSpace, class IteratorType, class UnaryFunctorType>
+UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first,
+                          IteratorType last, UnaryFunctorType functor) {
+  return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first,
+                             last, std::move(functor));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class UnaryFunctorType>
+UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
+                          const ::Kokkos::View<DataType, Properties...>& v,
+                          UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v),
+                             std::move(functor));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class UnaryFunctorType>
+UnaryFunctorType for_each(const ExecutionSpace& ex,
+                          const ::Kokkos::View<DataType, Properties...>& v,
+                          UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex,
+                             KE::begin(v), KE::end(v), std::move(functor));
+}
+
+// ----------------------------------
+// for_each_n public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class UnaryFunctorType>
+IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex,
+                        IteratorType first, SizeType n,
+                        UnaryFunctorType functor) {
+  return Impl::for_each_n_impl(label, ex, first, n, std::move(functor));
+}
+
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class UnaryFunctorType>
+IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first,
+                        SizeType n, UnaryFunctorType functor) {
+  return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex,
+                               first, n, std::move(functor));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class UnaryFunctorType>
+auto for_each_n(const std::string& label, const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& v, SizeType n,
+                UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class UnaryFunctorType>
+auto for_each_n(const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& v, SizeType n,
+                UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex,
+                               KE::begin(v), n, std::move(functor));
+}
+
+// ----------------------------------
+// count_if public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType, class Predicate>
+typename IteratorType::difference_type count_if(const ExecutionSpace& ex,
+                                                IteratorType first,
+                                                IteratorType last,
+                                                Predicate predicate) {
+  return Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first,
+                             last, std::move(predicate));
+}
+
+template <class ExecutionSpace, class IteratorType, class Predicate>
+typename IteratorType::difference_type count_if(const std::string& label,
+                                                const ExecutionSpace& ex,
+                                                IteratorType first,
+                                                IteratorType last,
+                                                Predicate predicate) {
+  return Impl::count_if_impl(label, ex, first, last, std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto count_if(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& v,
+              Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex,
+                             KE::cbegin(v), KE::cend(v), std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto count_if(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& v,
+              Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v),
+                             std::move(predicate));
+}
+
+// ----------------------------------
+// count public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType, class T>
+typename IteratorType::difference_type count(const ExecutionSpace& ex,
+                                             IteratorType first,
+                                             IteratorType last,
+                                             const T& value) {
+  return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last,
+                          value);
+}
+
+template <class ExecutionSpace, class IteratorType, class T>
+typename IteratorType::difference_type count(const std::string& label,
+                                             const ExecutionSpace& ex,
+                                             IteratorType first,
+                                             IteratorType last,
+                                             const T& value) {
+  return Impl::count_impl(label, ex, first, last, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class T>
+auto count(const ExecutionSpace& ex,
+           const ::Kokkos::View<DataType, Properties...>& v, const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v),
+                          KE::cend(v), value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class T>
+auto count(const std::string& label, const ExecutionSpace& ex,
+           const ::Kokkos::View<DataType, Properties...>& v, const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value);
+}
+
+// ----------------------------------
+// mismatch public API
+// ----------------------------------
+// FIXME: add mismatch overloads accepting 3 iterators.
+// An overload consistent with other algorithms:
+//
+// auto mismatch(const ExecSpace& ex, It1 first1, It1 last1, It2 first2) {...}
+//
+// makes API ambiguous (with the overload accepting views).
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+::Kokkos::pair<IteratorType1, IteratorType2> mismatch(const ExecutionSpace& ex,
+                                                      IteratorType1 first1,
+                                                      IteratorType1 last1,
+                                                      IteratorType2 first2,
+                                                      IteratorType2 last2) {
+  return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex,
+                             first1, last1, first2, last2);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+::Kokkos::pair<IteratorType1, IteratorType2> mismatch(
+    const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
+    IteratorType2 first2, IteratorType2 last2,
+    BinaryPredicateType&& predicate) {
+  return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex,
+                             first1, last1, first2, last2,
+                             std::forward<BinaryPredicateType>(predicate));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+::Kokkos::pair<IteratorType1, IteratorType2> mismatch(
+    const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+    IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) {
+  return Impl::mismatch_impl(label, ex, first1, last1, first2, last2);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+::Kokkos::pair<IteratorType1, IteratorType2> mismatch(
+    const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+    IteratorType1 last1, IteratorType2 first2, IteratorType2 last2,
+    BinaryPredicateType&& predicate) {
+  return Impl::mismatch_impl(label, ex, first1, last1, first2, last2,
+                             std::forward<BinaryPredicateType>(predicate));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto mismatch(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view1,
+              const ::Kokkos::View<DataType2, Properties2...>& view2) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex,
+                             KE::begin(view1), KE::end(view1), KE::begin(view2),
+                             KE::end(view2));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto mismatch(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view1,
+              const ::Kokkos::View<DataType2, Properties2...>& view2,
+              BinaryPredicateType&& predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex,
+                             KE::begin(view1), KE::end(view1), KE::begin(view2),
+                             KE::end(view2),
+                             std::forward<BinaryPredicateType>(predicate));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto mismatch(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view1,
+              const ::Kokkos::View<DataType2, Properties2...>& view2) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1),
+                             KE::begin(view2), KE::end(view2));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto mismatch(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view1,
+              const ::Kokkos::View<DataType2, Properties2...>& view2,
+              BinaryPredicateType&& predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1),
+                             KE::begin(view2), KE::end(view2),
+                             std::forward<BinaryPredicateType>(predicate));
+}
+
+// ----------------------------------
+// all_of public API
+// ----------------------------------
+template <class ExecutionSpace, class InputIterator, class Predicate>
+bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last,
+            Predicate predicate) {
+  return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first,
+                           last, predicate);
+}
+
+template <class ExecutionSpace, class InputIterator, class Predicate>
+bool all_of(const std::string& label, const ExecutionSpace& ex,
+            InputIterator first, InputIterator last, Predicate predicate) {
+  return Impl::all_of_impl(label, ex, first, last, predicate);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+bool all_of(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& v,
+            Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v),
+                           KE::cend(v), std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+bool all_of(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& v,
+            Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v),
+                           std::move(predicate));
+}
+
+// ----------------------------------
+// any_of public API
+// ----------------------------------
+template <class ExecutionSpace, class InputIterator, class Predicate>
+bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last,
+            Predicate predicate) {
+  return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, first, last,
+                           predicate);
+}
+
+template <class ExecutionSpace, class InputIterator, class Predicate>
+bool any_of(const std::string& label, const ExecutionSpace& ex,
+            InputIterator first, InputIterator last, Predicate predicate) {
+  return Impl::any_of_impl(label, ex, first, last, predicate);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+bool any_of(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& v,
+            Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v),
+                           KE::cend(v), std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+bool any_of(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& v,
+            Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v),
+                           std::move(predicate));
+}
+
+// ----------------------------------
+// none_of public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType, class Predicate>
+bool none_of(const ExecutionSpace& ex, IteratorType first, IteratorType last,
+             Predicate predicate) {
+  return Impl::none_of_impl("Kokkos::none_of_iterator_api_default", ex, first,
+                            last, predicate);
+}
+
+template <class ExecutionSpace, class IteratorType, class Predicate>
+bool none_of(const std::string& label, const ExecutionSpace& ex,
+             IteratorType first, IteratorType last, Predicate predicate) {
+  return Impl::none_of_impl(label, ex, first, last, predicate);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+bool none_of(const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& v,
+             Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::none_of_impl("Kokkos::none_of_view_api_default", ex,
+                            KE::cbegin(v), KE::cend(v), std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+bool none_of(const std::string& label, const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& v,
+             Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::none_of_impl(label, ex, KE::cbegin(v), KE::cend(v),
+                            std::move(predicate));
+}
+
+// ----------------------------------
+// equal public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
+      IteratorType2 first2) {
+  return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
+                          last1, first2);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+      IteratorType1 last1, IteratorType2 first2) {
+  return Impl::equal_impl(label, ex, first1, last1, first2);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
+      IteratorType2 first2, BinaryPredicateType predicate) {
+  return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
+                          last1, first2, std::move(predicate));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+      IteratorType1 last1, IteratorType2 first2,
+      BinaryPredicateType predicate) {
+  return Impl::equal_impl(label, ex, first1, last1, first2,
+                          std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+bool equal(const ExecutionSpace& ex,
+           const ::Kokkos::View<DataType1, Properties1...>& view1,
+           ::Kokkos::View<DataType2, Properties2...>& view2) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::equal_impl("Kokkos::equal_view_api_default", ex,
+                          KE::cbegin(view1), KE::cend(view1),
+                          KE::cbegin(view2));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+bool equal(const std::string& label, const ExecutionSpace& ex,
+           const ::Kokkos::View<DataType1, Properties1...>& view1,
+           ::Kokkos::View<DataType2, Properties2...>& view2) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1),
+                          KE::cbegin(view2));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+bool equal(const ExecutionSpace& ex,
+           const ::Kokkos::View<DataType1, Properties1...>& view1,
+           ::Kokkos::View<DataType2, Properties2...>& view2,
+           BinaryPredicateType predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::equal_impl("Kokkos::equal_view_api_default", ex,
+                          KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2),
+                          std::move(predicate));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+bool equal(const std::string& label, const ExecutionSpace& ex,
+           const ::Kokkos::View<DataType1, Properties1...>& view1,
+           ::Kokkos::View<DataType2, Properties2...>& view2,
+           BinaryPredicateType predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1),
+                          KE::cbegin(view2), std::move(predicate));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
+      IteratorType2 first2, IteratorType2 last2) {
+  return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
+                          last1, first2, last2);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+      IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) {
+  return Impl::equal_impl(label, ex, first1, last1, first2, last2);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
+      IteratorType2 first2, IteratorType2 last2,
+      BinaryPredicateType predicate) {
+  return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
+                          last1, first2, last2, std::move(predicate));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      IteratorType1, IteratorType2>::value,
+                  bool>
+equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+      IteratorType1 last1, IteratorType2 first2, IteratorType2 last2,
+      BinaryPredicateType predicate) {
+  return Impl::equal_impl(label, ex, first1, last1, first2, last2,
+                          std::move(predicate));
+}
+
+// ----------------------------------
+// lexicographical_compare public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1,
+                             IteratorType1 last1, IteratorType2 first2,
+                             IteratorType2 last2) {
+  return Impl::lexicographical_compare_impl(
+      "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1,
+      first2, last2);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType1 first1, IteratorType1 last1,
+                             IteratorType2 first2, IteratorType2 last2) {
+  return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2,
+                                            last2);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+bool lexicographical_compare(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view1,
+    ::Kokkos::View<DataType2, Properties2...>& view2) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::lexicographical_compare_impl(
+      "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1),
+      KE::cend(view1), KE::cbegin(view2), KE::cend(view2));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+bool lexicographical_compare(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view1,
+    ::Kokkos::View<DataType2, Properties2...>& view2) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1),
+                                            KE::cend(view1), KE::cbegin(view2),
+                                            KE::cend(view2));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ComparatorType>
+bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1,
+                             IteratorType1 last1, IteratorType2 first2,
+                             IteratorType2 last2, ComparatorType comp) {
+  return Impl::lexicographical_compare_impl(
+      "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1,
+      first2, last2, comp);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ComparatorType>
+bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType1 first1, IteratorType1 last1,
+                             IteratorType2 first2, IteratorType2 last2,
+                             ComparatorType comp) {
+  return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2,
+                                            last2, comp);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ComparatorType>
+bool lexicographical_compare(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view1,
+    ::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::lexicographical_compare_impl(
+      "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1),
+      KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ComparatorType>
+bool lexicographical_compare(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view1,
+    ::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1),
+                                            KE::cend(view1), KE::cbegin(view2),
+                                            KE::cend(view2), comp);
+}
+
+// ----------------------------------
+// adjacent_find
+// ----------------------------------
+// overload set1
+template <class ExecutionSpace, class IteratorType>
+IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first,
+                           IteratorType last) {
+  return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default",
+                                  ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType first, IteratorType last) {
+  return Impl::adjacent_find_impl(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto adjacent_find(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex,
+                                  KE::begin(v), KE::end(v));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto adjacent_find(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType, Properties...>& v) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v));
+}
+
+// overload set2
+template <class ExecutionSpace, class IteratorType, class BinaryPredicateType>
+IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first,
+                           IteratorType last, BinaryPredicateType pred) {
+  return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default",
+                                  ex, first, last, pred);
+}
+
+template <class ExecutionSpace, class IteratorType, class BinaryPredicateType>
+IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType first, IteratorType last,
+                           BinaryPredicateType pred) {
+  return Impl::adjacent_find_impl(label, ex, first, last, pred);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class BinaryPredicateType>
+auto adjacent_find(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType, Properties...>& v,
+                   BinaryPredicateType pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex,
+                                  KE::begin(v), KE::end(v), pred);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class BinaryPredicateType>
+auto adjacent_find(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType, Properties...>& v,
+                   BinaryPredicateType pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred);
+}
+
+// ----------------------------------
+// search
+// ----------------------------------
+// overload set 1: no binary predicate passed
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first,
+                     IteratorType1 last, IteratorType2 s_first,
+                     IteratorType2 s_last) {
+  return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first,
+                           last, s_first, s_last);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 search(const std::string& label, const ExecutionSpace& ex,
+                     IteratorType1 first, IteratorType1 last,
+                     IteratorType2 s_first, IteratorType2 s_last) {
+  return Impl::search_impl(label, ex, first, last, s_first, s_last);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto search(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType1, Properties1...>& view,
+            const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_impl("Kokkos::search_view_api_default", ex,
+                           KE::begin(view), KE::end(view), KE::begin(s_view),
+                           KE::end(s_view));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto search(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType1, Properties1...>& view,
+            const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_impl(label, ex, KE::begin(view), KE::end(view),
+                           KE::begin(s_view), KE::end(s_view));
+}
+
+// overload set 2: binary predicate passed
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first,
+                     IteratorType1 last, IteratorType2 s_first,
+                     IteratorType2 s_last, const BinaryPredicateType& pred) {
+  return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first,
+                           last, s_first, s_last, pred);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 search(const std::string& label, const ExecutionSpace& ex,
+                     IteratorType1 first, IteratorType1 last,
+                     IteratorType2 s_first, IteratorType2 s_last,
+                     const BinaryPredicateType& pred) {
+  return Impl::search_impl(label, ex, first, last, s_first, s_last, pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto search(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType1, Properties1...>& view,
+            const ::Kokkos::View<DataType2, Properties2...>& s_view,
+            const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_impl("Kokkos::search_view_api_default", ex,
+                           KE::begin(view), KE::end(view), KE::begin(s_view),
+                           KE::end(s_view), pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto search(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType1, Properties1...>& view,
+            const ::Kokkos::View<DataType2, Properties2...>& s_view,
+            const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_impl(label, ex, KE::begin(view), KE::end(view),
+                           KE::begin(s_view), KE::end(s_view), pred);
+}
+
+// ----------------------------------
+// find_first_of
+// ----------------------------------
+// overload set 1: no binary predicate passed
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first,
+                            IteratorType1 last, IteratorType2 s_first,
+                            IteratorType2 s_last) {
+  return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default",
+                                  ex, first, last, s_first, s_last);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 s_first, IteratorType2 s_last) {
+  return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto find_first_of(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex,
+                                  KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto find_first_of(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view));
+}
+
+// overload set 2: binary predicate passed
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first,
+                            IteratorType1 last, IteratorType2 s_first,
+                            IteratorType2 s_last,
+                            const BinaryPredicateType& pred) {
+  return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default",
+                                  ex, first, last, s_first, s_last, pred);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 s_first, IteratorType2 s_last,
+                            const BinaryPredicateType& pred) {
+  return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last,
+                                  pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto find_first_of(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view,
+                   const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex,
+                                  KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view), pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto find_first_of(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view,
+                   const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view), pred);
+}
+
+// ----------------------------------
+// search_n
+// ----------------------------------
+// overload set 1: no binary predicate passed
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType>
+IteratorType search_n(const ExecutionSpace& ex, IteratorType first,
+                      IteratorType last, SizeType count,
+                      const ValueType& value) {
+  return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first,
+                             last, count, value);
+}
+
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType>
+IteratorType search_n(const std::string& label, const ExecutionSpace& ex,
+                      IteratorType first, IteratorType last, SizeType count,
+                      const ValueType& value) {
+  return Impl::search_n_impl(label, ex, first, last, count, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class ValueType>
+auto search_n(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              SizeType count, const ValueType& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex,
+                             KE::begin(view), KE::end(view), count, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class ValueType>
+auto search_n(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              SizeType count, const ValueType& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count,
+                             value);
+}
+
+// overload set 2: binary predicate passed
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType, class BinaryPredicateType>
+IteratorType search_n(const ExecutionSpace& ex, IteratorType first,
+                      IteratorType last, SizeType count, const ValueType& value,
+                      const BinaryPredicateType& pred) {
+  return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first,
+                             last, count, value, pred);
+}
+
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType, class BinaryPredicateType>
+IteratorType search_n(const std::string& label, const ExecutionSpace& ex,
+                      IteratorType first, IteratorType last, SizeType count,
+                      const ValueType& value, const BinaryPredicateType& pred) {
+  return Impl::search_n_impl(label, ex, first, last, count, value, pred);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class ValueType, class BinaryPredicateType>
+auto search_n(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              SizeType count, const ValueType& value,
+              const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex,
+                             KE::begin(view), KE::end(view), count, value,
+                             pred);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class ValueType, class BinaryPredicateType>
+auto search_n(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              SizeType count, const ValueType& value,
+              const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count,
+                             value, pred);
+}
+
+// ----------------------------------
+// find_end
+// ----------------------------------
+// overload set 1: no binary predicate passed
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first,
+                       IteratorType1 last, IteratorType2 s_first,
+                       IteratorType2 s_last) {
+  return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first,
+                             last, s_first, s_last);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex,
+                       IteratorType1 first, IteratorType1 last,
+                       IteratorType2 s_first, IteratorType2 s_last) {
+  return Impl::find_end_impl(label, ex, first, last, s_first, s_last);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto find_end(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view,
+              const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex,
+                             KE::begin(view), KE::end(view), KE::begin(s_view),
+                             KE::end(s_view));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto find_end(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view,
+              const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view),
+                             KE::begin(s_view), KE::end(s_view));
+}
+
+// overload set 2: binary predicate passed
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first,
+                       IteratorType1 last, IteratorType2 s_first,
+                       IteratorType2 s_last, const BinaryPredicateType& pred) {
+  return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first,
+                             last, s_first, s_last, pred);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex,
+                       IteratorType1 first, IteratorType1 last,
+                       IteratorType2 s_first, IteratorType2 s_last,
+                       const BinaryPredicateType& pred) {
+  return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto find_end(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view,
+              const ::Kokkos::View<DataType2, Properties2...>& s_view,
+              const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex,
+                             KE::begin(view), KE::end(view), KE::begin(s_view),
+                             KE::end(s_view), pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto find_end(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType1, Properties1...>& view,
+              const ::Kokkos::View<DataType2, Properties2...>& s_view,
+              const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view),
+                             KE::begin(s_view), KE::end(s_view), pred);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Numeric.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Numeric.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..793927e99af23326d5c882e894a4287f06ee1004
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Numeric.hpp
@@ -0,0 +1,59 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_NUMERIC_ALL_HPP
+#define KOKKOS_STD_NUMERIC_ALL_HPP
+
+#include "./numeric/Kokkos_AdjacentDifference.hpp"
+
+// contains exclusive_scan, transform_exclusive_scan
+#include "./numeric/Kokkos_ExclusiveScan.hpp"
+
+// contains inclusive_scan, transform_inclusive_scan
+#include "./numeric/Kokkos_InclusiveScan.hpp"
+
+#include "./numeric/Kokkos_Reduce.hpp"
+#include "./numeric/Kokkos_TransformReduce.hpp"
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitioningOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitioningOperations.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9806084fc0e53d45d42e3f88d6ccf95a1fcc5748
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitioningOperations.hpp
@@ -0,0 +1,491 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_PARTITIONING_OPERATIONS_HPP
+#define KOKKOS_STD_PARTITIONING_OPERATIONS_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_BeginEnd.hpp"
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_ModifyingOperations.hpp"
+#include "Kokkos_NonModifyingSequenceOperations.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// -------------------------
+//
+// functors
+//
+// -------------------------
+
+template <class IteratorType, class ReducerType, class PredicateType>
+struct StdIsPartitionedFunctor {
+  using red_value_type = typename ReducerType::value_type;
+  using index_type     = typename IteratorType::difference_type;
+
+  IteratorType m_first;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, red_value_type& redValue) const {
+    const auto predicate_value = m_p(m_first[i]);
+    constexpr index_type m_red_id_min =
+        ::Kokkos::reduction_identity<index_type>::min();
+    constexpr index_type m_red_id_max =
+        ::Kokkos::reduction_identity<index_type>::max();
+    auto rv = predicate_value ? red_value_type{i, m_red_id_min}
+                              : red_value_type{m_red_id_max, i};
+
+    m_reducer.join(redValue, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdIsPartitionedFunctor(IteratorType first, ReducerType reducer,
+                          PredicateType p)
+      : m_first(std::move(first)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+template <class IteratorType, class ReducerType, class PredicateType>
+struct StdPartitionPointFunctor {
+  using red_value_type = typename ReducerType::value_type;
+  using index_type     = typename IteratorType::difference_type;
+
+  IteratorType m_first;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, red_value_type& redValue) const {
+    const auto predicate_value = m_p(m_first[i]);
+    auto rv =
+        predicate_value
+            ? red_value_type{::Kokkos::reduction_identity<index_type>::min()}
+            : red_value_type{i};
+    m_reducer.join(redValue, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdPartitionPointFunctor(IteratorType first, ReducerType reducer,
+                           PredicateType p)
+      : m_first(std::move(first)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+template <class ValueType>
+struct StdPartitionCopyScalar {
+  ValueType true_count_;
+  ValueType false_count_;
+
+  // Here we implement the copy assignment operators explicitly for consistency
+  // with how the Scalar structs are implemented inside
+  // Kokkos_Parallel_Reduce.hpp.
+  KOKKOS_FUNCTION
+  void operator=(const StdPartitionCopyScalar& other) {
+    true_count_  = other.true_count_;
+    false_count_ = other.false_count_;
+  }
+
+  KOKKOS_FUNCTION
+  void operator=(const volatile StdPartitionCopyScalar& other) volatile {
+    true_count_  = other.true_count_;
+    false_count_ = other.false_count_;
+  }
+
+  // this is needed for
+  // OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp:699:21: error: no viable
+  // overloaded '=' m_returnvalue = 0;
+  //
+  KOKKOS_FUNCTION
+  void operator=(const ValueType value) {
+    true_count_  = value;
+    false_count_ = value;
+  }
+};
+
+template <class IndexType, class FirstFrom, class FirstDestTrue,
+          class FirstDestFalse, class PredType>
+struct StdPartitionCopyFunctor {
+  using value_type = StdPartitionCopyScalar<IndexType>;
+
+  FirstFrom m_first_from;
+  FirstDestTrue m_first_dest_true;
+  FirstDestFalse m_first_dest_false;
+  PredType m_pred;
+
+  KOKKOS_FUNCTION
+  StdPartitionCopyFunctor(FirstFrom first_from, FirstDestTrue first_dest_true,
+                          FirstDestFalse first_dest_false, PredType pred)
+      : m_first_from(std::move(first_from)),
+        m_first_dest_true(std::move(first_dest_true)),
+        m_first_dest_false(std::move(first_dest_false)),
+        m_pred(std::move(pred)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, value_type& update,
+                  const bool final_pass) const {
+    const auto& myval = m_first_from[i];
+    if (final_pass) {
+      if (m_pred(myval)) {
+        m_first_dest_true[update.true_count_] = myval;
+      } else {
+        m_first_dest_false[update.false_count_] = myval;
+      }
+    }
+
+    if (m_pred(myval)) {
+      update.true_count_ += 1;
+    } else {
+      update.false_count_ += 1;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  void init(value_type& update) const {
+    update.true_count_  = 0;
+    update.false_count_ = 0;
+  }
+
+  KOKKOS_FUNCTION
+  void join(volatile value_type& update,
+            volatile const value_type& input) const {
+    update.true_count_ += input.true_count_;
+    update.false_count_ += input.false_count_;
+  }
+};
+
+// ------------------------------------------
+// is_partitioned_impl
+// ------------------------------------------
+
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType last,
+                         PredicateType pred) {
+  // true if all elements in the range [first, last) that satisfy
+  // the predicate "pred" appear before all elements that don't.
+  // Also returns true if [first, last) is empty.
+  // also true if all elements satisfy the predicate.
+
+  // we implement it by finding:
+  // - the max location where predicate is true  (max_loc_true)
+  // - the min location where predicate is false (min_loc_false)
+  // so the range is partitioned if max_loc_true < (min_loc_false)
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // trivial case
+  if (first == last) {
+    return true;
+  }
+
+  // aliases
+  using index_type           = typename IteratorType::difference_type;
+  using reducer_type         = StdIsPartitioned<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t =
+      StdIsPartitionedFunctor<IteratorType, reducer_type, PredicateType>;
+
+  // run
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_t(first, reducer, pred), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // decide and return
+  constexpr index_type red_id_min =
+      ::Kokkos::reduction_identity<index_type>::min();
+  constexpr index_type red_id_max =
+      ::Kokkos::reduction_identity<index_type>::max();
+
+  if (red_result.max_loc_true != red_id_max &&
+      red_result.min_loc_false != red_id_min) {
+    return red_result.max_loc_true < red_result.min_loc_false;
+  } else if (first + red_result.max_loc_true == --last) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// ------------------------------------------
+// partition_point_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+IteratorType partition_point_impl(const std::string& label,
+                                  const ExecutionSpace& ex, IteratorType first,
+                                  IteratorType last, PredicateType pred) {
+  // locates the end of the first partition, that is, the first
+  // element that does not satisfy p or last if all elements satisfy p.
+  // Implementation below finds the first location where p is false.
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    return first;
+  }
+
+  // aliases
+  using index_type           = typename IteratorType::difference_type;
+  using reducer_type         = StdPartitionPoint<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t =
+      StdPartitionPointFunctor<IteratorType, reducer_type, PredicateType>;
+
+  // run
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_t(first, reducer, pred), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // decide and return
+  if (red_result.min_loc_false ==
+      ::Kokkos::reduction_identity<index_type>::min()) {
+    // if all elements are true, return last
+    return last;
+  } else {
+    return first + red_result.min_loc_false;
+  }
+}
+
+// ------------------------------------------
+// partition_copy_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorTrueType, class OutputIteratorFalseType,
+          class PredicateType>
+::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType>
+partition_copy_impl(const std::string& label, const ExecutionSpace& ex,
+                    InputIteratorType from_first, InputIteratorType from_last,
+                    OutputIteratorTrueType to_first_true,
+                    OutputIteratorFalseType to_first_false,
+                    PredicateType pred) {
+  // impl uses a scan, this is similar how we implemented copy_if
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(
+      ex, from_first, to_first_true, to_first_false);
+  Impl::static_assert_iterators_have_matching_difference_type(
+      from_first, to_first_true, to_first_false);
+  Impl::expect_valid_range(from_first, from_last);
+
+  if (from_first == from_last) {
+    return {to_first_true, to_first_false};
+  }
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using func_type =
+      StdPartitionCopyFunctor<index_type, InputIteratorType,
+                              OutputIteratorTrueType, OutputIteratorFalseType,
+                              PredicateType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(from_first, from_last);
+  typename func_type::value_type counts{0, 0};
+  ::Kokkos::parallel_scan(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_type(from_first, to_first_true, to_first_false, pred), counts);
+
+  // fence not needed here because of the scan into counts
+
+  return {to_first_true + counts.true_count_,
+          to_first_false + counts.false_count_};
+}
+
+}  // end namespace Impl
+
+// ----------------------
+// is_partitioned public API
+// ----------------------
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+bool is_partitioned(const ExecutionSpace& ex, IteratorType first,
+                    IteratorType last, PredicateType p) {
+  return Impl::is_partitioned_impl(
+      "Kokkos::is_partitioned_iterator_api_default", ex, first, last,
+      std::move(p));
+}
+
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+bool is_partitioned(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType last, PredicateType p) {
+  return Impl::is_partitioned_impl(label, ex, first, last, std::move(p));
+}
+
+template <class ExecutionSpace, class PredicateType, class DataType,
+          class... Properties>
+bool is_partitioned(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v,
+                    PredicateType p) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default",
+                                   ex, cbegin(v), cend(v), std::move(p));
+}
+
+template <class ExecutionSpace, class PredicateType, class DataType,
+          class... Properties>
+bool is_partitioned(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v,
+                    PredicateType p) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p));
+}
+
+// ----------------------
+// partition_copy
+// ----------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorTrueType, class OutputIteratorFalseType,
+          class PredicateType>
+::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> partition_copy(
+    const ExecutionSpace& ex, InputIteratorType from_first,
+    InputIteratorType from_last, OutputIteratorTrueType to_first_true,
+    OutputIteratorFalseType to_first_false, PredicateType p) {
+  return Impl::partition_copy_impl(
+      "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last,
+      to_first_true, to_first_false, std::move(p));
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorTrueType, class OutputIteratorFalseType,
+          class PredicateType>
+::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> partition_copy(
+    const std::string& label, const ExecutionSpace& ex,
+    InputIteratorType from_first, InputIteratorType from_last,
+    OutputIteratorTrueType to_first_true,
+    OutputIteratorFalseType to_first_false, PredicateType p) {
+  return Impl::partition_copy_impl(label, ex, from_first, from_last,
+                                   to_first_true, to_first_false, std::move(p));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class DataType3,
+          class... Properties3, class PredicateType>
+auto partition_copy(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest_true,
+    const ::Kokkos::View<DataType3, Properties3...>& view_dest_false,
+    PredicateType p) {
+  return Impl::partition_copy_impl("Kokkos::partition_copy_view_api_default",
+                                   ex, cbegin(view_from), cend(view_from),
+                                   begin(view_dest_true),
+                                   begin(view_dest_false), std::move(p));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class DataType3,
+          class... Properties3, class PredicateType>
+auto partition_copy(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest_true,
+    const ::Kokkos::View<DataType3, Properties3...>& view_dest_false,
+    PredicateType p) {
+  return Impl::partition_copy_impl(label, ex, cbegin(view_from),
+                                   cend(view_from), begin(view_dest_true),
+                                   begin(view_dest_false), std::move(p));
+}
+
+// ----------------------
+// partition_point
+// ----------------------
+template <class ExecutionSpace, class IteratorType, class UnaryPredicate>
+IteratorType partition_point(const ExecutionSpace& ex, IteratorType first,
+                             IteratorType last, UnaryPredicate p) {
+  return Impl::partition_point_impl(
+      "Kokkos::partitioned_point_iterator_api_default", ex, first, last,
+      std::move(p));
+}
+
+template <class ExecutionSpace, class IteratorType, class UnaryPredicate>
+IteratorType partition_point(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType first, IteratorType last,
+                             UnaryPredicate p) {
+  return Impl::partition_point_impl(label, ex, first, last, std::move(p));
+}
+
+template <class ExecutionSpace, class UnaryPredicate, class DataType,
+          class... Properties>
+auto partition_point(const std::string& label, const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType, Properties...>& v,
+                     UnaryPredicate p) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  return Impl::partition_point_impl(label, ex, begin(v), end(v), std::move(p));
+}
+
+template <class ExecutionSpace, class UnaryPredicate, class DataType,
+          class... Properties>
+auto partition_point(const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType, Properties...>& v,
+                     UnaryPredicate p) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  return Impl::partition_point_impl("Kokkos::partition_point_view_api_default",
+                                    ex, begin(v), end(v), std::move(p));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RandomAccessIterator.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RandomAccessIterator.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..01c0d76720118198e6ee9a14bdd51ea5bdfcd229
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RandomAccessIterator.hpp
@@ -0,0 +1,194 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_RANDOM_ACCESS_ITERATOR_HPP
+#define KOKKOS_RANDOM_ACCESS_ITERATOR_HPP
+
+#include <iterator>
+#include <Kokkos_Macros.hpp>
+#include <Kokkos_View.hpp>
+#include "Kokkos_Constraints.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template <class T>
+class RandomAccessIterator;
+
+template <class DataType, class... Args>
+class RandomAccessIterator< ::Kokkos::View<DataType, Args...> >
+    : public std::iterator<
+          std::random_access_iterator_tag,
+          typename ::Kokkos::View<DataType, Args...>::value_type, ptrdiff_t,
+          typename ::Kokkos::View<DataType, Args...>::pointer_type,
+          typename ::Kokkos::View<DataType, Args...>::reference_type> {
+ public:
+  using view_type       = ::Kokkos::View<DataType, Args...>;
+  using iterator_type   = RandomAccessIterator<view_type>;
+  using difference_type = ptrdiff_t;
+  using value_type      = typename view_type::value_type;
+  using reference       = typename view_type::reference_type;
+
+  static_assert(view_type::rank == 1 &&
+                    (std::is_same<typename view_type::traits::array_layout,
+                                  Kokkos::LayoutLeft>::value ||
+                     std::is_same<typename view_type::traits::array_layout,
+                                  Kokkos::LayoutRight>::value ||
+                     std::is_same<typename view_type::traits::array_layout,
+                                  Kokkos::LayoutStride>::value),
+                "RandomAccessIterator only supports 1D Views with LayoutLeft, "
+                "LayoutRight, LayoutStride.");
+
+  KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default;
+
+  explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view)
+      : m_view(view) {}
+  explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view,
+                                                ptrdiff_t current_index)
+      : m_view(view), m_current_index(current_index) {}
+
+  KOKKOS_FUNCTION
+  iterator_type& operator++() {
+    ++m_current_index;
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  iterator_type operator++(int) {
+    auto tmp = *this;
+    ++*this;
+    return tmp;
+  }
+
+  KOKKOS_FUNCTION
+  iterator_type& operator--() {
+    --m_current_index;
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  iterator_type operator--(int) {
+    auto tmp = *this;
+    --*this;
+    return tmp;
+  }
+
+  KOKKOS_FUNCTION
+  reference operator[](difference_type n) const {
+    return m_view(m_current_index + n);
+  }
+
+  KOKKOS_FUNCTION
+  iterator_type& operator+=(difference_type n) {
+    m_current_index += n;
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  iterator_type& operator-=(difference_type n) {
+    m_current_index -= n;
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  iterator_type operator+(difference_type n) const {
+    return iterator_type(m_view, m_current_index + n);
+  }
+
+  KOKKOS_FUNCTION
+  iterator_type operator-(difference_type n) const {
+    return iterator_type(m_view, m_current_index - n);
+  }
+
+  KOKKOS_FUNCTION
+  difference_type operator-(iterator_type it) const {
+    return m_current_index - it.m_current_index;
+  }
+
+  KOKKOS_FUNCTION
+  bool operator==(iterator_type other) const {
+    return m_current_index == other.m_current_index &&
+           m_view.data() == other.m_view.data();
+  }
+
+  KOKKOS_FUNCTION
+  bool operator!=(iterator_type other) const {
+    return m_current_index != other.m_current_index ||
+           m_view.data() != other.m_view.data();
+  }
+
+  KOKKOS_FUNCTION
+  bool operator<(iterator_type other) const {
+    return m_current_index < other.m_current_index;
+  }
+
+  KOKKOS_FUNCTION
+  bool operator<=(iterator_type other) const {
+    return m_current_index <= other.m_current_index;
+  }
+
+  KOKKOS_FUNCTION
+  bool operator>(iterator_type other) const {
+    return m_current_index > other.m_current_index;
+  }
+
+  KOKKOS_FUNCTION
+  bool operator>=(iterator_type other) const {
+    return m_current_index >= other.m_current_index;
+  }
+
+  KOKKOS_FUNCTION
+  reference operator*() const { return m_view(m_current_index); }
+
+ private:
+  view_type m_view;
+  ptrdiff_t m_current_index = 0;
+};
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..dd529a25c6508ce9d07a90c77a6ec6fd5beef7d0
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp
@@ -0,0 +1,118 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ReducerWithArbitraryJoinerNoNeutralElement_hpp_
+#define KOKKOS_STD_ReducerWithArbitraryJoinerNoNeutralElement_hpp_
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_ValueWrapperForNoNeutralElement.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// This reducer is here and not where all other reducers are
+// because it is inside Impl and also because it would not work
+// for OpenMPTarget backend. We can move this later.
+
+template <class Scalar, class JoinerType, class Space = HostSpace>
+struct ReducerWithArbitraryJoinerNoNeutralElement {
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+
+ public:
+  // Required
+  using reducer =
+      ReducerWithArbitraryJoinerNoNeutralElement<Scalar, JoinerType, Space>;
+  using value_type = ValueWrapperForNoNeutralElement<scalar_type>;
+
+  using result_view_type = Kokkos::View<value_type, Space>;
+
+ private:
+  JoinerType m_joiner;
+  result_view_type m_value;
+  bool m_references_scalar_v;
+
+ public:
+  KOKKOS_FUNCTION
+  ReducerWithArbitraryJoinerNoNeutralElement(value_type& value_,
+                                             JoinerType joiner_)
+      : m_joiner(joiner_), m_value(&value_), m_references_scalar_v(true) {}
+
+  KOKKOS_FUNCTION
+  ReducerWithArbitraryJoinerNoNeutralElement(const result_view_type& value_,
+                                             JoinerType joiner_)
+      : m_joiner(joiner_), m_value(value_), m_references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    dest.val = m_joiner(dest.val, src.val);
+  }
+
+  KOKKOS_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    dest.val = m_joiner(dest.val, src.val);
+  }
+
+  KOKKOS_FUNCTION
+  void init(value_type& val) const {
+    // I cannot call reduction_identity, so need to default this
+    val = {};
+  }
+
+  KOKKOS_FUNCTION
+  value_type& reference() const { return *m_value.data(); }
+
+  KOKKOS_FUNCTION
+  result_view_type view() const { return m_value; }
+
+  KOKKOS_FUNCTION
+  bool references_scalar() const { return m_references_scalar_v; }
+};
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SortingOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SortingOperations.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bcc38fb38cb4b905e7ece4538ad18009e62a6884
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SortingOperations.hpp
@@ -0,0 +1,378 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_SORTING_OPERATIONS_HPP
+#define KOKKOS_STD_SORTING_OPERATIONS_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_BeginEnd.hpp"
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_NonModifyingSequenceOperations.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// ------------------
+//
+// functors
+//
+// ------------------
+
+template <class IteratorType, class IndicatorViewType, class ComparatorType>
+struct StdIsSortedUntilFunctor {
+  using index_type = typename IteratorType::difference_type;
+  IteratorType m_first;
+  IndicatorViewType m_indicator;
+  ComparatorType m_comparator;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, int& update, const bool final) const {
+    const auto& val_i   = m_first[i];
+    const auto& val_ip1 = m_first[i + 1];
+
+    if (m_comparator(val_ip1, val_i)) {
+      ++update;
+    }
+
+    if (final) {
+      m_indicator(i) = update;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdIsSortedUntilFunctor(IteratorType _first1, IndicatorViewType indicator,
+                          ComparatorType comparator)
+      : m_first(std::move(_first1)),
+        m_indicator(std::move(indicator)),
+        m_comparator(std::move(comparator)) {}
+};
+
+template <class IteratorType, class ComparatorType>
+struct StdIsSortedFunctor {
+  using index_type = typename IteratorType::difference_type;
+  IteratorType m_first;
+  ComparatorType m_comparator;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, std::size_t& update) const {
+    const auto& val_i   = m_first[i];
+    const auto& val_ip1 = m_first[i + 1];
+
+    if (m_comparator(val_ip1, val_i)) {
+      ++update;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdIsSortedFunctor(IteratorType _first1, ComparatorType comparator)
+      : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {}
+};
+
+// ------------------------------------------
+// is_sorted_until_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+IteratorType is_sorted_until_impl(const std::string& label,
+                                  const ExecutionSpace& ex, IteratorType first,
+                                  IteratorType last, ComparatorType comp) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+
+  // trivial case
+  if (num_elements <= 1) {
+    return last;
+  }
+
+  /*
+    use scan and a helper "indicator" view
+    such that we scan the data and fill the indicator with
+    partial sum that is always 0 unless we find a pair that
+    breaks the sorting, so in that case the indicator will
+    have a 1 starting at the location where the sorting breaks.
+    So finding that 1 means finding the location we want.
+   */
+
+  // aliases
+  using indicator_value_type = std::size_t;
+  using indicator_view_type =
+      ::Kokkos::View<indicator_value_type*, ExecutionSpace>;
+  using functor_type =
+      StdIsSortedUntilFunctor<IteratorType, indicator_view_type,
+                              ComparatorType>;
+
+  // do scan
+  // use num_elements-1 because each index handles i and i+1
+  const auto num_elements_minus_one = num_elements - 1;
+  indicator_view_type indicator("is_sorted_until_indicator_helper",
+                                num_elements_minus_one);
+  ::Kokkos::parallel_scan(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_minus_one),
+      functor_type(first, indicator, std::move(comp)));
+
+  // try to find the first sentinel value, which indicates
+  // where the sorting condition breaks
+  namespace KE                                  = ::Kokkos::Experimental;
+  constexpr indicator_value_type sentinel_value = 1;
+  auto r =
+      KE::find(ex, KE::cbegin(indicator), KE::cend(indicator), sentinel_value);
+  const auto shift = r - ::Kokkos::Experimental::cbegin(indicator);
+
+  return first + (shift + 1);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType is_sorted_until_impl(const std::string& label,
+                                  const ExecutionSpace& ex, IteratorType first,
+                                  IteratorType last) {
+  using value_type = typename IteratorType::value_type;
+  using pred_t     = Impl::StdAlgoLessThanBinaryPredicate<value_type>;
+  return is_sorted_until_impl(label, ex, first, last, pred_t());
+}
+
+// ------------------------------------------
+// is_sorted_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType last,
+                    ComparatorType comp) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  if (num_elements <= 1) {
+    return true;
+  }
+
+  // use num_elements-1 because each index handles i and i+1
+  const auto num_elements_minus_one = num_elements - 1;
+  using functor_type = StdIsSortedFunctor<IteratorType, ComparatorType>;
+
+  // result is incremented by one if sorting breaks at index i
+  std::size_t result = 0;
+  ::Kokkos::parallel_reduce(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_minus_one),
+      functor_type(first, std::move(comp)), result);
+
+  return result == 0;
+}
+
+template <class ExecutionSpace, class IteratorType>
+bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType last) {
+  using value_type = typename IteratorType::value_type;
+  using pred_t     = Impl::StdAlgoLessThanBinaryPredicate<value_type>;
+  return is_sorted_impl(label, ex, first, last, pred_t());
+}
+
+}  // namespace Impl
+
+// ----------------------------------
+// is_sorted_until public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType>
+IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first,
+                             IteratorType last) {
+  return Impl::is_sorted_until_impl(
+      "Kokkos::is_sorted_until_iterator_api_default", ex, first, last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType first, IteratorType last) {
+  return Impl::is_sorted_until_impl(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto is_sorted_until(const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default",
+                                    ex, KE::begin(view), KE::end(view));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto is_sorted_until(const std::string& label, const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view));
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first,
+                             IteratorType last, ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  return Impl::is_sorted_until_impl(
+      "Kokkos::is_sorted_until_iterator_api_default", ex, first, last,
+      std::move(comp));
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType first, IteratorType last,
+                             ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::is_sorted_until_impl(label, ex, first, last, std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ComparatorType>
+auto is_sorted_until(const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType, Properties...>& view,
+                     ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default",
+                                    ex, KE::begin(view), KE::end(view),
+                                    std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ComparatorType>
+auto is_sorted_until(const std::string& label, const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType, Properties...>& view,
+                     ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view),
+                                    std::move(comp));
+}
+
+// ----------------------------------
+// is_sorted public API
+// ----------------------------------
+template <class ExecutionSpace, class IteratorType>
+bool is_sorted(const ExecutionSpace& ex, IteratorType first,
+               IteratorType last) {
+  return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex,
+                              first, last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+bool is_sorted(const std::string& label, const ExecutionSpace& ex,
+               IteratorType first, IteratorType last) {
+  return Impl::is_sorted_impl(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+bool is_sorted(const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex,
+                              KE::cbegin(view), KE::cend(view));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+bool is_sorted(const std::string& label, const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view));
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last,
+               ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex,
+                              first, last, std::move(comp));
+}
+
+template <class ExecutionSpace, class IteratorType, class ComparatorType>
+bool is_sorted(const std::string& label, const ExecutionSpace& ex,
+               IteratorType first, IteratorType last, ComparatorType comp) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  return Impl::is_sorted_impl(label, ex, first, last, std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ComparatorType>
+bool is_sorted(const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType, Properties...>& view,
+               ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex,
+                              KE::cbegin(view), KE::cend(view),
+                              std::move(comp));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ComparatorType>
+bool is_sorted(const std::string& label, const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType, Properties...>& view,
+               ComparatorType comp) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view),
+                              std::move(comp));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ValueWrapperForNoNeutralElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ValueWrapperForNoNeutralElement.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..019a0049db1f1f60ad3dfcb28ed7d2cff80a88f2
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ValueWrapperForNoNeutralElement.hpp
@@ -0,0 +1,78 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP
+#define KOKKOS_STD_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+//
+// scalar wrapper used for reductions and scans
+// when we don't have neutral element
+//
+template <class Scalar>
+struct ValueWrapperForNoNeutralElement {
+  Scalar val;
+  bool is_initial = true;
+
+  KOKKOS_FUNCTION
+  void operator=(const ValueWrapperForNoNeutralElement& rhs) {
+    val        = rhs.val;
+    is_initial = rhs.is_initial;
+  }
+
+  KOKKOS_FUNCTION
+  void operator=(const volatile ValueWrapperForNoNeutralElement& rhs) volatile {
+    val        = rhs.val;
+    is_initial = rhs.is_initial;
+  }
+};
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp b/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b9b1b96aea466b9190d72f778d40ed429334904f
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp
@@ -0,0 +1,1285 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET1_HPP
+#define KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET1_HPP
+
+#include <Kokkos_Core.hpp>
+#include "../Kokkos_BeginEnd.hpp"
+#include "../Kokkos_Constraints.hpp"
+#include "../Kokkos_ModifyingOperations.hpp"
+#include "../Kokkos_NonModifyingSequenceOperations.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+//---------------------------
+//
+// functors
+//
+//---------------------------
+template <class IndexType, class InputIterator, class OutputIterator>
+struct StdCopyFunctor {
+  InputIterator m_first;
+  OutputIterator m_dest_first;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const { m_dest_first[i] = m_first[i]; }
+
+  KOKKOS_FUNCTION
+  StdCopyFunctor(InputIterator _first, OutputIterator _dest_first)
+      : m_first(std::move(_first)), m_dest_first(std::move(_dest_first)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2>
+struct StdCopyBackwardFunctor {
+  static_assert(std::is_signed<IndexType>::value,
+                "Kokkos: StdCopyBackwardFunctor requires signed index type");
+
+  IteratorType1 m_last;
+  IteratorType2 m_dest_last;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const { m_dest_last[-i - 1] = m_last[-i - 1]; }
+
+  KOKKOS_FUNCTION
+  StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last)
+      : m_last(std::move(_last)), m_dest_last(std::move(_dest_last)) {}
+};
+
+template <class IndexType, class FirstFrom, class FirstDest, class PredType>
+struct StdCopyIfFunctor {
+  FirstFrom m_first_from;
+  FirstDest m_first_dest;
+  PredType m_pred;
+
+  KOKKOS_FUNCTION
+  StdCopyIfFunctor(FirstFrom first_from, FirstDest first_dest, PredType pred)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_pred(std::move(pred)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, IndexType& update,
+                  const bool final_pass) const {
+    const auto& myval = m_first_from[i];
+    if (final_pass) {
+      if (m_pred(myval)) {
+        m_first_dest[update] = myval;
+      }
+    }
+
+    if (m_pred(myval)) {
+      update += 1;
+    }
+  }
+};
+
+template <class InputIterator, class T>
+struct StdFillFunctor {
+  using index_type = typename InputIterator::difference_type;
+  InputIterator m_first;
+  T m_value;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i) const { m_first[i] = m_value; }
+
+  KOKKOS_FUNCTION
+  StdFillFunctor(InputIterator _first, T _value)
+      : m_first(std::move(_first)), m_value(std::move(_value)) {}
+};
+
+template <class IndexType, class InputIterator, class OutputIterator,
+          class UnaryFunctorType>
+struct StdTransformFunctor {
+  InputIterator m_first;
+  OutputIterator m_d_first;
+  UnaryFunctorType m_unary_op;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const { m_d_first[i] = m_unary_op(m_first[i]); }
+
+  KOKKOS_FUNCTION
+  StdTransformFunctor(InputIterator _first, OutputIterator _m_d_first,
+                      UnaryFunctorType _functor)
+      : m_first(std::move(_first)),
+        m_d_first(std::move(_m_d_first)),
+        m_unary_op(std::move(_functor)) {}
+};
+
+template <class IndexType, class InputIterator1, class InputIterator2,
+          class OutputIterator, class BinaryFunctorType>
+struct StdTransformBinaryFunctor {
+  InputIterator1 m_first1;
+  InputIterator2 m_first2;
+  OutputIterator m_d_first;
+  BinaryFunctorType m_binary_op;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const {
+    m_d_first[i] = m_binary_op(m_first1[i], m_first2[i]);
+  }
+
+  KOKKOS_FUNCTION
+  StdTransformBinaryFunctor(InputIterator1 _first1, InputIterator2 _first2,
+                            OutputIterator _m_d_first,
+                            BinaryFunctorType _functor)
+      : m_first1(std::move(_first1)),
+        m_first2(std::move(_first2)),
+        m_d_first(std::move(_m_d_first)),
+        m_binary_op(std::move(_functor)) {}
+};
+
+template <class IteratorType, class Generator>
+struct StdGenerateFunctor {
+  using index_type = typename IteratorType::difference_type;
+  IteratorType m_first;
+  Generator m_generator;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i) const { m_first[i] = m_generator(); }
+
+  KOKKOS_FUNCTION
+  StdGenerateFunctor(IteratorType _first, Generator _g)
+      : m_first(std::move(_first)), m_generator(std::move(_g)) {}
+};
+
+template <class InputIterator, class PredicateType, class NewValueType>
+struct StdReplaceIfFunctor {
+  using index_type = typename InputIterator::difference_type;
+
+  InputIterator m_first;
+  PredicateType m_predicate;
+  NewValueType m_new_value;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i) const {
+    if (m_predicate(m_first[i])) {
+      m_first[i] = m_new_value;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdReplaceIfFunctor(InputIterator first, PredicateType pred,
+                      NewValueType new_value)
+      : m_first(std::move(first)),
+        m_predicate(std::move(pred)),
+        m_new_value(std::move(new_value)) {}
+};
+
+template <class InputIterator, class ValueType>
+struct StdReplaceFunctor {
+  using index_type = typename InputIterator::difference_type;
+  InputIterator m_first;
+  ValueType m_old_value;
+  ValueType m_new_value;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i) const {
+    if (m_first[i] == m_old_value) {
+      m_first[i] = m_new_value;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdReplaceFunctor(InputIterator first, ValueType old_value,
+                    ValueType new_value)
+      : m_first(std::move(first)),
+        m_old_value(std::move(old_value)),
+        m_new_value(std::move(new_value)) {}
+};
+
+template <class InputIterator, class OutputIterator, class ValueType>
+struct StdReplaceCopyFunctor {
+  using index_type = typename InputIterator::difference_type;
+
+  InputIterator m_first_from;
+  OutputIterator m_first_dest;
+  ValueType m_old_value;
+  ValueType m_new_value;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i) const {
+    const auto& myvalue_from = m_first_from[i];
+
+    if (myvalue_from == m_old_value) {
+      m_first_dest[i] = m_new_value;
+    } else {
+      m_first_dest[i] = myvalue_from;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdReplaceCopyFunctor(InputIterator first_from, OutputIterator first_dest,
+                        ValueType old_value, ValueType new_value)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_old_value(std::move(old_value)),
+        m_new_value(std::move(new_value)) {}
+};
+
+template <class IndexType, class InputIterator, class OutputIterator,
+          class PredicateType, class ValueType>
+struct StdReplaceIfCopyFunctor {
+  InputIterator m_first_from;
+  OutputIterator m_first_dest;
+  PredicateType m_pred;
+  ValueType m_new_value;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const {
+    const auto& myvalue_from = m_first_from[i];
+
+    if (m_pred(myvalue_from)) {
+      m_first_dest[i] = m_new_value;
+    } else {
+      m_first_dest[i] = myvalue_from;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdReplaceIfCopyFunctor(InputIterator first_from, OutputIterator first_dest,
+                          PredicateType pred, ValueType new_value)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_pred(std::move(pred)),
+        m_new_value(std::move(new_value)) {}
+};
+
+// ------------------------------------------
+// copy_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex,
+                         InputIterator first, InputIterator last,
+                         OutputIterator d_first) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using index_type = typename InputIterator::difference_type;
+  using func_t     = StdCopyFunctor<index_type, InputIterator, OutputIterator>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first, d_first));
+  ex.fence("Kokkos::copy: fence after operation");
+
+  // return
+  return d_first + num_elements;
+}
+
+// ------------------------------------------
+// copy_n_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class Size,
+          class OutputIterator>
+OutputIterator copy_n_impl(const std::string& label, const ExecutionSpace& ex,
+                           InputIterator first_from, Size count,
+                           OutputIterator first_dest) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+
+  if (count > 0) {
+    return copy_impl(label, ex, first_from, first_from + count, first_dest);
+  } else {
+    return first_dest;
+  }
+}
+
+// ------------------------------------------
+// copy_backward_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 copy_backward_impl(const std::string& label,
+                                 const ExecutionSpace& ex, IteratorType1 first,
+                                 IteratorType1 last, IteratorType2 d_last) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_last);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_last);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using index_type = typename IteratorType1::difference_type;
+  using func_t =
+      StdCopyBackwardFunctor<index_type, IteratorType1, IteratorType2>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(last, d_last));
+  ex.fence("Kokkos::copy_backward: fence after operation");
+
+  // return
+  return d_last - num_elements;
+}
+
+// ------------------------------------------
+// copy_if_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class PredicateType>
+OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex,
+                            InputIterator first, InputIterator last,
+                            OutputIterator d_first, PredicateType pred) {
+  /*
+    To explain the impl, suppose that our data is:
+
+    | 1 | 1 | 2 | 2 | 3 | -2 | 4 | 4 | 4 | 5 | 7 | -10 |
+
+    and we want to copy only the even entries,
+    We can use an exclusive scan where the "update"
+    is incremented only for the elements that satisfy the predicate.
+    This way, the update allows us to track where in the destination
+    we need to copy the elements:
+
+    In this case, counting only the even entries, the exlusive scan
+    during the final pass would yield:
+
+    | 0 | 0 | 0 | 1 | 2 | 2 | 3 | 4 | 5 | 6 | 6 | 6 |
+              *   *       *   *   *   *           *
+
+    which provides the indexing in the destination where
+    each starred (*) element needs to be copied to since
+    the starred elements are those that satisfy the predicate.
+   */
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    return d_first;
+  } else {
+    // aliases
+    using index_type = typename InputIterator::difference_type;
+    using func_type  = StdCopyIfFunctor<index_type, InputIterator,
+                                       OutputIterator, PredicateType>;
+
+    // run
+    const auto num_elements = Kokkos::Experimental::distance(first, last);
+    index_type count        = 0;
+    ::Kokkos::parallel_scan(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            func_type(first, d_first, pred), count);
+
+    // fence not needed because of the scan accumulating into count
+    return d_first + count;
+  }
+}
+
+// ------------------------------------------
+// fill_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class T>
+void fill_impl(const std::string& label, const ExecutionSpace& ex,
+               IteratorType first, IteratorType last, const T& value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         StdFillFunctor<IteratorType, T>(first, value));
+  ex.fence("Kokkos::fill: fence after operation");
+}
+
+template <class ExecutionSpace, class IteratorType, class SizeType, class T>
+IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, SizeType n, const T& value) {
+  auto last = first + n;
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  if (n <= 0) {
+    return first;
+  }
+
+  fill_impl(label, ex, first, last, value);
+  return last;
+}
+
+// ------------------------------------------
+// transform_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class UnaryOperation>
+OutputIterator transform_impl(const std::string& label,
+                              const ExecutionSpace& ex, InputIterator first1,
+                              InputIterator last1, OutputIterator d_first,
+                              UnaryOperation unary_op) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, d_first);
+  Impl::expect_valid_range(first1, last1);
+
+  // aliases
+  using index_type = typename InputIterator::difference_type;
+  using func_t = StdTransformFunctor<index_type, InputIterator, OutputIterator,
+                                     UnaryOperation>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first1, last1);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first1, d_first, unary_op));
+  ex.fence("Kokkos::transform: fence after operation");
+
+  // return
+  return d_first + num_elements;
+}
+
+template <class ExecutionSpace, class InputIterator1, class InputIterator2,
+          class OutputIterator, class BinaryOperation>
+OutputIterator transform_impl(const std::string& label,
+                              const ExecutionSpace& ex, InputIterator1 first1,
+                              InputIterator1 last1, InputIterator2 first2,
+                              OutputIterator d_first,
+                              BinaryOperation binary_op) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, first2, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, first2,
+                                                              d_first);
+  Impl::expect_valid_range(first1, last1);
+
+  // aliases
+  using index_type = typename InputIterator1::difference_type;
+  using func_t =
+      StdTransformBinaryFunctor<index_type, InputIterator1, InputIterator2,
+                                OutputIterator, BinaryOperation>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first1, last1);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first1, first2, d_first, binary_op));
+  ex.fence("Kokkos::transform: fence after operation");
+  return d_first + num_elements;
+}
+
+// ------------------------------------------
+// generate_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class Generator>
+void generate_impl(const std::string& label, const ExecutionSpace& ex,
+                   IteratorType first, IteratorType last, Generator g) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using func_t = StdGenerateFunctor<IteratorType, Generator>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first, g));
+  ex.fence("Kokkos::generate: fence after operation");
+}
+
+template <class ExecutionSpace, class IteratorType, class Size, class Generator>
+IteratorType generate_n_impl(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType first, Size count, Generator g) {
+  if (count <= 0) {
+    return first;
+  }
+
+  generate_impl(label, ex, first, first + count, g);
+  return first + count;
+}
+
+// ------------------------------------------
+// replace_if_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class PredicateType,
+          class ValueType>
+void replace_if_impl(const std::string& label, const ExecutionSpace& ex,
+                     IteratorType first, IteratorType last, PredicateType pred,
+                     const ValueType& new_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using func_t = StdReplaceIfFunctor<IteratorType, PredicateType, ValueType>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first, std::move(pred), new_value));
+  ex.fence("Kokkos::replace_if: fence after operation");
+}
+
+// ------------------------------------------
+// replace_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class ValueType>
+void replace_impl(const std::string& label, const ExecutionSpace& ex,
+                  IteratorType first, IteratorType last,
+                  const ValueType& old_value, const ValueType& new_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using func_t = StdReplaceFunctor<IteratorType, ValueType>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first, old_value, new_value));
+  ex.fence("Kokkos::replace: fence after operation");
+}
+
+// ------------------------------------------
+// replace_copy_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType>
+OutputIteratorType replace_copy_impl(const std::string& label,
+                                     const ExecutionSpace& ex,
+                                     InputIteratorType first_from,
+                                     InputIteratorType last_from,
+                                     OutputIteratorType first_dest,
+                                     const ValueType& old_value,
+                                     const ValueType& new_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using func_t =
+      StdReplaceCopyFunctor<InputIteratorType, OutputIteratorType, ValueType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first_from, first_dest, old_value, new_value));
+  ex.fence("Kokkos::replace_copy: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+// ------------------------------------------
+// replace_copy_if_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class PredicateType, class ValueType>
+OutputIteratorType replace_copy_if_impl(const std::string& label,
+                                        const ExecutionSpace& ex,
+                                        InputIteratorType first_from,
+                                        InputIteratorType last_from,
+                                        OutputIteratorType first_dest,
+                                        PredicateType pred,
+                                        const ValueType& new_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using func_t =
+      StdReplaceIfCopyFunctor<index_type, InputIteratorType, OutputIteratorType,
+                              PredicateType, ValueType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_t(first_from, first_dest, std::move(pred), new_value));
+  ex.fence("Kokkos::replace_copy_if: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+}  // namespace Impl
+
+// -------------------
+// replace copy
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class ValueType>
+OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from,
+                            InputIterator last_from, OutputIterator first_dest,
+                            const ValueType& old_value,
+                            const ValueType& new_value) {
+  return Impl::replace_copy_impl("Kokkos::replace_copy_iterator_api", ex,
+                                 first_from, last_from, first_dest, old_value,
+                                 new_value);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class ValueType>
+OutputIterator replace_copy(const std::string& label, const ExecutionSpace& ex,
+                            InputIterator first_from, InputIterator last_from,
+                            OutputIterator first_dest,
+                            const ValueType& old_value,
+                            const ValueType& new_value) {
+  return Impl::replace_copy_impl(label, ex, first_from, last_from, first_dest,
+                                 old_value, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+auto replace_copy(const ExecutionSpace& ex,
+                  const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                  const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                  const ValueType& old_value, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_copy_impl("Kokkos::replace_copy_view_api", ex,
+                                 KE::cbegin(view_from), KE::cend(view_from),
+                                 KE::begin(view_dest), old_value, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+auto replace_copy(const std::string& label, const ExecutionSpace& ex,
+                  const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                  const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                  const ValueType& old_value, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_copy_impl(label, ex, KE::cbegin(view_from),
+                                 KE::cend(view_from), KE::begin(view_dest),
+                                 old_value, new_value);
+}
+
+// -------------------
+// replace_copy_if
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class PredicateType, class ValueType>
+OutputIterator replace_copy_if(const ExecutionSpace& ex,
+                               InputIterator first_from,
+                               InputIterator last_from,
+                               OutputIterator first_dest, PredicateType pred,
+                               const ValueType& new_value) {
+  return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_iterator_api", ex,
+                                    first_from, last_from, first_dest, pred,
+                                    new_value);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class PredicateType, class ValueType>
+OutputIterator replace_copy_if(const std::string& label,
+                               const ExecutionSpace& ex,
+                               InputIterator first_from,
+                               InputIterator last_from,
+                               OutputIterator first_dest, PredicateType pred,
+                               const ValueType& new_value) {
+  return Impl::replace_copy_if_impl(label, ex, first_from, last_from,
+                                    first_dest, pred, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class PredicateType,
+          class ValueType>
+auto replace_copy_if(const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                     const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                     PredicateType pred, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_view_api", ex,
+                                    KE::cbegin(view_from), KE::cend(view_from),
+                                    KE::begin(view_dest), pred, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class PredicateType,
+          class ValueType>
+auto replace_copy_if(const std::string& label, const ExecutionSpace& ex,
+                     const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                     const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                     PredicateType pred, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_copy_if_impl(label, ex, KE::cbegin(view_from),
+                                    KE::cend(view_from), KE::begin(view_dest),
+                                    pred, new_value);
+}
+
+// -------------------
+// replace
+// -------------------
+template <class ExecutionSpace, class Iterator, class ValueType>
+void replace(const ExecutionSpace& ex, Iterator first, Iterator last,
+             const ValueType& old_value, const ValueType& new_value) {
+  return Impl::replace_impl("Kokkos::replace_iterator_api", ex, first, last,
+                            old_value, new_value);
+}
+
+template <class ExecutionSpace, class Iterator, class ValueType>
+void replace(const std::string& label, const ExecutionSpace& ex, Iterator first,
+             Iterator last, const ValueType& old_value,
+             const ValueType& new_value) {
+  return Impl::replace_impl(label, ex, first, last, old_value, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class ValueType>
+void replace(const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType1, Properties1...>& view,
+             const ValueType& old_value, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_impl("Kokkos::replace_view_api", ex, KE::begin(view),
+                            KE::end(view), old_value, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class ValueType>
+void replace(const std::string& label, const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType1, Properties1...>& view,
+             const ValueType& old_value, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_impl(label, ex, KE::begin(view), KE::end(view),
+                            old_value, new_value);
+}
+
+// -------------------
+// replace_if
+// -------------------
+template <class ExecutionSpace, class InputIterator, class Predicate,
+          class ValueType>
+void replace_if(const ExecutionSpace& ex, InputIterator first,
+                InputIterator last, Predicate pred,
+                const ValueType& new_value) {
+  return Impl::replace_if_impl("Kokkos::replace_if_iterator_api", ex, first,
+                               last, pred, new_value);
+}
+
+template <class ExecutionSpace, class InputIterator, class Predicate,
+          class ValueType>
+void replace_if(const std::string& label, const ExecutionSpace& ex,
+                InputIterator first, InputIterator last, Predicate pred,
+                const ValueType& new_value) {
+  return Impl::replace_if_impl(label, ex, first, last, pred, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class Predicate, class ValueType>
+void replace_if(const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType1, Properties1...>& view,
+                Predicate pred, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_if_impl("Kokkos::replace_if_view_api", ex,
+                               KE::begin(view), KE::end(view), pred, new_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class Predicate, class ValueType>
+void replace_if(const std::string& label, const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType1, Properties1...>& view,
+                Predicate pred, const ValueType& new_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::replace_if_impl(label, ex, KE::begin(view), KE::end(view), pred,
+                               new_value);
+}
+
+// -------------------
+// copy
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator copy(const ExecutionSpace& ex, InputIterator first,
+                    InputIterator last, OutputIterator d_first) {
+  return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last,
+                         d_first);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator copy(const std::string& label, const ExecutionSpace& ex,
+                    InputIterator first, InputIterator last,
+                    OutputIterator d_first) {
+  return Impl::copy_impl(label, ex, first, last, d_first);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto copy(const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType1, Properties1...>& source,
+          ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::copy_impl("Kokkos::copy_view_api_default", ex,
+                         KE::cbegin(source), KE::cend(source), KE::begin(dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto copy(const std::string& label, const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType1, Properties1...>& source,
+          ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source),
+                         KE::begin(dest));
+}
+
+// -------------------
+// copy_n
+// -------------------
+template <class ExecutionSpace, class InputIterator, class Size,
+          class OutputIterator>
+OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count,
+                      OutputIterator result) {
+  return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first,
+                           count, result);
+}
+
+template <class ExecutionSpace, class InputIterator, class Size,
+          class OutputIterator>
+OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex,
+                      InputIterator first, Size count, OutputIterator result) {
+  return Impl::copy_n_impl(label, ex, first, count, result);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class Size, class DataType2, class... Properties2>
+auto copy_n(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
+            ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex,
+                           KE::cbegin(source), count, KE::begin(dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class Size, class DataType2, class... Properties2>
+auto copy_n(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
+            ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::copy_n_impl(label, ex, KE::cbegin(source), count,
+                           KE::begin(dest));
+}
+
+// -------------------
+// copy_backward
+// -------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first,
+                            IteratorType1 last, IteratorType2 d_last) {
+  return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default",
+                                  ex, first, last, d_last);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 d_last) {
+  return Impl::copy_backward_impl(label, ex, first, last, d_last);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto copy_backward(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& source,
+                   ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex,
+                                  cbegin(source), cend(source), end(dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto copy_backward(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& source,
+                   ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source),
+                                  end(dest));
+}
+
+// -------------------
+// copy_if
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class Predicate>
+OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first,
+                       InputIterator last, OutputIterator d_first,
+                       Predicate pred) {
+  return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first,
+                            last, d_first, std::move(pred));
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class Predicate>
+OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex,
+                       InputIterator first, InputIterator last,
+                       OutputIterator d_first, Predicate pred) {
+  return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class Predicate>
+auto copy_if(const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType1, Properties1...>& source,
+             ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex,
+                            cbegin(source), cend(source), begin(dest),
+                            std::move(pred));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class Predicate>
+auto copy_if(const std::string& label, const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType1, Properties1...>& source,
+             ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::copy_if_impl(label, ex, cbegin(source), cend(source),
+                            begin(dest), std::move(pred));
+}
+
+// -------------------
+// fill
+// -------------------
+template <class ExecutionSpace, class IteratorType, class T>
+void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last,
+          const T& value) {
+  Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value);
+}
+
+template <class ExecutionSpace, class IteratorType, class T>
+void fill(const std::string& label, const ExecutionSpace& ex,
+          IteratorType first, IteratorType last, const T& value) {
+  Impl::fill_impl(label, ex, first, last, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class T>
+void fill(const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType, Properties...>& view, const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view),
+                  value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class T>
+void fill(const std::string& label, const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType, Properties...>& view, const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  Impl::fill_impl(label, ex, begin(view), end(view), value);
+}
+
+// -------------------
+// fill_n
+// -------------------
+template <class ExecutionSpace, class IteratorType, class SizeType, class T>
+IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n,
+                    const T& value) {
+  return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n,
+                           value);
+}
+
+template <class ExecutionSpace, class IteratorType, class SizeType, class T>
+IteratorType fill_n(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, SizeType n, const T& value) {
+  return Impl::fill_n_impl(label, ex, first, n, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class T>
+auto fill_n(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view, SizeType n,
+            const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view),
+                           n, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class T>
+auto fill_n(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view, SizeType n,
+            const T& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::fill_n_impl(label, ex, begin(view), n, value);
+}
+
+// -------------------
+// transform
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class UnaryOperation>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIterator, OutputIterator>::value,
+                  OutputIterator>
+transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1,
+          OutputIterator d_first, UnaryOperation unary_op) {
+  return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex,
+                              first1, last1, d_first, std::move(unary_op));
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class UnaryOperation>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIterator, OutputIterator>::value,
+                  OutputIterator>
+transform(const std::string& label, const ExecutionSpace& ex,
+          InputIterator first1, InputIterator last1, OutputIterator d_first,
+          UnaryOperation unary_op) {
+  return Impl::transform_impl(label, ex, first1, last1, d_first,
+                              std::move(unary_op));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class UnaryOperation>
+auto transform(const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType1, Properties1...>& source,
+               ::Kokkos::View<DataType2, Properties2...>& dest,
+               UnaryOperation unary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::transform_impl("Kokkos::transform_view_api_default", ex,
+                              begin(source), end(source), begin(dest),
+                              std::move(unary_op));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class UnaryOperation>
+auto transform(const std::string& label, const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType1, Properties1...>& source,
+               ::Kokkos::View<DataType2, Properties2...>& dest,
+               UnaryOperation unary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::transform_impl(label, ex, begin(source), end(source),
+                              begin(dest), std::move(unary_op));
+}
+
+template <class ExecutionSpace, class InputIterator1, class InputIterator2,
+          class OutputIterator, class BinaryOperation>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIterator1, InputIterator2, OutputIterator>::value,
+                  OutputIterator>
+transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1,
+          InputIterator2 first2, OutputIterator d_first,
+          BinaryOperation binary_op) {
+  return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex,
+                              first1, last1, first2, d_first,
+                              std::move(binary_op));
+}
+
+template <class ExecutionSpace, class InputIterator1, class InputIterator2,
+          class OutputIterator, class BinaryOperation>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIterator1, InputIterator2, OutputIterator>::value,
+                  OutputIterator>
+transform(const std::string& label, const ExecutionSpace& ex,
+          InputIterator1 first1, InputIterator1 last1, InputIterator2 first2,
+          OutputIterator d_first, BinaryOperation binary_op) {
+  return Impl::transform_impl(label, ex, first1, last1, first2, d_first,
+                              std::move(binary_op));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class DataType3,
+          class... Properties3, class BinaryOperation>
+auto transform(const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType1, Properties1...>& source1,
+               const ::Kokkos::View<DataType2, Properties2...>& source2,
+               ::Kokkos::View<DataType3, Properties3...>& dest,
+               BinaryOperation binary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::transform_impl("Kokkos::transform_view_api_default", ex,
+                              begin(source1), end(source1), begin(source2),
+                              begin(dest), std::move(binary_op));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class DataType3,
+          class... Properties3, class BinaryOperation>
+auto transform(const std::string& label, const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType1, Properties1...>& source1,
+               const ::Kokkos::View<DataType2, Properties2...>& source2,
+               ::Kokkos::View<DataType3, Properties3...>& dest,
+               BinaryOperation binary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::transform_impl(label, ex, begin(source1), end(source1),
+                              begin(source2), begin(dest),
+                              std::move(binary_op));
+}
+
+// -------------------
+// generate
+// -------------------
+template <class ExecutionSpace, class IteratorType, class Generator>
+void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last,
+              Generator g) {
+  Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last,
+                      std::move(g));
+}
+
+template <class ExecutionSpace, class IteratorType, class Generator>
+void generate(const std::string& label, const ExecutionSpace& ex,
+              IteratorType first, IteratorType last, Generator g) {
+  Impl::generate_impl(label, ex, first, last, std::move(g));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Generator>
+void generate(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view),
+                      end(view), std::move(g));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Generator>
+void generate(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  Impl::generate_impl(label, ex, begin(view), end(view), std::move(g));
+}
+
+// -------------------
+// generate_n
+// -------------------
+template <class ExecutionSpace, class IteratorType, class Size, class Generator>
+IteratorType generate_n(const ExecutionSpace& ex, IteratorType first,
+                        Size count, Generator g) {
+  Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first,
+                        count, std::move(g));
+  return first + count;
+}
+
+template <class ExecutionSpace, class IteratorType, class Size, class Generator>
+IteratorType generate_n(const std::string& label, const ExecutionSpace& ex,
+                        IteratorType first, Size count, Generator g) {
+  Impl::generate_n_impl(label, ex, first, count, std::move(g));
+  return first + count;
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class Size,
+          class Generator>
+auto generate_n(const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& view, Size count,
+                Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex,
+                               begin(view), count, std::move(g));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties, class Size,
+          class Generator>
+auto generate_n(const std::string& label, const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& view, Size count,
+                Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp b/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9d2c85f00d38c97595da35d6833a799ebff36170
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp
@@ -0,0 +1,1783 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET2_HPP
+#define KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET2_HPP
+
+#include <Kokkos_Core.hpp>
+#include "../Kokkos_BeginEnd.hpp"
+#include "../Kokkos_Constraints.hpp"
+#include "../Kokkos_ModifyingOperations.hpp"
+#include "../Kokkos_NonModifyingSequenceOperations.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+//-------------------------
+//
+// functors
+//
+//-------------------------
+
+template <class IndexType, class InputIt, class OutputIt,
+          class BinaryPredicateType>
+struct StdUniqueCopyFunctor {
+  InputIt m_first_from;
+  InputIt m_last_from;
+  OutputIt m_first_dest;
+  BinaryPredicateType m_pred;
+
+  KOKKOS_FUNCTION
+  StdUniqueCopyFunctor(InputIt first_from, InputIt last_from,
+                       OutputIt first_dest, BinaryPredicateType pred)
+      : m_first_from(std::move(first_from)),
+        m_last_from(std::move(last_from)),
+        m_first_dest(std::move(first_dest)),
+        m_pred(std::move(pred)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, IndexType& update,
+                  const bool final_pass) const {
+    const auto& val_i   = m_first_from[i];
+    const auto& val_ip1 = m_first_from[i + 1];
+
+    if (final_pass) {
+      if (!m_pred(val_i, val_ip1)) {
+        m_first_dest[update] = val_i;
+      }
+    }
+
+    if (!m_pred(val_i, val_ip1)) {
+      update += 1;
+    }
+  }
+};
+
+template <class InputIterator>
+struct StdReverseFunctor {
+  using index_type = typename InputIterator::difference_type;
+  static_assert(std::is_signed<index_type>::value,
+                "Kokkos: StdReverseFunctor requires signed index type");
+
+  InputIterator m_first;
+  InputIterator m_last;
+
+  KOKKOS_FUNCTION
+  void operator()(index_type i) const {
+    // the swap below is doing the same thing, but
+    // for Intel 18.0.5 does not work.
+    // But putting the impl directly here, it works.
+#ifdef KOKKOS_COMPILER_INTEL
+    typename InputIterator::value_type tmp = std::move(m_first[i]);
+    m_first[i]                             = std::move(m_last[-i - 1]);
+    m_last[-i - 1]                         = std::move(tmp);
+#else
+    ::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]);
+#endif
+  }
+
+  StdReverseFunctor(InputIterator first, InputIterator last)
+      : m_first(std::move(first)), m_last(std::move(last)) {}
+};
+
+template <class IndexType, class InputIterator, class OutputIterator>
+struct StdReverseCopyFunctor {
+  static_assert(std::is_signed<IndexType>::value,
+                "Kokkos: StdReverseCopyFunctor requires signed index type");
+
+  InputIterator m_last;
+  OutputIterator m_dest_first;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const { m_dest_first[i] = m_last[-1 - i]; }
+
+  StdReverseCopyFunctor(InputIterator _last, OutputIterator _dest_first)
+      : m_last(std::move(_last)), m_dest_first(std::move(_dest_first)) {}
+};
+
+template <class IndexType, class InputIterator, class OutputIterator>
+struct StdMoveFunctor {
+  InputIterator m_first;
+  OutputIterator m_dest_first;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const {
+    m_dest_first[i] = std::move(m_first[i]);
+  }
+
+  StdMoveFunctor(InputIterator _first, OutputIterator _dest_first)
+      : m_first(std::move(_first)), m_dest_first(std::move(_dest_first)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2>
+struct StdMoveBackwardFunctor {
+  static_assert(std::is_signed<IndexType>::value,
+                "Kokkos: StdMoveBackwardFunctor requires signed index type");
+
+  IteratorType1 m_last;
+  IteratorType2 m_dest_last;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const {
+    m_dest_last[-i] = std::move(m_last[-i]);
+  }
+
+  StdMoveBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last)
+      : m_last(std::move(_last)), m_dest_last(std::move(_dest_last)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2>
+struct StdSwapRangesFunctor {
+  IteratorType1 m_first1;
+  IteratorType2 m_first2;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const {
+    // the swap below is doing the same thing, but
+    // for Intel 18.0.5 does not work.
+    // But putting the impl directly here, it works.
+#ifdef KOKKOS_COMPILER_INTEL
+    typename IteratorType1::value_type tmp = std::move(m_first1[i]);
+    m_first1[i]                            = std::move(m_first2[i]);
+    m_first2[i]                            = std::move(tmp);
+#else
+    ::Kokkos::Experimental::swap(m_first1[i], m_first2[i]);
+#endif
+  }
+
+  KOKKOS_FUNCTION
+  StdSwapRangesFunctor(IteratorType1 _first1, IteratorType2 _first2)
+      : m_first1(std::move(_first1)), m_first2(std::move(_first2)) {}
+};
+
+template <class IndexType, class InputIt, class OutputIt,
+          class BinaryPredicateType>
+struct StdUniqueFunctor {
+  InputIt m_first_from;
+  InputIt m_last_from;
+  OutputIt m_first_dest;
+  BinaryPredicateType m_pred;
+
+  KOKKOS_FUNCTION
+  StdUniqueFunctor(InputIt first_from, InputIt last_from, OutputIt first_dest,
+                   BinaryPredicateType pred)
+      : m_first_from(std::move(first_from)),
+        m_last_from(std::move(last_from)),
+        m_first_dest(std::move(first_dest)),
+        m_pred(std::move(pred)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, IndexType& update,
+                  const bool final_pass) const {
+    auto& val_i         = m_first_from[i];
+    const auto& val_ip1 = m_first_from[i + 1];
+
+    if (final_pass) {
+      if (!m_pred(val_i, val_ip1)) {
+        m_first_dest[update] = std::move(val_i);
+      }
+    }
+
+    if (!m_pred(val_i, val_ip1)) {
+      update += 1;
+    }
+  }
+};
+
+template <class IndexType, class InputIterator, class OutputIterator>
+struct StdRotateCopyFunctor {
+  InputIterator m_first;
+  InputIterator m_last;
+  InputIterator m_first_n;
+  OutputIterator m_dest_first;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const {
+    const IndexType shift = m_last - m_first_n;
+
+    if (i < shift) {
+      m_dest_first[i] = m_first_n[i];
+    } else {
+      m_dest_first[i] = m_first[i - shift];
+    }
+  }
+
+  StdRotateCopyFunctor(InputIterator first, InputIterator last,
+                       InputIterator first_n, OutputIterator dest_first)
+      : m_first(std::move(first)),
+        m_last(std::move(last)),
+        m_first_n(std::move(first_n)),
+        m_dest_first(std::move(dest_first)) {}
+};
+
+template <class IndexType, class FirstFrom, class FirstDest, class PredType>
+struct StdRemoveIfStage1Functor {
+  FirstFrom m_first_from;
+  FirstDest m_first_dest;
+  PredType m_must_remove;
+
+  KOKKOS_FUNCTION
+  StdRemoveIfStage1Functor(FirstFrom first_from, FirstDest first_dest,
+                           PredType pred)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_must_remove(std::move(pred)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, IndexType& update,
+                  const bool final_pass) const {
+    auto& myval = m_first_from[i];
+    if (final_pass) {
+      if (!m_must_remove(myval)) {
+        // calling move here is ok because we are inside final pass
+        // we are calling move assign as specified by the std
+        m_first_dest[update] = std::move(myval);
+      }
+    }
+
+    if (!m_must_remove(myval)) {
+      update += 1;
+    }
+  }
+};
+
+template <class IndexType, class InputIteratorType, class OutputIteratorType>
+struct StdRemoveIfStage2Functor {
+  InputIteratorType m_first_from;
+  OutputIteratorType m_first_to;
+
+  KOKKOS_FUNCTION
+  StdRemoveIfStage2Functor(InputIteratorType first_from,
+                           OutputIteratorType first_to)
+      : m_first_from(std::move(first_from)), m_first_to(std::move(first_to)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i) const {
+    m_first_to[i] = std::move(m_first_from[i]);
+  }
+};
+
+// ------------------------------------------
+// unique_copy_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class PredicateType>
+OutputIterator unique_copy_impl(const std::string& label,
+                                const ExecutionSpace& ex, InputIterator first,
+                                InputIterator last, OutputIterator d_first,
+                                PredicateType pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+
+  // branch for trivial vs non trivial case
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  if (num_elements == 0) {
+    return d_first;
+  } else if (num_elements == 1) {
+    return Impl::copy_impl("Kokkos::copy_from_unique_copy", ex, first, last,
+                           d_first);
+  } else {
+    // aliases
+    using index_type = typename InputIterator::difference_type;
+    using func_type  = StdUniqueCopyFunctor<index_type, InputIterator,
+                                           OutputIterator, PredicateType>;
+
+    // note here that we run scan for num_elements - 1
+    // because of the way we implement this, the last element is always needed.
+    // We avoid performing checks inside functor that we are within limits
+    // and run a "safe" scan and then copy the last element.
+    const auto scan_size = num_elements - 1;
+    index_type count     = 0;
+    ::Kokkos::parallel_scan(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, scan_size),
+                            func_type(first, last, d_first, pred), count);
+
+    return Impl::copy_impl("Kokkos::copy_from_unique_copy", ex,
+                           first + scan_size, last, d_first + count);
+  }
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator unique_copy_impl(const std::string& label,
+                                const ExecutionSpace& ex, InputIterator first,
+                                InputIterator last, OutputIterator d_first) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using value_type1 = typename InputIterator::value_type;
+  using value_type2 = typename OutputIterator::value_type;
+
+  // default binary predicate uses ==
+  using binary_pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>;
+
+  // run
+  return unique_copy_impl(label, ex, first, last, d_first, binary_pred_t());
+}
+
+// ------------------------------------------
+// reverse_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator>
+void reverse_impl(const std::string& label, const ExecutionSpace& ex,
+                  InputIterator first, InputIterator last) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using func_t = StdReverseFunctor<InputIterator>;
+
+  // run
+  if (last >= first + 2) {
+    // only need half
+    const auto num_elements = Kokkos::Experimental::distance(first, last) / 2;
+    ::Kokkos::parallel_for(label,
+                           RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                           func_t(first, last));
+    ex.fence("Kokkos::reverse: fence after operation");
+  }
+}
+
+// ------------------------------------------
+// reverse_copy_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator reverse_copy_impl(const std::string& label,
+                                 const ExecutionSpace& ex, InputIterator first,
+                                 InputIterator last, OutputIterator d_first) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using index_type = typename InputIterator::difference_type;
+  using func_t =
+      StdReverseCopyFunctor<index_type, InputIterator, OutputIterator>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(last, d_first));
+  ex.fence("Kokkos::reverse_copy: fence after operation");
+
+  // return
+  return d_first + num_elements;
+}
+
+// ------------------------------------------
+// move_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator move_impl(const std::string& label, const ExecutionSpace& ex,
+                         InputIterator first, InputIterator last,
+                         OutputIterator d_first) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using index_type = typename InputIterator::difference_type;
+  using func_t     = StdMoveFunctor<index_type, InputIterator, OutputIterator>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(first, d_first));
+  ex.fence("Kokkos::move: fence after operation");
+
+  // return
+  return d_first + num_elements;
+}
+
+// ------------------------------------------
+// move_backward_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 move_backward_impl(const std::string& label,
+                                 const ExecutionSpace& ex, IteratorType1 first,
+                                 IteratorType1 last, IteratorType2 d_last) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_last);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_last);
+  Impl::expect_valid_range(first, last);
+
+  // aliases
+  using index_type = typename IteratorType1::difference_type;
+  using func_t =
+      StdMoveBackwardFunctor<index_type, IteratorType1, IteratorType2>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_t(last, d_last));
+  ex.fence("Kokkos::move_backward: fence after operation");
+
+  // return
+  return d_last - num_elements;
+}
+
+// ------------------------------------------
+// swap_ranges_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 swap_ranges_impl(const std::string& label,
+                               const ExecutionSpace& ex, IteratorType1 first1,
+                               IteratorType1 last1, IteratorType2 first2) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, first2);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, first2);
+  Impl::expect_valid_range(first1, last1);
+
+  // aliases
+  using index_type = typename IteratorType1::difference_type;
+  using func_t = StdSwapRangesFunctor<index_type, IteratorType1, IteratorType2>;
+
+  // run
+  const auto num_elements_to_swap =
+      Kokkos::Experimental::distance(first1, last1);
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_to_swap),
+      func_t(first1, first2));
+  ex.fence("Kokkos::swap_ranges: fence after operation");
+
+  // return
+  return first2 + num_elements_to_swap;
+}
+
+// ------------------------------------------
+// unique_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+IteratorType unique_impl(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType last,
+                         PredicateType pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  if (num_elements == 0) {
+    return first;
+  } else if (num_elements == 1) {
+    return last;
+  } else {
+    // ----------
+    // step 1:
+    // find first location of adjacent equal elements
+    // ----------
+    auto it_found =
+        ::Kokkos::Experimental::adjacent_find(ex, first, last, pred);
+
+    // if none, all elements are unique, so nothing to do
+    if (it_found == last) {
+      return last;
+    } else {
+      // if here, we found some equal adjacent elements,
+      // so count all preceeding unique elements
+      const auto num_unique_found_in_step_one = it_found - first;
+
+      // ----------
+      // step 2:
+      // ----------
+      // since we found some unique elements, we don't need to explore
+      // the full range [first, last), but only need to focus on the
+      // remaining range [it_found, last)
+      const auto num_elements_to_explore = last - it_found;
+
+      // create a tmp view to use to *move* all unique elements
+      // using the same algorithm used for unique_copy but we now move things
+      using value_type    = typename IteratorType::value_type;
+      using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+      tmp_view_type tmp_view("std_unique_tmp_view", num_elements_to_explore);
+
+      // scan extent is: num_elements_to_explore - 1
+      // for same reason as the one explained in unique_copy
+      const auto scan_size = num_elements_to_explore - 1;
+      auto tmp_first       = ::Kokkos::Experimental::begin(tmp_view);
+      using output_it      = decltype(tmp_first);
+
+      using index_type = typename IteratorType::difference_type;
+      using func_type =
+          StdUniqueFunctor<index_type, IteratorType, output_it, PredicateType>;
+      index_type count = 0;
+      ::Kokkos::parallel_scan(
+          label, RangePolicy<ExecutionSpace>(ex, 0, scan_size),
+          func_type(it_found, last, tmp_first, pred), count);
+
+      // move last element too, for the same reason as the unique_copy
+      auto unused_r =
+          Impl::move_impl("Kokkos::move_from_unique", ex, it_found + scan_size,
+                          last, tmp_first + count);
+      (void)unused_r;  // r1 not used
+
+      // ----------
+      // step 3
+      // ----------
+      // move back from tmp to original range,
+      // ensuring we start overwriting after the original unique found
+      using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+      using step3_func_t =
+          StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>;
+
+      ::Kokkos::parallel_for(
+          "unique_step3_parfor",
+          RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+          step3_func_t(begin(tmp_view),
+                       (first + num_unique_found_in_step_one)));
+
+      ex.fence("Kokkos::unique: fence after operation");
+
+      // return iterator to one passed the last written
+      // (the +1 is needed to account for the last element, see above)
+      return (first + num_unique_found_in_step_one + count + 1);
+    }
+  }
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType unique_impl(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType last) {
+  using value_type    = typename IteratorType::value_type;
+  using binary_pred_t = StdAlgoEqualBinaryPredicate<value_type>;
+  return unique_impl(label, ex, first, last, binary_pred_t());
+}
+
+// ------------------------------------------
+// rotate_copy_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator rotate_copy_impl(const std::string& label,
+                                const ExecutionSpace& ex, InputIterator first,
+                                InputIterator n_first, InputIterator last,
+                                OutputIterator d_first) {
+  /*
+    algorithm is implemented as follows:
+
+    first 	   n_first		last
+    |		      |                  |
+    o  o  o  o  o  o  o  o  o  o  o  o
+
+    dest+0 -> first_n
+    dest+1 -> first_n+1
+    dest+2 -> first_n+2
+    dest+3 -> first
+    dest+4 -> first+1
+    dest+5 -> first+2
+    dest+6 -> first+3
+    dest+7 -> first+4
+    dest+8 -> first+5
+    ...
+    let shift = last - first_n;
+
+    then we have:
+    if (i < shift){
+      *(dest_first + i) = *(first_n + i);
+    }
+    else{
+      *(dest_first + i) = *(from + i - shift);
+    }
+  */
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(first, n_first);
+  Impl::expect_valid_range(n_first, last);
+
+  if (first == last) {
+    return d_first;
+  }
+
+  // aliases
+  using index_type = typename InputIterator::difference_type;
+  using func_type =
+      StdRotateCopyFunctor<index_type, InputIterator, OutputIterator>;
+
+  // run
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         func_type(first, last, n_first, d_first));
+
+  ex.fence("Kokkos::rotate_copy: fence after operation");
+
+  // return
+  return d_first + num_elements;
+}
+
+// ------------------------------------------
+// rotate_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate_with_pivot_in_left_half(const std::string& label,
+                                            const ExecutionSpace& ex,
+                                            IteratorType first,
+                                            IteratorType n_first,
+                                            IteratorType last) {
+  /*
+    This impl is specific for when the n_first iterator points to
+    an element that is before or equal to the middle of the range.
+
+    If we have:
+
+    | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 | *
+      ^           ^              mid					   ^
+    first       n_first							  last
+
+    In step 1, we create a temporary view with extent = distance(n_first, last)
+    and *move* the elements from [n_first, last) to tmp view, such that
+    tmp view becomes:
+
+    | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 |
+
+    In step 2, we move the elements in [first, n_first)
+    to the new position where they are supposed to end up.
+
+    In step 3, we move the elements from the tmp view to
+    the range starting at first.
+   */
+
+  namespace KE                     = ::Kokkos::Experimental;
+  const auto num_elements_on_left  = KE::distance(first, n_first);
+  const auto num_elements_on_right = KE::distance(n_first, last);
+
+  // create helper tmp view
+  using value_type    = typename IteratorType::value_type;
+  using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+  tmp_view_type tmp_view("rotate_impl_for_pivot_in_left_half_impl",
+                         num_elements_on_right);
+  using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+
+  // index_type is the same and needed in all steps
+  using index_type = typename IteratorType::difference_type;
+
+  // stage 1
+  using step1_func_type =
+      StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_right),
+      step1_func_type(n_first, begin(tmp_view)));
+
+  // stage 2
+  using step2_func_type =
+      StdMoveFunctor<index_type, IteratorType, IteratorType>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_left),
+      step2_func_type(first, first + num_elements_on_right));
+
+  // step 3
+  using step3_func_type =
+      StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+                         step3_func_type(begin(tmp_view), first));
+
+  ex.fence("Kokkos::rotate: fence after operation");
+  return first + (last - n_first);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate_with_pivot_in_right_half(const std::string& label,
+                                             const ExecutionSpace& ex,
+                                             IteratorType first,
+                                             IteratorType n_first,
+                                             IteratorType last) {
+  /*
+    This impl is specific for when the n_first iterator points to
+    an element that is after the middle of the range.
+
+    If we have:
+
+    | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 | *
+      ^                          mid            ^                          ^
+    first                                    n_first			  last
+
+    In step 1, we create a temporary view with extent = distance(first, n_first)
+    and *move* the elements from [first, n_first) to tmp view,
+    such that tmp view becomes:
+
+    | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 |
+
+    In step 2, we move the elements in [n_first, last)
+    to the beginning where they are supposed to end up.
+
+    In step 3, we move the elements from the tmp view to
+    the range starting at first.
+   */
+
+  namespace KE                     = ::Kokkos::Experimental;
+  const auto num_elements_on_left  = KE::distance(first, n_first);
+  const auto num_elements_on_right = KE::distance(n_first, last);
+
+  // create helper tmp view
+  using value_type    = typename IteratorType::value_type;
+  using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+  tmp_view_type tmp_view("rotate_impl_for_pivot_in_left_half_impl",
+                         num_elements_on_left);
+  using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+
+  // index_type is the same and needed in all steps
+  using index_type = typename IteratorType::difference_type;
+
+  // stage 1
+  using step1_func_type =
+      StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_left),
+      step1_func_type(first, begin(tmp_view)));
+
+  // stage 2
+  using step2_func_type =
+      StdMoveFunctor<index_type, IteratorType, IteratorType>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_right),
+      step2_func_type(n_first, first));
+
+  // step 3:
+  using step3_func_type =
+      StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+      step3_func_type(begin(tmp_view), first + num_elements_on_right));
+
+  ex.fence("Kokkos::rotate: fence after operation");
+  return first + (last - n_first);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate_impl(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType n_first,
+                         IteratorType last) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(first, n_first);
+  Impl::expect_valid_range(n_first, last);
+
+  namespace KE                     = ::Kokkos::Experimental;
+  const auto num_elements          = KE::distance(first, last);
+  const auto n_distance_from_first = KE::distance(first, n_first);
+  if (n_distance_from_first <= num_elements / 2) {
+    return rotate_with_pivot_in_left_half(label, ex, first, n_first, last);
+  } else {
+    return rotate_with_pivot_in_right_half(label, ex, first, n_first, last);
+  }
+}
+
+// ------------------------------------------
+// remove_if_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class UnaryPredicateType>
+IteratorType remove_if_impl(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType first, IteratorType last,
+                            UnaryPredicateType pred) {
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    return last;
+  } else {
+    // create tmp buffer to use to *move* all elements that we need to keep.
+    // note that the tmp buffer is just large enought to store
+    // all elements to keep, because ideally we do not need/want one
+    // as large as the original range.
+    // To allocate the right tmp view, we need a call to count_if.
+    // We could just do a "safe" allocation of a buffer as
+    // large as (last-first), but I think a call to count_if is more afforable.
+
+    // count how many elements we need to keep
+    // note that the elements to remove are those that meet the predicate
+    const auto remove_count =
+        ::Kokkos::Experimental::count_if(ex, first, last, pred);
+    const auto keep_count =
+        Kokkos::Experimental::distance(first, last) - remove_count;
+
+    // create helper tmp view
+    using value_type    = typename IteratorType::value_type;
+    using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+    tmp_view_type tmp_view("std_remove_if_tmp_view", keep_count);
+    using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+
+    // in stage 1, *move* all elements to keep from original range to tmp
+    // we use similar impl as copy_if except that we *move* rather than copy
+    using index_type = typename IteratorType::difference_type;
+    using func1_type = StdRemoveIfStage1Functor<index_type, IteratorType,
+                                                tmp_readwrite_iterator_type,
+                                                UnaryPredicateType>;
+
+    const auto scan_num_elements = Kokkos::Experimental::distance(first, last);
+    index_type scan_count        = 0;
+    ::Kokkos::parallel_scan(
+        label, RangePolicy<ExecutionSpace>(ex, 0, scan_num_elements),
+        func1_type(first, begin(tmp_view), pred), scan_count);
+
+    // scan_count should be equal to keep_count
+    assert(scan_count == keep_count);
+    (void)scan_count;  // to avoid unused complaints
+
+    // stage 2, we do parfor to move from tmp to original range
+    using func2_type =
+        StdRemoveIfStage2Functor<index_type, tmp_readwrite_iterator_type,
+                                 IteratorType>;
+    ::Kokkos::parallel_for(
+        "remove_if_stage2_parfor",
+        RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+        func2_type(begin(tmp_view), first));
+    ex.fence("Kokkos::remove_if: fence after stage2");
+
+    // return
+    return first + keep_count;
+  }
+}
+
+// ------------------------------------------
+// remove_impl
+// ------------------------------------------
+template <class ExecutionSpace, class IteratorType, class ValueType>
+auto remove_impl(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first, IteratorType last,
+                 const ValueType& value) {
+  using predicate_type = StdAlgoEqualsValUnaryPredicate<ValueType>;
+  return remove_if_impl(label, ex, first, last, predicate_type(value));
+}
+
+// ------------------------------------------
+// remove_copy_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType>
+auto remove_copy_impl(const std::string& label, const ExecutionSpace& ex,
+                      InputIteratorType first_from, InputIteratorType last_from,
+                      OutputIteratorType first_dest, const ValueType& value) {
+  // this is like copy_if except that we need to *ignore* the elements
+  // that match the value, so we can solve this as follows:
+
+  using predicate_type = StdAlgoNotEqualsValUnaryPredicate<ValueType>;
+  return ::Kokkos::Experimental::copy_if(label, ex, first_from, last_from,
+                                         first_dest, predicate_type(value));
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class UnaryPredicate>
+auto remove_copy_if_impl(const std::string& label, const ExecutionSpace& ex,
+                         InputIteratorType first_from,
+                         InputIteratorType last_from,
+                         OutputIteratorType first_dest,
+                         const UnaryPredicate& pred) {
+  // this is like copy_if except that we need to *ignore* the elements
+  // satisfying the pred, so we can solve this as follows:
+
+  using value_type = typename InputIteratorType::value_type;
+  using pred_wrapper_type =
+      StdAlgoNegateUnaryPredicateWrapper<value_type, UnaryPredicate>;
+  return ::Kokkos::Experimental::copy_if(label, ex, first_from, last_from,
+                                         first_dest, pred_wrapper_type(pred));
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_left_impl(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType first, IteratorType last,
+                             typename IteratorType::difference_type n) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+  KOKKOS_EXPECTS(n >= 0);
+
+  // handle trivial cases
+  if (n == 0) {
+    return last;
+  }
+
+  if (n >= Kokkos::Experimental::distance(first, last)) {
+    return first;
+  }
+
+  /*
+    Suppose that n = 5, and our [first,last) spans:
+
+    | 0  | 1  |  2 | 1  | 2  | 1  | 2  | 2  | 10 | -3 | 1  | -6 | *
+      ^                         				  ^
+    first							 last
+
+    shift_left modifies the range such that we have this data:
+    | 1  | 2  | 2  | 10  | -3 | 1  | -6 | x | x  | x  | x  |  x | *
+                                          ^
+                                   return it pointing here
+
+
+    and returns an iterator pointing to one past the new end.
+    Note: elements marked x are in undefined state because have been moved.
+
+    We implement this in two steps:
+    step 1:
+      we create a temporary view with extent = distance(first+n, last)
+      and *move* assign the elements from [first+n, last) to tmp view, such that
+      tmp view becomes:
+
+      | 1  | 2  | 2  | 10  | -3 | 1  | -6 |
+
+    step 2:
+      move elements of tmp view back to range starting at first.
+   */
+
+  const auto num_elements_to_move =
+      ::Kokkos::Experimental::distance(first + n, last);
+
+  // create tmp view
+  using value_type    = typename IteratorType::value_type;
+  using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+  tmp_view_type tmp_view("shift_left_impl", num_elements_to_move);
+  using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+
+  using index_type = typename IteratorType::difference_type;
+
+  // step 1
+  using step1_func_type =
+      StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_to_move),
+      step1_func_type(first + n, begin(tmp_view)));
+
+  // step 2
+  using step2_func_type =
+      StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+                         step2_func_type(begin(tmp_view), first));
+
+  ex.fence("Kokkos::shift_left: fence after operation");
+
+  return last - n;
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_right_impl(const std::string& label,
+                              const ExecutionSpace& ex, IteratorType first,
+                              IteratorType last,
+                              typename IteratorType::difference_type n) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+  KOKKOS_EXPECTS(n >= 0);
+
+  // handle trivial cases
+  if (n == 0) {
+    return first;
+  }
+
+  if (n >= Kokkos::Experimental::distance(first, last)) {
+    return last;
+  }
+
+  /*
+    Suppose that n = 3, and [first,last) spans:
+
+    | 0  | 1  |  2 | 1  | 2  | 1  | 2  | 2  | 10 | -3 | 1  | -6 | *
+      ^                         				  ^
+    first							 last
+
+    shift_right modifies the range such that we have this data:
+    |  x | x  | x  | 0  | 1  |  2 | 1  | 2  | 1  | 2  | 2  | 10 | *
+                     ^
+             return it points here
+
+    and returns an iterator pointing to the new beginning.
+    Note: elements marked x are in undefined state because have been moved.
+
+    We implement this in two steps:
+    step 1:
+      we create a temporary view with extent = distance(first, last-n)
+      and *move* assign the elements from [first, last-n) to tmp view, such that
+      tmp view becomes:
+
+      | 0  | 1  |  2 | 1  | 2  | 1  | 2  | 2  | 10 |
+
+    step 2:
+      move elements of tmp view back to range starting at first+n.
+   */
+
+  const auto num_elements_to_move =
+      ::Kokkos::Experimental::distance(first, last - n);
+
+  // create tmp view
+  using value_type    = typename IteratorType::value_type;
+  using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+  tmp_view_type tmp_view("shift_right_impl", num_elements_to_move);
+  using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+
+  using index_type = typename IteratorType::difference_type;
+
+  // step 1
+  using step1_func_type =
+      StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_to_move),
+      step1_func_type(first, begin(tmp_view)));
+
+  // step 2
+  using step2_func_type =
+      StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+                         step2_func_type(begin(tmp_view), first + n));
+
+  ex.fence("Kokkos::shift_right: fence after operation");
+
+  return first + n;
+}
+
+}  // namespace Impl
+
+// -------------------
+// reverse_copy
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first,
+                            InputIterator last, OutputIterator d_first) {
+  return Impl::reverse_copy_impl("Kokkos::reverse_copy_iterator_api_default",
+                                 ex, first, last, d_first);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex,
+                            InputIterator first, InputIterator last,
+                            OutputIterator d_first) {
+  return Impl::reverse_copy_impl(label, ex, first, last, d_first);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto reverse_copy(const ExecutionSpace& ex,
+                  const ::Kokkos::View<DataType1, Properties1...>& source,
+                  ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::reverse_copy_impl("Kokkos::reverse_copy_view_api_default", ex,
+                                 cbegin(source), cend(source), begin(dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto reverse_copy(const std::string& label, const ExecutionSpace& ex,
+                  const ::Kokkos::View<DataType1, Properties1...>& source,
+                  ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::reverse_copy_impl(label, ex, cbegin(source), cend(source),
+                                 begin(dest));
+}
+
+// -------------------
+// reverse
+// -------------------
+template <class ExecutionSpace, class InputIterator>
+void reverse(const ExecutionSpace& ex, InputIterator first,
+             InputIterator last) {
+  return Impl::reverse_impl("Kokkos::reverse_iterator_api_default", ex, first,
+                            last);
+}
+
+template <class ExecutionSpace, class InputIterator>
+void reverse(const std::string& label, const ExecutionSpace& ex,
+             InputIterator first, InputIterator last) {
+  return Impl::reverse_impl(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+void reverse(const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::reverse_impl("Kokkos::reverse_view_api_default", ex,
+                            KE::begin(view), KE::end(view));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+void reverse(const std::string& label, const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::reverse_impl(label, ex, KE::begin(view), KE::end(view));
+}
+
+// ----------------------
+// move
+// ----------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator move(const ExecutionSpace& ex, InputIterator first,
+                    InputIterator last, OutputIterator d_first) {
+  return Impl::move_impl("Kokkos::move_iterator_api_default", ex, first, last,
+                         d_first);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator move(const std::string& label, const ExecutionSpace& ex,
+                    InputIterator first, InputIterator last,
+                    OutputIterator d_first) {
+  return Impl::move_impl(label, ex, first, last, d_first);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto move(const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType1, Properties1...>& source,
+          ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::move_impl("Kokkos::move_view_api_default", ex, begin(source),
+                         end(source), begin(dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto move(const std::string& label, const ExecutionSpace& ex,
+          const ::Kokkos::View<DataType1, Properties1...>& source,
+          ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::move_impl(label, ex, begin(source), end(source), begin(dest));
+}
+
+// -------------------
+// move_backward
+// -------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first,
+                            IteratorType1 last, IteratorType2 d_last) {
+  return Impl::move_backward_impl("Kokkos::move_backward_iterator_api_default",
+                                  ex, first, last, d_last);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto move_backward(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& source,
+                   ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::move_backward_impl("Kokkos::move_backward_view_api_default", ex,
+                                  begin(source), end(source), end(dest));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 d_last) {
+  return Impl::move_backward_impl(label, ex, first, last, d_last);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto move_backward(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& source,
+                   ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::move_backward_impl(label, ex, begin(source), end(source),
+                                  end(dest));
+}
+
+// ----------------------
+// swap_ranges
+// ----------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1,
+                          IteratorType1 last1, IteratorType2 first2) {
+  return Impl::swap_ranges_impl("Kokkos::swap_ranges_iterator_api_default", ex,
+                                first1, last1, first2);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto swap_ranges(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  assert(source.extent(0) == dest.extent(0));
+  return Impl::swap_ranges_impl("Kokkos::swap_ranges_view_api_default", ex,
+                                begin(source), end(source), begin(dest));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex,
+                          IteratorType1 first1, IteratorType1 last1,
+                          IteratorType2 first2) {
+  return Impl::swap_ranges_impl(label, ex, first1, last1, first2);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto swap_ranges(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  assert(source.extent(0) == dest.extent(0));
+  return Impl::swap_ranges_impl(label, ex, begin(source), end(source),
+                                begin(dest));
+}
+
+// -------------------
+// unique
+// -------------------
+// note: the enable_if below is to avoid "call to ... is ambiguous"
+// for example in the unit test when using a variadic function
+
+// overload set1
+template <class ExecutionSpace, class IteratorType>
+std::enable_if_t<!::Kokkos::is_view<IteratorType>::value, IteratorType> unique(
+    const ExecutionSpace& ex, IteratorType first, IteratorType last) {
+  return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first,
+                           last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+std::enable_if_t<!::Kokkos::is_view<IteratorType>::value, IteratorType> unique(
+    const std::string& label, const ExecutionSpace& ex, IteratorType first,
+    IteratorType last) {
+  return Impl::unique_impl(label, ex, first, last);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto unique(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return ::Kokkos::Experimental::unique("Kokkos::unique_view_api_default", ex,
+                                        begin(view), end(view));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto unique(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return ::Kokkos::Experimental::unique(label, ex, begin(view), end(view));
+}
+
+// overload set2
+template <class ExecutionSpace, class IteratorType, class BinaryPredicate>
+IteratorType unique(const ExecutionSpace& ex, IteratorType first,
+                    IteratorType last, BinaryPredicate pred) {
+  return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first,
+                           last, pred);
+}
+
+template <class ExecutionSpace, class IteratorType, class BinaryPredicate>
+IteratorType unique(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType last,
+                    BinaryPredicate pred) {
+  return Impl::unique_impl(label, ex, first, last, pred);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class BinaryPredicate>
+auto unique(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view,
+            BinaryPredicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::unique_impl("Kokkos::unique_view_api_default", ex, begin(view),
+                           end(view), std::move(pred));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class BinaryPredicate>
+auto unique(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view,
+            BinaryPredicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::unique_impl(label, ex, begin(view), end(view), std::move(pred));
+}
+
+// -------------------
+// unique_copy
+// -------------------
+// note: the enable_if below is to avoid "call to ... is ambiguous"
+// for example in the unit test when using a variadic function
+
+// overload set1
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+std::enable_if_t<!::Kokkos::is_view<InputIterator>::value, OutputIterator>
+unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last,
+            OutputIterator d_first) {
+  return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex,
+                                first, last, d_first);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+std::enable_if_t<!::Kokkos::is_view<InputIterator>::value, OutputIterator>
+unique_copy(const std::string& label, const ExecutionSpace& ex,
+            InputIterator first, InputIterator last, OutputIterator d_first) {
+  return Impl::unique_copy_impl(label, ex, first, last, d_first);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto unique_copy(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 const ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return ::Kokkos::Experimental::unique_copy(
+      "Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source),
+      begin(dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto unique_copy(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 const ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return ::Kokkos::Experimental::unique_copy(label, ex, cbegin(source),
+                                             cend(source), begin(dest));
+}
+
+// overload set2
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class BinaryPredicate>
+OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first,
+                           InputIterator last, OutputIterator d_first,
+                           BinaryPredicate pred) {
+  return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex,
+                                first, last, d_first, pred);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class BinaryPredicate>
+OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex,
+                           InputIterator first, InputIterator last,
+                           OutputIterator d_first, BinaryPredicate pred) {
+  return Impl::unique_copy_impl(label, ex, first, last, d_first, pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicate>
+auto unique_copy(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 const ::Kokkos::View<DataType2, Properties2...>& dest,
+                 BinaryPredicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::unique_copy_impl("Kokkos::unique_copy_view_api_default", ex,
+                                cbegin(source), cend(source), begin(dest),
+                                std::move(pred));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicate>
+auto unique_copy(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 const ::Kokkos::View<DataType2, Properties2...>& dest,
+                 BinaryPredicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::unique_copy_impl(label, ex, cbegin(source), cend(source),
+                                begin(dest), std::move(pred));
+}
+
+// -------------------
+// rotate
+// -------------------
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate(const ExecutionSpace& ex, IteratorType first,
+                    IteratorType n_first, IteratorType last) {
+  return Impl::rotate_impl("Kokkos::rotate_iterator_api_default", ex, first,
+                           n_first, last);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType n_first,
+                    IteratorType last) {
+  return Impl::rotate_impl(label, ex, first, n_first, last);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto rotate(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view,
+            std::size_t n_location) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::rotate_impl("Kokkos::rotate_view_api_default", ex, begin(view),
+                           begin(view) + n_location, end(view));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto rotate(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view,
+            std::size_t n_location) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::rotate_impl(label, ex, begin(view), begin(view) + n_location,
+                           end(view));
+}
+
+// -------------------
+// rotate_copy
+// -------------------
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first,
+                           InputIterator n_first, InputIterator last,
+                           OutputIterator d_first) {
+  return Impl::rotate_copy_impl("Kokkos::rotate_copy_iterator_api_default", ex,
+                                first, n_first, last, d_first);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex,
+                           InputIterator first, InputIterator n_first,
+                           InputIterator last, OutputIterator d_first) {
+  return Impl::rotate_copy_impl(label, ex, first, n_first, last, d_first);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto rotate_copy(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 std::size_t n_location,
+                 const ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::rotate_copy_impl("Kokkos::rotate_copy_view_api_default", ex,
+                                cbegin(source), cbegin(source) + n_location,
+                                cend(source), begin(dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto rotate_copy(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& source,
+                 std::size_t n_location,
+                 const ::Kokkos::View<DataType2, Properties2...>& dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
+
+  return Impl::rotate_copy_impl(label, ex, cbegin(source),
+                                cbegin(source) + n_location, cend(source),
+                                begin(dest));
+}
+
+// -------------------
+// remove_if
+// -------------------
+template <class ExecutionSpace, class Iterator, class UnaryPredicate>
+Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last,
+                   UnaryPredicate pred) {
+  return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex,
+                              first, last, pred);
+}
+
+template <class ExecutionSpace, class Iterator, class UnaryPredicate>
+Iterator remove_if(const std::string& label, const ExecutionSpace& ex,
+                   Iterator first, Iterator last, UnaryPredicate pred) {
+  return Impl::remove_if_impl(label, ex, first, last, pred);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class UnaryPredicate>
+auto remove_if(const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType, Properties...>& view,
+               UnaryPredicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex,
+                              ::Kokkos::Experimental::begin(view),
+                              ::Kokkos::Experimental::end(view), pred);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class UnaryPredicate>
+auto remove_if(const std::string& label, const ExecutionSpace& ex,
+               const ::Kokkos::View<DataType, Properties...>& view,
+               UnaryPredicate pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::remove_if_impl(label, ex, ::Kokkos::Experimental::begin(view),
+                              ::Kokkos::Experimental::end(view), pred);
+}
+
+// -------------------
+// remove
+// -------------------
+template <class ExecutionSpace, class Iterator, class ValueType>
+Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last,
+                const ValueType& value) {
+  return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, first,
+                           last, value);
+}
+
+template <class ExecutionSpace, class Iterator, class ValueType>
+Iterator remove(const std::string& label, const ExecutionSpace& ex,
+                Iterator first, Iterator last, const ValueType& value) {
+  return Impl::remove_impl(label, ex, first, last, value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType>
+auto remove(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view,
+            const ValueType& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex,
+                           ::Kokkos::Experimental::begin(view),
+                           ::Kokkos::Experimental::end(view), value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType>
+auto remove(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view,
+            const ValueType& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::remove_impl(label, ex, ::Kokkos::Experimental::begin(view),
+                           ::Kokkos::Experimental::end(view), value);
+}
+
+// -------------------
+// remove_copy
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class ValueType>
+OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from,
+                           InputIterator last_from, OutputIterator first_dest,
+                           const ValueType& value) {
+  return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex,
+                                first_from, last_from, first_dest, value);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class ValueType>
+OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex,
+                           InputIterator first_from, InputIterator last_from,
+                           OutputIterator first_dest, const ValueType& value) {
+  return Impl::remove_copy_impl(label, ex, first_from, last_from, first_dest,
+                                value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+auto remove_copy(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                 const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                 const ValueType& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+
+  return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex,
+                                ::Kokkos::Experimental::cbegin(view_from),
+                                ::Kokkos::Experimental::cend(view_from),
+                                ::Kokkos::Experimental::begin(view_dest),
+                                value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+auto remove_copy(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                 const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                 const ValueType& value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+
+  return Impl::remove_copy_impl(
+      label, ex, ::Kokkos::Experimental::cbegin(view_from),
+      ::Kokkos::Experimental::cend(view_from),
+      ::Kokkos::Experimental::begin(view_dest), value);
+}
+
+// -------------------
+// remove_copy_if
+// -------------------
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class UnaryPredicate>
+OutputIterator remove_copy_if(const ExecutionSpace& ex,
+                              InputIterator first_from, InputIterator last_from,
+                              OutputIterator first_dest,
+                              const UnaryPredicate& pred) {
+  return Impl::remove_copy_if_impl(
+      "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from,
+      first_dest, pred);
+}
+
+template <class ExecutionSpace, class InputIterator, class OutputIterator,
+          class UnaryPredicate>
+OutputIterator remove_copy_if(const std::string& label,
+                              const ExecutionSpace& ex,
+                              InputIterator first_from, InputIterator last_from,
+                              OutputIterator first_dest,
+                              const UnaryPredicate& pred) {
+  return Impl::remove_copy_if_impl(label, ex, first_from, last_from, first_dest,
+                                   pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class UnaryPredicate>
+auto remove_copy_if(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    const UnaryPredicate& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+
+  return Impl::remove_copy_if_impl(
+      "Kokkos::remove_copy_if_iterator_api_default", ex,
+      ::Kokkos::Experimental::cbegin(view_from),
+      ::Kokkos::Experimental::cend(view_from),
+      ::Kokkos::Experimental::begin(view_dest), pred);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class UnaryPredicate>
+auto remove_copy_if(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    const UnaryPredicate& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+
+  return Impl::remove_copy_if_impl(
+      label, ex, ::Kokkos::Experimental::cbegin(view_from),
+      ::Kokkos::Experimental::cend(view_from),
+      ::Kokkos::Experimental::begin(view_dest), pred);
+}
+
+// -------------------
+// shift_left
+// -------------------
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_left(const ExecutionSpace& ex, IteratorType first,
+                        IteratorType last,
+                        typename IteratorType::difference_type n) {
+  return Impl::shift_left_impl("Kokkos::shift_left_iterator_api_default", ex,
+                               first, last, n);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_left(const std::string& label, const ExecutionSpace& ex,
+                        IteratorType first, IteratorType last,
+                        typename IteratorType::difference_type n) {
+  return Impl::shift_left_impl(label, ex, first, last, n);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto shift_left(const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& view,
+                typename decltype(begin(view))::difference_type n) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::shift_left_impl("Kokkos::shift_left_view_api_default", ex,
+                               begin(view), end(view), n);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto shift_left(const std::string& label, const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& view,
+                typename decltype(begin(view))::difference_type n) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::shift_left_impl(label, ex, begin(view), end(view), n);
+}
+
+// -------------------
+// shift_right
+// -------------------
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_right(const ExecutionSpace& ex, IteratorType first,
+                         IteratorType last,
+                         typename IteratorType::difference_type n) {
+  return Impl::shift_right_impl("Kokkos::shift_right_iterator_api_default", ex,
+                                first, last, n);
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_right(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType last,
+                         typename IteratorType::difference_type n) {
+  return Impl::shift_right_impl(label, ex, first, last, n);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto shift_right(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& view,
+                 typename decltype(begin(view))::difference_type n) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::shift_right_impl("Kokkos::shift_right_view_api_default", ex,
+                                begin(view), end(view), n);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto shift_right(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& view,
+                 typename decltype(begin(view))::difference_type n) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  return Impl::shift_right_impl(label, ex, begin(view), end(view), n);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..03e5fd6aeb09975f6ced7da152c577b1a7fd3cc9
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp
@@ -0,0 +1,279 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP
+#define KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP
+
+#include <Kokkos_Core.hpp>
+#include "../Kokkos_BeginEnd.hpp"
+#include "../Kokkos_Constraints.hpp"
+#include "../Kokkos_Distance.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// ------------------------
+//
+// functors
+//
+// ------------------------
+template <class ValueType1, class ValueType2, class RetType = ValueType2>
+struct StdAdjacentDifferenceDefaultBinaryOpFunctor {
+  KOKKOS_FUNCTION
+  constexpr RetType operator()(const ValueType1& a, const ValueType2& b) const {
+    return a - b;
+  }
+};
+
+template <class InputIteratorType, class OutputIteratorType,
+          class BinaryOperator>
+struct StdAdjacentDiffFunctor {
+  using index_type = typename InputIteratorType::difference_type;
+
+  const InputIteratorType m_first_from;
+  const OutputIteratorType m_first_dest;
+  BinaryOperator m_op;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i) const {
+    const auto& my_value = m_first_from[i];
+    if (i == 0) {
+      m_first_dest[i] = my_value;
+    } else {
+      const auto& left_value = m_first_from[i - 1];
+      m_first_dest[i]        = m_op(my_value, left_value);
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdAdjacentDiffFunctor(InputIteratorType first_from,
+                         OutputIteratorType first_dest, BinaryOperator op)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_op(std::move(op)) {}
+};
+
+// ------------------------------------------
+// adjacent_difference_impl
+// ------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp>
+OutputIteratorType adjacent_difference_impl(const std::string& label,
+                                            const ExecutionSpace& ex,
+                                            InputIteratorType first_from,
+                                            InputIteratorType last_from,
+                                            OutputIteratorType first_dest,
+                                            BinaryOp bin_op) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  if (first_from == last_from) {
+    return first_dest;
+  }
+
+  // aliases
+  using value_type    = typename OutputIteratorType::value_type;
+  using aux_view_type = ::Kokkos::View<value_type*, ExecutionSpace>;
+  using functor_t =
+      StdAdjacentDiffFunctor<InputIteratorType, OutputIteratorType, BinaryOp>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  aux_view_type aux_view("aux_view", num_elements);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                         functor_t(first_from, first_dest, bin_op));
+  ex.fence("Kokkos::adjacent_difference: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+}  // end namespace Impl
+
+// ------------------------
+//
+// public API
+//
+// ------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType>
+std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value,
+                 OutputIteratorType>
+adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from,
+                    InputIteratorType last_from,
+                    OutputIteratorType first_dest) {
+  using value_type1 = typename InputIteratorType::value_type;
+  using value_type2 = typename OutputIteratorType::value_type;
+  using binary_op =
+      Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor<value_type1,
+                                                        value_type2>;
+  return Impl::adjacent_difference_impl(
+      "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from,
+      first_dest, binary_op());
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp>
+std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value,
+                 OutputIteratorType>
+adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from,
+                    InputIteratorType last_from, OutputIteratorType first_dest,
+                    BinaryOp bin_op) {
+  return Impl::adjacent_difference_impl(
+      "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from,
+      first_dest, bin_op);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType>
+std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value,
+                 OutputIteratorType>
+adjacent_difference(const std::string& label, const ExecutionSpace& ex,
+                    InputIteratorType first_from, InputIteratorType last_from,
+                    OutputIteratorType first_dest) {
+  using value_type1 = typename InputIteratorType::value_type;
+  using value_type2 = typename OutputIteratorType::value_type;
+  using binary_op =
+      Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor<value_type1,
+                                                        value_type2>;
+  return Impl::adjacent_difference_impl(label, ex, first_from, last_from,
+                                        first_dest, binary_op());
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp>
+std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value,
+                 OutputIteratorType>
+adjacent_difference(const std::string& label, const ExecutionSpace& ex,
+                    InputIteratorType first_from, InputIteratorType last_from,
+                    OutputIteratorType first_dest, BinaryOp bin_op) {
+  return Impl::adjacent_difference_impl(label, ex, first_from, last_from,
+                                        first_dest, bin_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto adjacent_difference(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+
+  using view_type1  = ::Kokkos::View<DataType1, Properties1...>;
+  using view_type2  = ::Kokkos::View<DataType2, Properties2...>;
+  using value_type1 = typename view_type1::value_type;
+  using value_type2 = typename view_type2::value_type;
+  using binary_op =
+      Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor<value_type1,
+                                                        value_type2>;
+  return Impl::adjacent_difference_impl(
+      "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from),
+      KE::cend(view_from), KE::begin(view_dest), binary_op());
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp>
+auto adjacent_difference(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    BinaryOp bin_op) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  return Impl::adjacent_difference_impl(
+      "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from),
+      KE::cend(view_from), KE::begin(view_dest), bin_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto adjacent_difference(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+
+  using view_type1  = ::Kokkos::View<DataType1, Properties1...>;
+  using view_type2  = ::Kokkos::View<DataType2, Properties2...>;
+  using value_type1 = typename view_type1::value_type;
+  using value_type2 = typename view_type2::value_type;
+  using binary_op =
+      Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor<value_type1,
+                                                        value_type2>;
+
+  return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from),
+                                        KE::cend(view_from),
+                                        KE::begin(view_dest), binary_op());
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp>
+auto adjacent_difference(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    BinaryOp bin_op) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from),
+                                        KE::cend(view_from),
+                                        KE::begin(view_dest), bin_op);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_ExclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_ExclusiveScan.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..62ebbec427e2b0d5e8159d703f117ec0a7ca4b06
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_ExclusiveScan.hpp
@@ -0,0 +1,517 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_NUMERICS_EXCLUSIVE_SCAN_HPP
+#define KOKKOS_STD_NUMERICS_EXCLUSIVE_SCAN_HPP
+
+#include <Kokkos_Core.hpp>
+#include "../Kokkos_BeginEnd.hpp"
+#include "../Kokkos_Constraints.hpp"
+#include "../Kokkos_Distance.hpp"
+#include "../Kokkos_ModifyingOperations.hpp"
+#include "../Kokkos_ValueWrapperForNoNeutralElement.hpp"
+#include "Kokkos_IdentityReferenceUnaryFunctor.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template <class ExeSpace, class IndexType, class ValueType, class FirstFrom,
+          class FirstDest>
+struct ExclusiveScanDefaultFunctor {
+  using execution_space = ExeSpace;
+  using value_type =
+      ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement<ValueType>;
+
+  ValueType m_init_value;
+  FirstFrom m_first_from;
+  FirstDest m_first_dest;
+
+  KOKKOS_FUNCTION
+  ExclusiveScanDefaultFunctor(ValueType init, FirstFrom first_from,
+                              FirstDest first_dest)
+      : m_init_value(std::move(init)),
+        m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, value_type& update,
+                  const bool final_pass) const {
+    if (final_pass) {
+      if (i == 0) {
+        m_first_dest[i] = m_init_value;
+      } else {
+        m_first_dest[i] = update.val + m_init_value;
+      }
+    }
+
+    const auto tmp = value_type{m_first_from[i], false};
+    this->join(update, tmp);
+  }
+
+  KOKKOS_FUNCTION
+  void init(value_type& update) const {
+    update.val        = {};
+    update.is_initial = true;
+  }
+
+  KOKKOS_FUNCTION
+  void join(volatile value_type& update,
+            volatile const value_type& input) const {
+    if (update.is_initial) {
+      update.val        = input.val;
+      update.is_initial = false;
+    } else {
+      update.val = update.val + input.val;
+    }
+  }
+};
+
+template <class ExeSpace, class IndexType, class ValueType, class FirstFrom,
+          class FirstDest, class BinaryOpType, class UnaryOpType>
+struct TransformExclusiveScanFunctor {
+  using execution_space = ExeSpace;
+  using value_type =
+      ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement<ValueType>;
+
+  ValueType m_init_value;
+  FirstFrom m_first_from;
+  FirstDest m_first_dest;
+  BinaryOpType m_binary_op;
+  UnaryOpType m_unary_op;
+
+  KOKKOS_FUNCTION
+  TransformExclusiveScanFunctor(ValueType init, FirstFrom first_from,
+                                FirstDest first_dest, BinaryOpType bop,
+                                UnaryOpType uop)
+      : m_init_value(std::move(init)),
+        m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_binary_op(std::move(bop)),
+        m_unary_op(std::move(uop)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, value_type& update,
+                  const bool final_pass) const {
+    if (final_pass) {
+      if (i == 0) {
+        // for both ExclusiveScan and TransformExclusiveScan,
+        // init is unmodified
+        m_first_dest[i] = m_init_value;
+      } else {
+        m_first_dest[i] = m_binary_op(update.val, m_init_value);
+      }
+    }
+
+    const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
+    this->join(update, tmp);
+  }
+
+  KOKKOS_FUNCTION
+  void init(value_type& update) const {
+    update.val        = {};
+    update.is_initial = true;
+  }
+
+  KOKKOS_FUNCTION
+  void join(volatile value_type& update,
+            volatile const value_type& input) const {
+    if (update.is_initial) {
+      update.val = input.val;
+    } else {
+      update.val = m_binary_op(update.val, input.val);
+    }
+    update.is_initial = false;
+  }
+};
+
+// --------------------------------------------------
+// exclusive_scan_custom_op_impl
+// --------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType, class BinaryOpType>
+OutputIteratorType exclusive_scan_custom_op_impl(
+    const std::string& label, const ExecutionSpace& ex,
+    InputIteratorType first_from, InputIteratorType last_from,
+    OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type    = typename InputIteratorType::difference_type;
+  using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
+  using func_type =
+      TransformExclusiveScanFunctor<ExecutionSpace, index_type, ValueType,
+                                    InputIteratorType, OutputIteratorType,
+                                    BinaryOpType, unary_op_type>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_type(init_value, first_from, first_dest, bop, unary_op_type()));
+  ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+// --------------------------------------------------
+// transform_exclusive_scan_impl
+// --------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType, class BinaryOpType,
+          class UnaryOpType>
+OutputIteratorType transform_exclusive_scan_impl(
+    const std::string& label, const ExecutionSpace& ex,
+    InputIteratorType first_from, InputIteratorType last_from,
+    OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop,
+    UnaryOpType uop) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using func_type =
+      TransformExclusiveScanFunctor<ExecutionSpace, index_type, ValueType,
+                                    InputIteratorType, OutputIteratorType,
+                                    BinaryOpType, UnaryOpType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_type(init_value, first_from, first_dest, bop, uop));
+  ex.fence("Kokkos::transform_exclusive_scan: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+// --------------------------------------------------
+// exclusive_scan_default_op_impl
+// --------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType>
+OutputIteratorType exclusive_scan_default_op_impl(const std::string& label,
+                                                  const ExecutionSpace& ex,
+                                                  InputIteratorType first_from,
+                                                  InputIteratorType last_from,
+                                                  OutputIteratorType first_dest,
+                                                  ValueType init_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // does it make sense to do this static_assert too?
+  // using input_iterator_value_type = typename InputIteratorType::value_type;
+  // static_assert
+  //   (std::is_convertible<std::remove_cv_t<input_iterator_value_type>,
+  //   ValueType>::value,
+  //    "exclusive_scan: InputIteratorType::value_type not convertible to
+  //    ValueType");
+
+  // we are unnecessarily duplicating code, but this is on purpose
+  // so that we can use the default_op for OpenMPTarget.
+  // Originally, I had this implemented as:
+  // '''
+  // using bop_type   = StdExclusiveScanDefaultJoinFunctor<ValueType>;
+  // call exclusive_scan_custom_op_impl(..., bop_type());
+  // '''
+  // which avoids duplicating the functors, but for OpenMPTarget
+  // I cannot use a custom binary op.
+  // This is the same problem that occurs for reductions.
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using func_type =
+      ExclusiveScanDefaultFunctor<ExecutionSpace, index_type, ValueType,
+                                  InputIteratorType, OutputIteratorType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(label,
+                          RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                          func_type(init_value, first_from, first_dest));
+  ex.fence("Kokkos::exclusive_scan_default_op: fence after operation");
+
+  return first_dest + num_elements;
+}
+
+}  // end namespace Impl
+
+///////////////////////////////
+//
+// exclusive scan API
+//
+///////////////////////////////
+
+// overload set 1
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+exclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest,
+               ValueType init_value) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  return Impl::exclusive_scan_default_op_impl(
+      "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last,
+      first_dest, init_value);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+exclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest, ValueType init_value) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  return Impl::exclusive_scan_default_op_impl(label, ex, first, last,
+                                              first_dest, init_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+auto exclusive_scan(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    ValueType init_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::exclusive_scan_default_op_impl(
+      "Kokkos::exclusive_scan_default_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      init_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+auto exclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    ValueType init_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from),
+                                              KE::cend(view_from),
+                                              KE::begin(view_dest), init_value);
+}
+
+// overload set 2
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType, class BinaryOpType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+exclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest,
+               ValueType init_value, BinaryOpType bop) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  return Impl::exclusive_scan_custom_op_impl(
+      "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last,
+      first_dest, init_value, bop);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType, class BinaryOpType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+exclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest, ValueType init_value,
+               BinaryOpType bop) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest,
+                                             init_value, bop);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType,
+          class BinaryOpType>
+auto exclusive_scan(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    ValueType init_value, BinaryOpType bop) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::exclusive_scan_custom_op_impl(
+      "Kokkos::exclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      init_value, bop);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType,
+          class BinaryOpType>
+auto exclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    ValueType init_value, BinaryOpType bop) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::exclusive_scan_custom_op_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), init_value, bop);
+}
+
+//////////////////////////////////////
+//
+// transform_exclusive_scan public API
+//
+//////////////////////////////////////
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType, class BinaryOpType,
+          class UnaryOpType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+transform_exclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+                         InputIteratorType last, OutputIteratorType first_dest,
+                         ValueType init_value, BinaryOpType binary_op,
+                         UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  return Impl::transform_exclusive_scan_impl(
+      "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex,
+      first, last, first_dest, init_value, binary_op, unary_op);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class ValueType, class BinaryOpType,
+          class UnaryOpType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+transform_exclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                         InputIteratorType first, InputIteratorType last,
+                         OutputIteratorType first_dest, ValueType init_value,
+                         BinaryOpType binary_op, UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  return Impl::transform_exclusive_scan_impl(label, ex, first, last, first_dest,
+                                             init_value, binary_op, unary_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType,
+          class BinaryOpType, class UnaryOpType>
+auto transform_exclusive_scan(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::transform_exclusive_scan_impl(
+      "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      init_value, binary_op, unary_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType,
+          class BinaryOpType, class UnaryOpType>
+auto transform_exclusive_scan(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::transform_exclusive_scan_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), init_value, binary_op, unary_op);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_IdentityReferenceUnaryFunctor.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_IdentityReferenceUnaryFunctor.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d43a161fcd469d430a82e067634a81cb147dd05f
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_IdentityReferenceUnaryFunctor.hpp
@@ -0,0 +1,64 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP
+#define KOKKOS_STD_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP
+
+#include <Kokkos_Macros.hpp>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template <class ValueType>
+struct StdNumericScanIdentityReferenceUnaryFunctor {
+  KOKKOS_FUNCTION
+  constexpr const ValueType& operator()(const ValueType& a) const { return a; }
+};
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_InclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_InclusiveScan.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cdafc818697a3c9578b581b308e5d22a733a3df4
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_InclusiveScan.hpp
@@ -0,0 +1,699 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_NUMERICS_INCLUSIVE_SCAN_HPP
+#define KOKKOS_STD_NUMERICS_INCLUSIVE_SCAN_HPP
+
+#include <Kokkos_Core.hpp>
+#include "../Kokkos_BeginEnd.hpp"
+#include "../Kokkos_Constraints.hpp"
+#include "../Kokkos_Distance.hpp"
+#include "../Kokkos_ModifyingOperations.hpp"
+#include "../Kokkos_ValueWrapperForNoNeutralElement.hpp"
+#include "Kokkos_IdentityReferenceUnaryFunctor.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template <class ExeSpace, class IndexType, class ValueType, class FirstFrom,
+          class FirstDest>
+struct InclusiveScanDefaultFunctor {
+  using execution_space = ExeSpace;
+  using value_type      = ValueWrapperForNoNeutralElement<ValueType>;
+
+  FirstFrom m_first_from;
+  FirstDest m_first_dest;
+
+  KOKKOS_FUNCTION
+  InclusiveScanDefaultFunctor(FirstFrom first_from, FirstDest first_dest)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, value_type& update,
+                  const bool final_pass) const {
+    const auto tmp = value_type{m_first_from[i], false};
+    this->join(update, tmp);
+
+    if (final_pass) {
+      m_first_dest[i] = update.val;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  void init(value_type& update) const {
+    update.val        = {};
+    update.is_initial = true;
+  }
+
+  KOKKOS_FUNCTION
+  void join(volatile value_type& update,
+            volatile const value_type& input) const {
+    if (update.is_initial) {
+      update.val = input.val;
+    } else {
+      update.val = update.val + input.val;
+    }
+    update.is_initial = false;
+  }
+};
+
+template <class ExeSpace, class IndexType, class ValueType, class FirstFrom,
+          class FirstDest, class BinaryOpType, class UnaryOpType>
+struct TransformInclusiveScanNoInitValueFunctor {
+  using execution_space = ExeSpace;
+  using value_type      = ValueWrapperForNoNeutralElement<ValueType>;
+
+  FirstFrom m_first_from;
+  FirstDest m_first_dest;
+  BinaryOpType m_binary_op;
+  UnaryOpType m_unary_op;
+
+  KOKKOS_FUNCTION
+  TransformInclusiveScanNoInitValueFunctor(FirstFrom first_from,
+                                           FirstDest first_dest,
+                                           BinaryOpType bop, UnaryOpType uop)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_binary_op(std::move(bop)),
+        m_unary_op(std::move(uop)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, value_type& update,
+                  const bool final_pass) const {
+    const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
+    this->join(update, tmp);
+    if (final_pass) {
+      m_first_dest[i] = update.val;
+    }
+  }
+
+  KOKKOS_FUNCTION
+  void init(value_type& update) const {
+    update.val        = {};
+    update.is_initial = true;
+  }
+
+  KOKKOS_FUNCTION
+  void join(volatile value_type& update,
+            volatile const value_type& input) const {
+    if (update.is_initial) {
+      update.val = input.val;
+    } else {
+      update.val = m_binary_op(update.val, input.val);
+    }
+    update.is_initial = false;
+  }
+};
+
+template <class ExeSpace, class IndexType, class ValueType, class FirstFrom,
+          class FirstDest, class BinaryOpType, class UnaryOpType>
+struct TransformInclusiveScanWithInitValueFunctor {
+  using execution_space = ExeSpace;
+  using value_type      = ValueWrapperForNoNeutralElement<ValueType>;
+
+  FirstFrom m_first_from;
+  FirstDest m_first_dest;
+  BinaryOpType m_binary_op;
+  UnaryOpType m_unary_op;
+  ValueType m_init;
+
+  KOKKOS_FUNCTION
+  TransformInclusiveScanWithInitValueFunctor(FirstFrom first_from,
+                                             FirstDest first_dest,
+                                             BinaryOpType bop, UnaryOpType uop,
+                                             ValueType init)
+      : m_first_from(std::move(first_from)),
+        m_first_dest(std::move(first_dest)),
+        m_binary_op(std::move(bop)),
+        m_unary_op(std::move(uop)),
+        m_init(std::move(init)) {}
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, value_type& update,
+                  const bool final_pass) const {
+    const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
+    this->join(update, tmp);
+
+    if (final_pass) {
+      m_first_dest[i] = m_binary_op(update.val, m_init);
+    }
+  }
+
+  KOKKOS_FUNCTION
+  void init(value_type& update) const {
+    update.val        = {};
+    update.is_initial = true;
+  }
+
+  KOKKOS_FUNCTION
+  void join(volatile value_type& update,
+            volatile const value_type& input) const {
+    if (update.is_initial) {
+      update.val = input.val;
+    } else {
+      update.val = m_binary_op(update.val, input.val);
+    }
+    update.is_initial = false;
+  }
+};
+
+// -------------------------------------------------------------
+// inclusive_scan_default_op_impl
+// -------------------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType>
+OutputIteratorType inclusive_scan_default_op_impl(
+    const std::string& label, const ExecutionSpace& ex,
+    InputIteratorType first_from, InputIteratorType last_from,
+    OutputIteratorType first_dest) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using value_type =
+      std::remove_const_t<typename InputIteratorType::value_type>;
+  using func_type =
+      InclusiveScanDefaultFunctor<ExecutionSpace, index_type, value_type,
+                                  InputIteratorType, OutputIteratorType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(label,
+                          RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                          func_type(first_from, first_dest));
+  ex.fence("Kokkos::inclusive_scan_default_op: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+// -------------------------------------------------------------
+// inclusive_scan_custom_binary_op_impl
+// -------------------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType>
+OutputIteratorType inclusive_scan_custom_binary_op_impl(
+    const std::string& label, const ExecutionSpace& ex,
+    InputIteratorType first_from, InputIteratorType last_from,
+    OutputIteratorType first_dest, BinaryOpType binary_op) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using value_type =
+      std::remove_const_t<typename InputIteratorType::value_type>;
+  using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>;
+  using func_type     = TransformInclusiveScanNoInitValueFunctor<
+      ExecutionSpace, index_type, value_type, InputIteratorType,
+      OutputIteratorType, BinaryOpType, unary_op_type>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_type(first_from, first_dest, binary_op, unary_op_type()));
+  ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+// -------------------------------------------------------------
+// inclusive_scan_custom_binary_op_impl with init_value
+// -------------------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType, class ValueType>
+OutputIteratorType inclusive_scan_custom_binary_op_impl(
+    const std::string& label, const ExecutionSpace& ex,
+    InputIteratorType first_from, InputIteratorType last_from,
+    OutputIteratorType first_dest, BinaryOpType binary_op,
+    ValueType init_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type    = typename InputIteratorType::difference_type;
+  using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
+  using func_type     = TransformInclusiveScanWithInitValueFunctor<
+      ExecutionSpace, index_type, ValueType, InputIteratorType,
+      OutputIteratorType, BinaryOpType, unary_op_type>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(label,
+                          RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                          func_type(first_from, first_dest, binary_op,
+                                    unary_op_type(), init_value));
+  ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+// -------------------------------------------------------------
+// transform_inclusive_scan_impl without init_value
+// -------------------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType, class UnaryOpType>
+OutputIteratorType transform_inclusive_scan_impl(const std::string& label,
+                                                 const ExecutionSpace& ex,
+                                                 InputIteratorType first_from,
+                                                 InputIteratorType last_from,
+                                                 OutputIteratorType first_dest,
+                                                 BinaryOpType binary_op,
+                                                 UnaryOpType unary_op) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using value_type =
+      std::remove_const_t<typename InputIteratorType::value_type>;
+  using func_type = TransformInclusiveScanNoInitValueFunctor<
+      ExecutionSpace, index_type, value_type, InputIteratorType,
+      OutputIteratorType, BinaryOpType, UnaryOpType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_type(first_from, first_dest, binary_op, unary_op));
+  ex.fence("Kokkos::transform_inclusive_scan: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+// -------------------------------------------------------------
+// transform_inclusive_scan_impl with init_value
+// -------------------------------------------------------------
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType, class UnaryOpType,
+          class ValueType>
+OutputIteratorType transform_inclusive_scan_impl(
+    const std::string& label, const ExecutionSpace& ex,
+    InputIteratorType first_from, InputIteratorType last_from,
+    OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op,
+    ValueType init_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
+  Impl::static_assert_iterators_have_matching_difference_type(first_from,
+                                                              first_dest);
+  Impl::expect_valid_range(first_from, last_from);
+
+  // aliases
+  using index_type = typename InputIteratorType::difference_type;
+  using func_type  = TransformInclusiveScanWithInitValueFunctor<
+      ExecutionSpace, index_type, ValueType, InputIteratorType,
+      OutputIteratorType, BinaryOpType, UnaryOpType>;
+
+  // run
+  const auto num_elements =
+      Kokkos::Experimental::distance(first_from, last_from);
+  ::Kokkos::parallel_scan(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_type(first_from, first_dest, binary_op, unary_op, init_value));
+  ex.fence("Kokkos::transform_inclusive_scan: fence after operation");
+
+  // return
+  return first_dest + num_elements;
+}
+
+}  // end namespace Impl
+
+///////////////////////////////
+//
+// inclusive scan API
+//
+///////////////////////////////
+
+// overload set 1
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest) {
+  return Impl::inclusive_scan_default_op_impl(
+      "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last,
+      first_dest);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest) {
+  return Impl::inclusive_scan_default_op_impl(label, ex, first, last,
+                                              first_dest);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto inclusive_scan(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_default_op_impl(
+      "Kokkos::inclusive_scan_default_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto inclusive_scan(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from),
+                                              KE::cend(view_from),
+                                              KE::begin(view_dest));
+}
+
+// overload set 2 (accepting custom binary op)
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest,
+               BinaryOp binary_op) {
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last,
+      first_dest, binary_op);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest, BinaryOp binary_op) {
+  return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last,
+                                                    first_dest, binary_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp>
+auto inclusive_scan(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      binary_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp>
+auto inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), binary_op);
+}
+
+// overload set 3
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp, class ValueType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest,
+               BinaryOp binary_op, ValueType init_value) {
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last,
+      first_dest, binary_op, init_value);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp, class ValueType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest, BinaryOp binary_op,
+               ValueType init_value) {
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      label, ex, first, last, first_dest, binary_op, init_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp,
+          class ValueType>
+auto inclusive_scan(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op, ValueType init_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      binary_op, init_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp,
+          class ValueType>
+auto inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op, ValueType init_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), binary_op, init_value);
+}
+
+//////////////////////////////////////
+//
+// transform_inclusive_scan public API
+//
+//////////////////////////////////////
+
+// overload set 1 (no init value)
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType, class UnaryOpType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+                         InputIteratorType last, OutputIteratorType first_dest,
+                         BinaryOpType binary_op, UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::transform_inclusive_scan_impl(
+      "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex,
+      first, last, first_dest, binary_op, unary_op);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType, class UnaryOpType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                         InputIteratorType first, InputIteratorType last,
+                         OutputIteratorType first_dest, BinaryOpType binary_op,
+                         UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+
+  return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest,
+                                             binary_op, unary_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOpType,
+          class UnaryOpType>
+auto transform_inclusive_scan(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    BinaryOpType binary_op, UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::transform_inclusive_scan_impl(
+      "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      binary_op, unary_op);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOpType,
+          class UnaryOpType>
+auto transform_inclusive_scan(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    BinaryOpType binary_op, UnaryOpType unary_op) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::transform_inclusive_scan_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), binary_op, unary_op);
+}
+
+// overload set 2 (init value)
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType, class UnaryOpType,
+          class ValueType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+                         InputIteratorType last, OutputIteratorType first_dest,
+                         BinaryOpType binary_op, UnaryOpType unary_op,
+                         ValueType init_value) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  return Impl::transform_inclusive_scan_impl(
+      "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex,
+      first, last, first_dest, binary_op, unary_op, init_value);
+}
+
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOpType, class UnaryOpType,
+          class ValueType>
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
+                      InputIteratorType, OutputIteratorType>::value,
+                  OutputIteratorType>
+transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                         InputIteratorType first, InputIteratorType last,
+                         OutputIteratorType first_dest, BinaryOpType binary_op,
+                         UnaryOpType unary_op, ValueType init_value) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest,
+                                             binary_op, unary_op, init_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOpType,
+          class UnaryOpType, class ValueType>
+auto transform_inclusive_scan(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::transform_inclusive_scan_impl(
+      "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      binary_op, unary_op, init_value);
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOpType,
+          class UnaryOpType, class ValueType>
+auto transform_inclusive_scan(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+    BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) {
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::transform_inclusive_scan_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), binary_op, unary_op, init_value);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_Reduce.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_Reduce.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bf03f6e98f83b388462f4c26864e43dbc285d109
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_Reduce.hpp
@@ -0,0 +1,326 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_NUMERICS_REDUCE_HPP
+#define KOKKOS_STD_NUMERICS_REDUCE_HPP
+
+#include <Kokkos_Core.hpp>
+#include "../Kokkos_BeginEnd.hpp"
+#include "../Kokkos_Constraints.hpp"
+#include "../Kokkos_Distance.hpp"
+#include "../Kokkos_ModifyingOperations.hpp"
+#include "../Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template <class ValueType>
+struct StdReduceDefaultJoinFunctor {
+  KOKKOS_FUNCTION
+  constexpr ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return a + b;
+  }
+
+  KOKKOS_FUNCTION
+  constexpr ValueType operator()(const volatile ValueType& a,
+                                 const volatile ValueType& b) const {
+    return a + b;
+  }
+};
+
+template <class IteratorType, class ReducerType>
+struct StdReduceFunctor {
+  using red_value_type = typename ReducerType::value_type;
+  using index_type     = typename IteratorType::difference_type;
+
+  const IteratorType m_first;
+  const ReducerType m_reducer;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, red_value_type& red_value) const {
+    auto tmp_wrapped_value = red_value_type{m_first[i], false};
+
+    if (red_value.is_initial) {
+      red_value = tmp_wrapped_value;
+    } else {
+      m_reducer.join(red_value, tmp_wrapped_value);
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdReduceFunctor(IteratorType first, ReducerType reducer)
+      : m_first(std::move(first)), m_reducer(std::move(reducer)) {}
+};
+
+//------------------------------
+// reduce_custom_functors_impl
+//------------------------------
+template <class ExecutionSpace, class IteratorType, class ValueType,
+          class JoinerType>
+ValueType reduce_custom_functors_impl(const std::string& label,
+                                      const ExecutionSpace& ex,
+                                      IteratorType first, IteratorType last,
+                                      ValueType init_reduction_value,
+                                      JoinerType joiner) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    // init is returned, unmodified
+    return init_reduction_value;
+  }
+
+  // aliases
+  using reducer_type =
+      ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>;
+  using functor_type         = StdReduceFunctor<IteratorType, reducer_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+
+  // run
+  reduction_value_type result;
+  reducer_type reducer(result, joiner);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            functor_type(first, reducer), reducer);
+
+  // fence not needed since reducing into scalar
+  return joiner(result.val, init_reduction_value);
+}
+
+template <class ExecutionSpace, class IteratorType, class ValueType>
+ValueType reduce_default_functors_impl(const std::string& label,
+                                       const ExecutionSpace& ex,
+                                       IteratorType first, IteratorType last,
+                                       ValueType init_reduction_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::expect_valid_range(first, last);
+
+  using value_type  = Kokkos::Impl::remove_cvref_t<ValueType>;
+  using joiner_type = Impl::StdReduceDefaultJoinFunctor<value_type>;
+  return reduce_custom_functors_impl(
+      label, ex, first, last, std::move(init_reduction_value), joiner_type());
+}
+
+}  // end namespace Impl
+
+///////////////////////////////
+//
+// reduce public API
+//
+///////////////////////////////
+
+//
+// overload set 1
+//
+template <class ExecutionSpace, class IteratorType>
+typename IteratorType::value_type reduce(const ExecutionSpace& ex,
+                                         IteratorType first,
+                                         IteratorType last) {
+  return Impl::reduce_default_functors_impl(
+      "Kokkos::reduce_default_functors_iterator_api", ex, first, last,
+      typename IteratorType::value_type());
+}
+
+template <class ExecutionSpace, class IteratorType>
+typename IteratorType::value_type reduce(const std::string& label,
+                                         const ExecutionSpace& ex,
+                                         IteratorType first,
+                                         IteratorType last) {
+  return Impl::reduce_default_functors_impl(
+      label, ex, first, last, typename IteratorType::value_type());
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto reduce(const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  using view_type  = ::Kokkos::View<DataType, Properties...>;
+  using value_type = typename view_type::value_type;
+
+  return Impl::reduce_default_functors_impl(
+      "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view),
+      KE::cend(view), value_type());
+}
+
+template <class ExecutionSpace, class DataType, class... Properties>
+auto reduce(const std::string& label, const ExecutionSpace& ex,
+            const ::Kokkos::View<DataType, Properties...>& view) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  using view_type  = ::Kokkos::View<DataType, Properties...>;
+  using value_type = typename view_type::value_type;
+
+  return Impl::reduce_default_functors_impl(label, ex, KE::cbegin(view),
+                                            KE::cend(view), value_type());
+}
+
+//
+// overload set2:
+//
+template <class ExecutionSpace, class IteratorType, class ValueType>
+ValueType reduce(const ExecutionSpace& ex, IteratorType first,
+                 IteratorType last, ValueType init_reduction_value) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::reduce_default_functors_impl(
+      "Kokkos::reduce_default_functors_iterator_api", ex, first, last,
+      init_reduction_value);
+}
+
+template <class ExecutionSpace, class IteratorType, class ValueType>
+ValueType reduce(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first, IteratorType last,
+                 ValueType init_reduction_value) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::reduce_default_functors_impl(label, ex, first, last,
+                                            init_reduction_value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType>
+ValueType reduce(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& view,
+                 ValueType init_reduction_value) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::reduce_default_functors_impl(
+      "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view),
+      KE::cend(view), init_reduction_value);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType>
+ValueType reduce(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& view,
+                 ValueType init_reduction_value) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::reduce_default_functors_impl(
+      label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value);
+}
+
+//
+// overload set 3
+//
+template <class ExecutionSpace, class IteratorType, class ValueType,
+          class BinaryOp>
+ValueType reduce(const ExecutionSpace& ex, IteratorType first,
+                 IteratorType last, ValueType init_reduction_value,
+                 BinaryOp joiner) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::reduce_custom_functors_impl(
+      "Kokkos::reduce_default_functors_iterator_api", ex, first, last,
+      init_reduction_value, joiner);
+}
+
+template <class ExecutionSpace, class IteratorType, class ValueType,
+          class BinaryOp>
+ValueType reduce(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first, IteratorType last,
+                 ValueType init_reduction_value, BinaryOp joiner) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::reduce_custom_functors_impl(label, ex, first, last,
+                                           init_reduction_value, joiner);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType, class BinaryOp>
+ValueType reduce(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& view,
+                 ValueType init_reduction_value, BinaryOp joiner) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::reduce_custom_functors_impl(
+      "Kokkos::reduce_custom_functors_view_api", ex, KE::cbegin(view),
+      KE::cend(view), init_reduction_value, joiner);
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType, class BinaryOp>
+ValueType reduce(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& view,
+                 ValueType init_reduction_value, BinaryOp joiner) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::reduce_custom_functors_impl(label, ex, KE::cbegin(view),
+                                           KE::cend(view), init_reduction_value,
+                                           joiner);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_TransformReduce.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_TransformReduce.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..846166d322557a9872b0c74cfcef707e64fcf6f5
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_TransformReduce.hpp
@@ -0,0 +1,488 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_NUMERICS_TRANSFORM_REDUCE_HPP
+#define KOKKOS_STD_NUMERICS_TRANSFORM_REDUCE_HPP
+
+#include <Kokkos_Core.hpp>
+#include "../Kokkos_Constraints.hpp"
+#include "../Kokkos_Distance.hpp"
+#include "../Kokkos_ModifyingOperations.hpp"
+#include "../Kokkos_BeginEnd.hpp"
+#include "../Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+//
+// helper functors
+//
+template <class ValueType>
+struct StdTranformReduceDefaultBinaryTransformFunctor {
+  KOKKOS_FUNCTION
+  constexpr ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a * b);
+  }
+};
+
+template <class ValueType>
+struct StdTranformReduceDefaultJoinFunctor {
+  KOKKOS_FUNCTION
+  constexpr ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return a + b;
+  }
+
+  KOKKOS_FUNCTION
+  constexpr ValueType operator()(const volatile ValueType& a,
+                                 const volatile ValueType& b) const {
+    return a + b;
+  }
+};
+
+template <class IteratorType, class ReducerType, class TransformType>
+struct StdTransformReduceSingleIntervalFunctor {
+  using red_value_type = typename ReducerType::value_type;
+  using index_type     = typename IteratorType::difference_type;
+
+  const IteratorType m_first;
+  const ReducerType m_reducer;
+  const TransformType m_transform;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, red_value_type& red_value) const {
+    auto tmp_wrapped_value = red_value_type{m_transform(m_first[i]), false};
+    if (red_value.is_initial) {
+      red_value = tmp_wrapped_value;
+    } else {
+      m_reducer.join(red_value, tmp_wrapped_value);
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdTransformReduceSingleIntervalFunctor(IteratorType first,
+                                          ReducerType reducer,
+                                          TransformType transform)
+      : m_first(std::move(first)),
+        m_reducer(std::move(reducer)),
+        m_transform(std::move(transform)) {}
+};
+
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ReducerType, class TransformType>
+struct StdTransformReduceTwoIntervalsFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  const IteratorType1 m_first1;
+  const IteratorType2 m_first2;
+  const ReducerType m_reducer;
+  const TransformType m_transform;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    auto tmp_wrapped_value =
+        red_value_type{m_transform(m_first1[i], m_first2[i]), false};
+
+    if (red_value.is_initial) {
+      red_value = tmp_wrapped_value;
+    } else {
+      m_reducer.join(red_value, tmp_wrapped_value);
+    }
+  }
+
+  KOKKOS_FUNCTION
+  StdTransformReduceTwoIntervalsFunctor(IteratorType1 first1,
+                                        IteratorType2 first2,
+                                        ReducerType reducer,
+                                        TransformType transform)
+      : m_first1(std::move(first1)),
+        m_first2(std::move(first2)),
+        m_reducer(std::move(reducer)),
+        m_transform(std::move(transform)) {}
+};
+
+//------------------------------
+//
+// impl functions
+//
+//------------------------------
+
+template <class ExecutionSpace, class IteratorType, class ValueType,
+          class JoinerType, class UnaryTransformerType>
+ValueType transform_reduce_custom_functors_impl(
+    const std::string& label, const ExecutionSpace& ex, IteratorType first,
+    IteratorType last, ValueType init_reduction_value, JoinerType joiner,
+    UnaryTransformerType transformer) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::expect_valid_range(first, last);
+
+  if (first == last) {
+    // init is returned, unmodified
+    return init_reduction_value;
+  }
+
+  // aliases
+  using reducer_type =
+      ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>;
+  using functor_type =
+      StdTransformReduceSingleIntervalFunctor<IteratorType, reducer_type,
+                                              UnaryTransformerType>;
+  using reduction_value_type = typename reducer_type::value_type;
+
+  // run
+  reduction_value_type result;
+  reducer_type reducer(result, joiner);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(label,
+                            RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+                            functor_type(first, reducer, transformer), reducer);
+
+  // fence not needed since reducing into scalar
+
+  // as per standard, transform is not applied to the init value
+  // https://en.cppreference.com/w/cpp/algorithm/transform_reduce
+  return joiner(result.val, init_reduction_value);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ValueType, class JoinerType, class BinaryTransformerType>
+ValueType transform_reduce_custom_functors_impl(
+    const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+    IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value,
+    JoinerType joiner, BinaryTransformerType transformer) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, first2);
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, first2);
+  Impl::expect_valid_range(first1, last1);
+
+  if (first1 == last1) {
+    // init is returned, unmodified
+    return init_reduction_value;
+  }
+
+  // aliases
+  using index_type = typename IteratorType1::difference_type;
+  using reducer_type =
+      ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>;
+  using functor_type =
+      StdTransformReduceTwoIntervalsFunctor<index_type, IteratorType1,
+                                            IteratorType2, reducer_type,
+                                            BinaryTransformerType>;
+  using reduction_value_type = typename reducer_type::value_type;
+
+  // run
+  reduction_value_type result;
+  reducer_type reducer(result, joiner);
+
+  const auto num_elements = Kokkos::Experimental::distance(first1, last1);
+  ::Kokkos::parallel_reduce(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      functor_type(first1, first2, reducer, transformer), reducer);
+
+  // fence not needed since reducing into scalar
+  return joiner(result.val, init_reduction_value);
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ValueType>
+ValueType transform_reduce_default_functors_impl(
+    const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
+    IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first1, first2);
+  Impl::static_assert_is_not_openmptarget(ex);
+  Impl::static_assert_iterators_have_matching_difference_type(first1, first2);
+  Impl::expect_valid_range(first1, last1);
+
+  // aliases
+  using transformer_type =
+      Impl::StdTranformReduceDefaultBinaryTransformFunctor<ValueType>;
+  using joiner_type = Impl::StdTranformReduceDefaultJoinFunctor<ValueType>;
+
+  return transform_reduce_custom_functors_impl(
+      label, ex, first1, last1, first2, std::move(init_reduction_value),
+      joiner_type(), transformer_type());
+}
+
+}  // end namespace Impl
+
+///////////////////////////////
+//
+// transform_reduce public API
+//
+///////////////////////////////
+
+// ----------------------------
+// overload set1:
+// no custom functors passed, so equivalent to
+// transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>());
+// ----------------------------
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ValueType>
+ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1,
+                           IteratorType1 last1, IteratorType2 first2,
+                           ValueType init_reduction_value) {
+  return Impl::transform_reduce_default_functors_impl(
+      "Kokkos::transform_reduce_default_functors_iterator_api", ex, first1,
+      last1, first2, std::move(init_reduction_value));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ValueType>
+ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType1 first1, IteratorType1 last1,
+                           IteratorType2 first2,
+                           ValueType init_reduction_value) {
+  return Impl::transform_reduce_default_functors_impl(
+      label, ex, first1, last1, first2, std::move(init_reduction_value));
+}
+
+// overload1 accepting views
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+ValueType transform_reduce(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& first_view,
+    const ::Kokkos::View<DataType2, Properties2...>& second_view,
+    ValueType init_reduction_value) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view);
+
+  return Impl::transform_reduce_default_functors_impl(
+      "Kokkos::transform_reduce_default_functors_iterator_api", ex,
+      KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view),
+      std::move(init_reduction_value));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType>
+ValueType transform_reduce(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& first_view,
+    const ::Kokkos::View<DataType2, Properties2...>& second_view,
+    ValueType init_reduction_value) {
+  namespace KE = ::Kokkos::Experimental;
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view);
+
+  return Impl::transform_reduce_default_functors_impl(
+      label, ex, KE::cbegin(first_view), KE::cend(first_view),
+      KE::cbegin(second_view), std::move(init_reduction_value));
+}
+
+//
+// overload set2:
+// accepts a custom transform and joiner functor
+//
+
+// Note the std refers to the arg BinaryReductionOp
+// but in the Kokkos naming convention, it corresponds
+// to a "joiner" that knows how to join two values
+// NOTE: "joiner/transformer" need to be commutative.
+
+// https://en.cppreference.com/w/cpp/algorithm/transform_reduce
+
+// api accepting iterators
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ValueType, class BinaryJoinerType, class BinaryTransform>
+ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1,
+                           IteratorType1 last1, IteratorType2 first2,
+                           ValueType init_reduction_value,
+                           BinaryJoinerType joiner,
+                           BinaryTransform transformer) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::transform_reduce_custom_functors_impl(
+      "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1,
+      last1, first2, std::move(init_reduction_value), std::move(joiner),
+      std::move(transformer));
+}
+
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class ValueType, class BinaryJoinerType, class BinaryTransform>
+ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType1 first1, IteratorType1 last1,
+                           IteratorType2 first2, ValueType init_reduction_value,
+                           BinaryJoinerType joiner,
+                           BinaryTransform transformer) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::transform_reduce_custom_functors_impl(
+      label, ex, first1, last1, first2, std::move(init_reduction_value),
+      std::move(joiner), std::move(transformer));
+}
+
+// accepting views
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType,
+          class BinaryJoinerType, class BinaryTransform>
+ValueType transform_reduce(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& first_view,
+    const ::Kokkos::View<DataType2, Properties2...>& second_view,
+    ValueType init_reduction_value, BinaryJoinerType joiner,
+    BinaryTransform transformer) {
+  namespace KE = ::Kokkos::Experimental;
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view);
+
+  return Impl::transform_reduce_custom_functors_impl(
+      "Kokkos::transform_reduce_custom_functors_view_api", ex,
+      KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view),
+      std::move(init_reduction_value), std::move(joiner),
+      std::move(transformer));
+}
+
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class ValueType,
+          class BinaryJoinerType, class BinaryTransform>
+ValueType transform_reduce(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& first_view,
+    const ::Kokkos::View<DataType2, Properties2...>& second_view,
+    ValueType init_reduction_value, BinaryJoinerType joiner,
+    BinaryTransform transformer) {
+  namespace KE = ::Kokkos::Experimental;
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view);
+
+  return Impl::transform_reduce_custom_functors_impl(
+      label, ex, KE::cbegin(first_view), KE::cend(first_view),
+      KE::cbegin(second_view), std::move(init_reduction_value),
+      std::move(joiner), std::move(transformer));
+}
+
+//
+// overload set3:
+//
+// accepting iterators
+template <class ExecutionSpace, class IteratorType, class ValueType,
+          class BinaryJoinerType, class UnaryTransform>
+// need this to avoid ambiguous call
+std::enable_if_t<
+    ::Kokkos::Experimental::Impl::are_iterators<IteratorType>::value, ValueType>
+transform_reduce(const ExecutionSpace& ex, IteratorType first1,
+                 IteratorType last1, ValueType init_reduction_value,
+                 BinaryJoinerType joiner, UnaryTransform transformer) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::transform_reduce_custom_functors_impl(
+      "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1,
+      last1, std::move(init_reduction_value), std::move(joiner),
+      std::move(transformer));
+}
+
+template <class ExecutionSpace, class IteratorType, class ValueType,
+          class BinaryJoinerType, class UnaryTransform>
+// need this to avoid ambiguous call
+std::enable_if_t<
+    ::Kokkos::Experimental::Impl::are_iterators<IteratorType>::value, ValueType>
+transform_reduce(const std::string& label, const ExecutionSpace& ex,
+                 IteratorType first1, IteratorType last1,
+                 ValueType init_reduction_value, BinaryJoinerType joiner,
+                 UnaryTransform transformer) {
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  return Impl::transform_reduce_custom_functors_impl(
+      label, ex, first1, last1, std::move(init_reduction_value),
+      std::move(joiner), std::move(transformer));
+}
+
+// accepting views
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType, class BinaryJoinerType, class UnaryTransform>
+ValueType transform_reduce(const ExecutionSpace& ex,
+                           const ::Kokkos::View<DataType, Properties...>& view,
+                           ValueType init_reduction_value,
+                           BinaryJoinerType joiner,
+                           UnaryTransform transformer) {
+  namespace KE = ::Kokkos::Experimental;
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::transform_reduce_custom_functors_impl(
+      "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(view),
+      KE::cend(view), std::move(init_reduction_value), std::move(joiner),
+      std::move(transformer));
+}
+
+template <class ExecutionSpace, class DataType, class... Properties,
+          class ValueType, class BinaryJoinerType, class UnaryTransform>
+ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex,
+                           const ::Kokkos::View<DataType, Properties...>& view,
+                           ValueType init_reduction_value,
+                           BinaryJoinerType joiner,
+                           UnaryTransform transformer) {
+  namespace KE = ::Kokkos::Experimental;
+  static_assert(std::is_move_constructible<ValueType>::value,
+                "ValueType must be move constructible.");
+
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::transform_reduce_custom_functors_impl(
+      label, ex, KE::cbegin(view), KE::cend(view),
+      std::move(init_reduction_value), std::move(joiner),
+      std::move(transformer));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt
index 50f8f0a332a6e528bcb12c26a4c60d5599f02c1d..94e6b2784f1812b8ff2c4f20232e965d7adcb097 100644
--- a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt
+++ b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt
@@ -6,70 +6,153 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
 KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files)
 
 
-SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
-KOKKOS_INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
+SET(ALGORITHM UnitTestMain.cpp)
 
-# mfh 03 Nov 2017: The gtest library used here must have a different
-# name than that of the gtest library built in KokkosCore.  We can't
-# just refer to the library in KokkosCore's tests, because it's
-# possible to build only (e.g.,) KokkosAlgorithms tests, without
-# building KokkosCore tests.
+foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
+  string(TOUPPER ${Tag} DEVICE)
+  string(TOLOWER ${Tag} dir)
 
+  if(Kokkos_ENABLE_${DEVICE})
+    set(dir ${CMAKE_CURRENT_BINARY_DIR}/${dir})
+    file(MAKE_DIRECTORY ${dir})
 
-KOKKOS_ADD_TEST_LIBRARY(
-  kokkosalgorithms_gtest
-  HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
-  SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
-)
+    # -------------------------
+    # Sort1d,3d, Random
+    # -------------------------
+    set(SOURCES_A)
+    if(Tag STREQUAL "OpenMP")
+      LIST(APPEND SOURCES_A
+	TestOpenMP_Sort1D.cpp
+	TestOpenMP_Sort3D.cpp
+	TestOpenMP_SortDynamicView.cpp
+	)
+    endif()
 
-# avoid deprecation warnings from MSVC
-TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0)
+    set(file ${dir}/TestRandomAndSort.cpp)
+    # Write to a temporary intermediate file and call configure_file to avoid
+    # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs.
+    file(WRITE ${dir}/dummy.cpp
+      "#include <Test${Tag}_Category.hpp>\n"
+      "#include <TestRandomCommon.hpp>\n"
+      "#include <TestSortCommon.hpp>\n"
+      )
+    configure_file(${dir}/dummy.cpp ${file})
+    list(APPEND SOURCES_A ${file})
 
-IF((NOT (Kokkos_ENABLE_CUDA AND WIN32)) AND (NOT ("${KOKKOS_CXX_COMPILER_ID}" STREQUAL "Fujitsu")))
-  TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_14)
-ENDIF()
+    # ------------------------------------------
+    # std set A
+    # ------------------------------------------
+    set(STDALGO_SOURCES_A)
+    foreach(Name
+	StdReducers
+	StdAlgorithmsConstraints
+	RandomAccessIterator
+	)
+      list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
+    endforeach()
 
-# Suppress clang-tidy diagnostics on code that we do not have control over
-IF(CMAKE_CXX_CLANG_TIDY)
-  SET_TARGET_PROPERTIES(kokkosalgorithms_gtest PROPERTIES CXX_CLANG_TIDY "")
-ENDIF()
+    # ------------------------------------------
+    # std set B
+    # ------------------------------------------
+    set(STDALGO_SOURCES_B)
+    foreach(Name
+	StdAlgorithmsCommon
+	StdAlgorithmsMinMaxElementOps
+	)
+      list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
+    endforeach()
 
-SET(ALGORITHM UnitTestMain.cpp)
+    # ------------------------------------------
+    # std set C
+    # ------------------------------------------
+    set(STDALGO_SOURCES_C)
+    foreach(Name
+	StdAlgorithmsCommon
+	StdAlgorithmsLexicographicalCompare
+	StdAlgorithmsForEach
+	StdAlgorithmsFind
+	StdAlgorithmsFindFirstOf
+	StdAlgorithmsFindEnd
+	StdAlgorithmsCount
+	StdAlgorithmsEqual
+	StdAlgorithmsAllAnyNoneOf
+	StdAlgorithmsAdjacentFind
+	StdAlgorithmsSearch
+	StdAlgorithmsSearch_n
+	StdAlgorithmsMismatch
+	)
+      list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
+    endforeach()
 
-IF(Kokkos_ENABLE_OPENMP)
-  LIST(APPEND ALGORITHM_SOURCES
-    TestOpenMP_Sort1D.cpp
-    TestOpenMP_Sort3D.cpp
-    TestOpenMP_SortDynamicView.cpp
-  )
-ENDIF()
+    # ------------------------------------------
+    # std set D
+    # ------------------------------------------
+    set(STDALGO_SOURCES_D)
+    foreach(Name
+	StdAlgorithmsCommon
+	StdAlgorithmsModOps
+	StdAlgorithmsModSeqOps
+	StdAlgorithmsReplace
+	StdAlgorithmsReplaceIf
+	StdAlgorithmsReplaceCopy
+	StdAlgorithmsReplaceCopyIf
+	StdAlgorithmsCopyIf
+	StdAlgorithmsUnique
+	StdAlgorithmsUniqueCopy
+	StdAlgorithmsRemove
+	StdAlgorithmsRemoveIf
+	StdAlgorithmsRemoveCopy
+	StdAlgorithmsRemoveCopyIf
+	StdAlgorithmsRotate
+	StdAlgorithmsRotateCopy
+	StdAlgorithmsReverse
+	StdAlgorithmsShiftLeft
+	StdAlgorithmsShiftRight
+	)
+      list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
+    endforeach()
 
-foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
-  # Because there is always an exception to the rule
-  if(Tag STREQUAL "Threads")
-    set(DEVICE "PTHREAD")
-  else()
-    string(TOUPPER ${Tag} DEVICE)
-  endif()
+    # ------------------------------------------
+    # std set E
+    # ------------------------------------------
+    set(STDALGO_SOURCES_E)
+    foreach(Name
+	StdAlgorithmsCommon
+	StdAlgorithmsIsSorted
+	StdAlgorithmsIsSortedUntil
+	StdAlgorithmsPartitioningOps
+	StdAlgorithmsPartitionCopy
+	StdAlgorithmsNumerics
+	StdAlgorithmsAdjacentDifference
+	StdAlgorithmsExclusiveScan
+	StdAlgorithmsInclusiveScan
+	StdAlgorithmsTransformUnaryOp
+	StdAlgorithmsTransformExclusiveScan
+	StdAlgorithmsTransformInclusiveScan
+	)
+      list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
+    endforeach()
 
-  if(Kokkos_ENABLE_${DEVICE})
-    set(dir ${CMAKE_CURRENT_BINARY_DIR})
-    set(file ${dir}/Test${Tag}.cpp)
-      # Write to a temporary intermediate file and call configure_file to avoid
-      # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs.
-      file(WRITE ${dir}/dummy.cpp
-          "#include <Test${Tag}_Category.hpp>\n"
-	  "#include <TestRandomCommon.hpp>\n"
-	  "#include <TestSortCommon.hpp>\n"
-      )
-      configure_file(${dir}/dummy.cpp ${file})
-      list(APPEND ALGORITHM_SOURCES ${file})
   endif()
 endforeach()
 
 KOKKOS_ADD_EXECUTABLE_AND_TEST(
-  UnitTest
+  UnitTest_RandomAndSort
   SOURCES
     UnitTestMain.cpp
-    ${ALGORITHM_SOURCES}
+    ${SOURCES_A}
+)
+
+foreach(ID A;B;C;D;E)
+  KOKKOS_ADD_EXECUTABLE_AND_TEST(
+    UnitTest_StdSet_${ID}
+    SOURCES
+    UnitTestMain.cpp
+    ${STDALGO_SOURCES_${ID}}
+    )
+endforeach()
+
+KOKKOS_ADD_EXECUTABLE(
+  UnitTest_StdAlgoCompileOnly
+  SOURCES TestStdAlgorithmsCompileOnly.cpp
 )
diff --git a/packages/kokkos/algorithms/unit_tests/Makefile b/packages/kokkos/algorithms/unit_tests/Makefile
index dd0aa87de0b2c76fe76d03f8ea77092833dd9f63..e961e7ba2c18f615a41168e044f9ad7f42d528a6 100644
--- a/packages/kokkos/algorithms/unit_tests/Makefile
+++ b/packages/kokkos/algorithms/unit_tests/Makefile
@@ -45,7 +45,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
 	TEST_TARGETS += test-hip
 endif
 
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
 	OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
 	TARGETS += KokkosAlgorithms_UnitTest_Threads
 	TEST_TARGETS += test-threads
diff --git a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp
index 3dffce7df4f8dd1663a10a871075c1841005138d..464c86a7b62f34fa9e1c4671e3d73d4e0cf24a0d 100644
--- a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp
+++ b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp
@@ -47,6 +47,7 @@
 #include <iostream>
 #include <cstdlib>
 #include <cstdio>
+#include <Kokkos_DynRankView.hpp>
 #include <Kokkos_Timer.hpp>
 #include <Kokkos_Core.hpp>
 #include <Kokkos_Random.hpp>
@@ -327,10 +328,6 @@ template <class RandomGenerator, class Scalar>
 struct test_random_scalar {
   using rnd_type = typename RandomGenerator::generator_type;
 
-  int pass_mean, pass_var, pass_covar;
-  int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar;
-  int pass_hist3d_mean, pass_hist3d_var, pass_hist3d_covar;
-
   test_random_scalar(
       typename test_random_functor<RandomGenerator, int>::type_1d& density_1d,
       typename test_random_functor<RandomGenerator, int>::type_3d& density_3d,
@@ -357,18 +354,15 @@ struct test_random_scalar {
           variance_expect / (result.variance / num_draws / 3) - 1.0;
       double covariance_eps =
           result.covariance / num_draws / 2 / variance_expect;
-      pass_mean = ((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0;
-      pass_var  = ((-1.5 * tolerance < variance_eps) &&
-                  (1.5 * tolerance > variance_eps))
-                     ? 1
-                     : 0;
-      pass_covar = ((-2.0 * tolerance < covariance_eps) &&
-                    (2.0 * tolerance > covariance_eps))
-                       ? 1
-                       : 0;
-      cout << "Pass: " << pass_mean << " " << pass_var << " " << mean_eps << " "
-           << variance_eps << " " << covariance_eps << " || " << tolerance
-           << endl;
+#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+      if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) {
+#endif
+        EXPECT_LT(std::abs(mean_eps), tolerance);
+        EXPECT_LT(std::abs(variance_eps), 1.5 * tolerance);
+        EXPECT_LT(std::abs(covariance_eps), 2.0 * tolerance);
+#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+      }
+#endif
     }
     {
       cout << " -- Testing 1-D histogram" << endl;
@@ -399,17 +393,15 @@ struct test_random_scalar {
       }
 #endif
 
-      pass_hist1d_mean =
-          ((-mean_eps_expect < mean_eps) && (mean_eps_expect > mean_eps)) ? 1
-                                                                          : 0;
-      pass_hist1d_var = ((-variance_eps_expect < variance_eps) &&
-                         (variance_eps_expect > variance_eps))
-                            ? 1
-                            : 0;
-      pass_hist1d_covar = ((-covariance_eps_expect < covariance_eps) &&
-                           (covariance_eps_expect > covariance_eps))
-                              ? 1
-                              : 0;
+#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+      if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) {
+#endif
+        EXPECT_LT(std::abs(mean_eps), mean_eps_expect);
+        EXPECT_LT(std::abs(variance_eps), variance_eps_expect);
+        EXPECT_LT(std::abs(covariance_eps), covariance_eps_expect);
+#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+      }
+#endif
 
       cout << "Density 1D: " << mean_eps << " " << variance_eps << " "
            << (result.covariance / HIST_DIM1D / HIST_DIM1D) << " || "
@@ -445,16 +437,15 @@ struct test_random_scalar {
       }
 #endif
 
-      pass_hist3d_mean =
-          ((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0;
-      pass_hist3d_var = ((-variance_factor * tolerance < variance_eps) &&
-                         (variance_factor * tolerance > variance_eps))
-                            ? 1
-                            : 0;
-      pass_hist3d_covar = ((-variance_factor * tolerance < covariance_eps) &&
-                           (variance_factor * tolerance > covariance_eps))
-                              ? 1
-                              : 0;
+#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+      if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) {
+#endif
+        EXPECT_LT(std::abs(mean_eps), tolerance);
+        EXPECT_LT(std::abs(variance_eps), variance_factor);
+        EXPECT_LT(std::abs(covariance_eps), variance_factor);
+#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
+      }
+#endif
 
       cout << "Density 3D: " << mean_eps << " " << variance_eps << " "
            << result.covariance / HIST_DIM1D / HIST_DIM1D << " || " << tolerance
@@ -479,106 +470,79 @@ void test_random(unsigned int num_draws) {
   cout << "Test Scalar=int" << endl;
   test_random_scalar<RandomGenerator, int> test_int(density_1d, density_3d,
                                                     pool, num_draws);
-  ASSERT_EQ(test_int.pass_mean, 1);
-  ASSERT_EQ(test_int.pass_var, 1);
-  ASSERT_EQ(test_int.pass_covar, 1);
-  ASSERT_EQ(test_int.pass_hist1d_mean, 1);
-  ASSERT_EQ(test_int.pass_hist1d_var, 1);
-  ASSERT_EQ(test_int.pass_hist1d_covar, 1);
-  ASSERT_EQ(test_int.pass_hist3d_mean, 1);
-  ASSERT_EQ(test_int.pass_hist3d_var, 1);
-  ASSERT_EQ(test_int.pass_hist3d_covar, 1);
   deep_copy(density_1d, 0);
   deep_copy(density_3d, 0);
 
   cout << "Test Scalar=unsigned int" << endl;
   test_random_scalar<RandomGenerator, unsigned int> test_uint(
       density_1d, density_3d, pool, num_draws);
-  ASSERT_EQ(test_uint.pass_mean, 1);
-  ASSERT_EQ(test_uint.pass_var, 1);
-  ASSERT_EQ(test_uint.pass_covar, 1);
-  ASSERT_EQ(test_uint.pass_hist1d_mean, 1);
-  ASSERT_EQ(test_uint.pass_hist1d_var, 1);
-  ASSERT_EQ(test_uint.pass_hist1d_covar, 1);
-  ASSERT_EQ(test_uint.pass_hist3d_mean, 1);
-  ASSERT_EQ(test_uint.pass_hist3d_var, 1);
-  ASSERT_EQ(test_uint.pass_hist3d_covar, 1);
   deep_copy(density_1d, 0);
   deep_copy(density_3d, 0);
 
   cout << "Test Scalar=int64_t" << endl;
   test_random_scalar<RandomGenerator, int64_t> test_int64(
       density_1d, density_3d, pool, num_draws);
-  ASSERT_EQ(test_int64.pass_mean, 1);
-  ASSERT_EQ(test_int64.pass_var, 1);
-  ASSERT_EQ(test_int64.pass_covar, 1);
-  ASSERT_EQ(test_int64.pass_hist1d_mean, 1);
-  ASSERT_EQ(test_int64.pass_hist1d_var, 1);
-  ASSERT_EQ(test_int64.pass_hist1d_covar, 1);
-  ASSERT_EQ(test_int64.pass_hist3d_mean, 1);
-  ASSERT_EQ(test_int64.pass_hist3d_var, 1);
-  ASSERT_EQ(test_int64.pass_hist3d_covar, 1);
   deep_copy(density_1d, 0);
   deep_copy(density_3d, 0);
 
   cout << "Test Scalar=uint64_t" << endl;
   test_random_scalar<RandomGenerator, uint64_t> test_uint64(
       density_1d, density_3d, pool, num_draws);
-  ASSERT_EQ(test_uint64.pass_mean, 1);
-  ASSERT_EQ(test_uint64.pass_var, 1);
-  ASSERT_EQ(test_uint64.pass_covar, 1);
-  ASSERT_EQ(test_uint64.pass_hist1d_mean, 1);
-  ASSERT_EQ(test_uint64.pass_hist1d_var, 1);
-  ASSERT_EQ(test_uint64.pass_hist1d_covar, 1);
-  ASSERT_EQ(test_uint64.pass_hist3d_mean, 1);
-  ASSERT_EQ(test_uint64.pass_hist3d_var, 1);
-  ASSERT_EQ(test_uint64.pass_hist3d_covar, 1);
   deep_copy(density_1d, 0);
   deep_copy(density_3d, 0);
 
   cout << "Test Scalar=half" << endl;
   test_random_scalar<RandomGenerator, Kokkos::Experimental::half_t> test_half(
       density_1d, density_3d, pool, num_draws);
-  ASSERT_EQ(test_half.pass_mean, 1);
-  ASSERT_EQ(test_half.pass_var, 1);
-  ASSERT_EQ(test_half.pass_covar, 1);
-  ASSERT_EQ(test_half.pass_hist1d_mean, 1);
-  ASSERT_EQ(test_half.pass_hist1d_var, 1);
-  ASSERT_EQ(test_half.pass_hist1d_covar, 1);
-  ASSERT_EQ(test_half.pass_hist3d_mean, 1);
-  ASSERT_EQ(test_half.pass_hist3d_var, 1);
-  ASSERT_EQ(test_half.pass_hist3d_covar, 1);
+  deep_copy(density_1d, 0);
+  deep_copy(density_3d, 0);
+
+  cout << "Test Scalar=bhalf" << endl;
+  test_random_scalar<RandomGenerator, Kokkos::Experimental::bhalf_t> test_bhalf(
+      density_1d, density_3d, pool, num_draws);
   deep_copy(density_1d, 0);
   deep_copy(density_3d, 0);
 
   cout << "Test Scalar=float" << endl;
   test_random_scalar<RandomGenerator, float> test_float(density_1d, density_3d,
                                                         pool, num_draws);
-  ASSERT_EQ(test_float.pass_mean, 1);
-  ASSERT_EQ(test_float.pass_var, 1);
-  ASSERT_EQ(test_float.pass_covar, 1);
-  ASSERT_EQ(test_float.pass_hist1d_mean, 1);
-  ASSERT_EQ(test_float.pass_hist1d_var, 1);
-  ASSERT_EQ(test_float.pass_hist1d_covar, 1);
-  ASSERT_EQ(test_float.pass_hist3d_mean, 1);
-  ASSERT_EQ(test_float.pass_hist3d_var, 1);
-  ASSERT_EQ(test_float.pass_hist3d_covar, 1);
   deep_copy(density_1d, 0);
   deep_copy(density_3d, 0);
 
   cout << "Test Scalar=double" << endl;
   test_random_scalar<RandomGenerator, double> test_double(
       density_1d, density_3d, pool, num_draws);
-  ASSERT_EQ(test_double.pass_mean, 1);
-  ASSERT_EQ(test_double.pass_var, 1);
-  ASSERT_EQ(test_double.pass_covar, 1);
-  ASSERT_EQ(test_double.pass_hist1d_mean, 1);
-  ASSERT_EQ(test_double.pass_hist1d_var, 1);
-  ASSERT_EQ(test_double.pass_hist1d_covar, 1);
-  ASSERT_EQ(test_double.pass_hist3d_mean, 1);
-  ASSERT_EQ(test_double.pass_hist3d_var, 1);
-  ASSERT_EQ(test_double.pass_hist3d_covar, 1);
 }
+
+template <class ExecutionSpace, class Pool>
+struct TestDynRankView {
+  using ReducerType      = Kokkos::MinMax<double, Kokkos::HostSpace>;
+  using ReducerValueType = typename ReducerType::value_type;
+
+  Kokkos::DynRankView<double, ExecutionSpace> A;
+
+  TestDynRankView(int n) : A("a", n) {}
+
+  KOKKOS_FUNCTION void operator()(int i, ReducerValueType& update) const {
+    if (A(i) < update.min_val) update.min_val = A(i);
+    if (A(i) > update.max_val) update.max_val = A(i);
+  }
+
+  void run() {
+    Pool random(13);
+    double min = 10.;
+    double max = 100.;
+    Kokkos::fill_random(A, random, min, max);
+
+    ReducerValueType val;
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(0, A.size()),
+                            *this, ReducerType(val));
+
+    Kokkos::fence();
+    ASSERT_GE(val.min_val, min);
+    ASSERT_LE(val.max_val, max);
+  }
+};
 }  // namespace Impl
 
 template <typename ExecutionSpace>
@@ -593,6 +557,9 @@ void test_random_xorshift64() {
   Impl::test_random<Kokkos::Random_XorShift64_Pool<
       Kokkos::Device<ExecutionSpace, typename ExecutionSpace::memory_space>>>(
       num_draws);
+  Impl::TestDynRankView<ExecutionSpace,
+                        Kokkos::Random_XorShift64_Pool<ExecutionSpace>>(10000)
+      .run();
 }
 
 template <typename ExecutionSpace>
@@ -608,6 +575,9 @@ void test_random_xorshift1024() {
   Impl::test_random<Kokkos::Random_XorShift1024_Pool<
       Kokkos::Device<ExecutionSpace, typename ExecutionSpace::memory_space>>>(
       num_draws);
+  Impl::TestDynRankView<ExecutionSpace,
+                        Kokkos::Random_XorShift1024_Pool<ExecutionSpace>>(10000)
+      .run();
 }
 }  // namespace Test
 
diff --git a/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp b/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..23e8fec7d5e463ba737d21e8a19a1c2c331bdfaa
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp
@@ -0,0 +1,252 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_RandomAccessIterator.hpp>
+#include <std_algorithms/Kokkos_Distance.hpp>
+
+namespace KE = Kokkos::Experimental;
+
+namespace Test {
+namespace stdalgos {
+
+struct random_access_iterator_test : std_algorithms_test {
+ public:
+  virtual void SetUp() {
+    Kokkos::parallel_for(m_static_view.extent(0),
+                         AssignIndexFunctor<static_view_t>(m_static_view));
+
+    Kokkos::parallel_for(m_static_view.extent(0),
+                         AssignIndexFunctor<dyn_view_t>(m_dynamic_view));
+
+    Kokkos::parallel_for(m_static_view.extent(0),
+                         AssignIndexFunctor<strided_view_t>(m_strided_view));
+  }
+};
+
+TEST_F(random_access_iterator_test, constructor) {
+  // just tests that constructor works
+  auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view);
+  auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view);
+  auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view);
+  auto it4 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it5 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
+  auto it6 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
+  EXPECT_TRUE(true);
+}
+
+template <class IteratorType, class ValueType>
+void test_random_access_it_verify(IteratorType it, ValueType gold_value) {
+  using view_t = Kokkos::View<typename IteratorType::value_type>;
+  view_t checkView("checkView");
+  CopyFromIteratorFunctor<IteratorType, view_t> cf(it, checkView);
+  Kokkos::parallel_for("_std_algo_copy", 1, cf);
+  auto v_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkView);
+  EXPECT_EQ(v_h(), gold_value);
+}
+
+TEST_F(random_access_iterator_test, dereference) {
+  auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view);
+  auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view);
+  auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view);
+  test_random_access_it_verify(it1, (value_type)0);
+  test_random_access_it_verify(it2, (value_type)0);
+  test_random_access_it_verify(it3, (value_type)0);
+
+  auto it4 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it5 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 4);
+  auto it6 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 5);
+  test_random_access_it_verify(it4, (value_type)3);
+  test_random_access_it_verify(it5, (value_type)4);
+  test_random_access_it_verify(it6, (value_type)5);
+}
+
+template <class ItTypeFrom, class ViewTypeTo>
+struct CopyFromIteratorUsingSubscriptFunctor {
+  ItTypeFrom m_itFrom;
+  ViewTypeTo m_viewTo;
+
+  CopyFromIteratorUsingSubscriptFunctor(const ItTypeFrom itFromIn,
+                                        const ViewTypeTo viewToIn)
+      : m_itFrom(itFromIn), m_viewTo(viewToIn) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int i) const { m_viewTo(i) = m_itFrom[i]; }
+};
+
+template <class IteratorType>
+void test_random_access_it_subscript_op_verify(IteratorType it) {
+  using value_t = typename IteratorType::value_type;
+  using view_t  = Kokkos::View<value_t*>;
+  view_t checkView("checkView", 3);
+  CopyFromIteratorUsingSubscriptFunctor<IteratorType, view_t> cf(it, checkView);
+  Kokkos::parallel_for("_std_algo_copy", 3, cf);
+
+  auto v_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkView);
+  EXPECT_EQ(v_h(0), (value_t)0);
+  EXPECT_EQ(v_h(1), (value_t)1);
+  EXPECT_EQ(v_h(2), (value_t)2);
+}
+
+TEST_F(random_access_iterator_test, subscript_operator) {
+  auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view);
+  auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view);
+  auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view);
+  test_random_access_it_subscript_op_verify(it1);
+  test_random_access_it_subscript_op_verify(it2);
+  test_random_access_it_subscript_op_verify(it3);
+}
+
+TEST_F(random_access_iterator_test, operatorsSet1) {
+  auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
+  auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
+  ++it1;
+  ++it2;
+  ++it3;
+  test_random_access_it_verify(it1, (value_type)4);
+  test_random_access_it_verify(it2, (value_type)4);
+  test_random_access_it_verify(it3, (value_type)4);
+
+  --it1;
+  --it2;
+  --it3;
+  test_random_access_it_verify(it1, (value_type)3);
+  test_random_access_it_verify(it2, (value_type)3);
+  test_random_access_it_verify(it3, (value_type)3);
+}
+
+TEST_F(random_access_iterator_test, operatorsSet2) {
+  auto it1  = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it2  = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
+  auto it3  = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
+  auto it11 = it1 + 3;
+  auto it21 = it2 + 3;
+  auto it31 = it3 + 3;
+  test_random_access_it_verify(it11, (value_type)6);
+  test_random_access_it_verify(it21, (value_type)6);
+  test_random_access_it_verify(it31, (value_type)6);
+
+  auto it12 = it11 - 4;
+  auto it22 = it21 - 4;
+  auto it32 = it31 - 4;
+  test_random_access_it_verify(it12, (value_type)2);
+  test_random_access_it_verify(it22, (value_type)2);
+  test_random_access_it_verify(it32, (value_type)2);
+}
+
+TEST_F(random_access_iterator_test, operatorsSet3) {
+  auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
+  auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
+  it1 += 3;
+  it2 += 3;
+  it3 += 3;
+  test_random_access_it_verify(it1, (value_type)6);
+  test_random_access_it_verify(it2, (value_type)6);
+  test_random_access_it_verify(it3, (value_type)6);
+
+  it1 -= 4;
+  it2 -= 4;
+  it3 -= 4;
+  test_random_access_it_verify(it1, (value_type)2);
+  test_random_access_it_verify(it2, (value_type)2);
+  test_random_access_it_verify(it3, (value_type)2);
+}
+
+TEST_F(random_access_iterator_test, operatorsSet4) {
+  auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
+  auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
+
+  auto it4 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 4);
+  auto it5 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 4);
+  auto it6 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 4);
+  EXPECT_TRUE(it1 != it4);
+  EXPECT_TRUE(it2 != it5);
+  EXPECT_TRUE(it3 != it6);
+  EXPECT_TRUE(it1 < it4);
+  EXPECT_TRUE(it2 < it5);
+  EXPECT_TRUE(it3 < it6);
+  EXPECT_TRUE(it1 <= it4);
+  EXPECT_TRUE(it2 <= it5);
+  EXPECT_TRUE(it3 <= it6);
+
+  auto it7 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it8 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
+  auto it9 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
+  EXPECT_TRUE(it1 == it7);
+  EXPECT_TRUE(it2 == it8);
+  EXPECT_TRUE(it3 == it9);
+  EXPECT_TRUE(it1 >= it7);
+  EXPECT_TRUE(it2 >= it8);
+  EXPECT_TRUE(it3 >= it9);
+  EXPECT_TRUE(it4 > it7);
+  EXPECT_TRUE(it5 > it8);
+  EXPECT_TRUE(it6 > it9);
+}
+
+TEST_F(random_access_iterator_test, assignment_operator) {
+  auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
+  auto it2 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 5);
+  EXPECT_NE(it1, it2);
+
+  it2 = it1;
+  EXPECT_EQ(it1, it2);
+}
+
+TEST_F(random_access_iterator_test, distance) {
+  auto first = KE::begin(m_dynamic_view);
+  auto last  = KE::end(m_dynamic_view);
+
+  EXPECT_EQ(0, KE::distance(first, first));
+  EXPECT_EQ(1, KE::distance(first, first + 1));
+  EXPECT_EQ(m_dynamic_view.extent(0), size_t(KE::distance(first, last)));
+}
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestSort.hpp b/packages/kokkos/algorithms/unit_tests/TestSort.hpp
index 9c6308c84347e2229ad083805db3d05918baa4f8..a03847f2b26d84946e389aabbd496abaf8ec2d5c 100644
--- a/packages/kokkos/algorithms/unit_tests/TestSort.hpp
+++ b/packages/kokkos/algorithms/unit_tests/TestSort.hpp
@@ -135,8 +135,9 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
   KeyViewType keys("Keys", n);
 
   // Test sorting array with all numbers equal
-  Kokkos::deep_copy(keys, KeyType(1));
-  Kokkos::sort(keys, force_kokkos);
+  ExecutionSpace exec;
+  Kokkos::deep_copy(exec, keys, KeyType(1));
+  Kokkos::sort(exec, keys, force_kokkos);
 
   Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
   Kokkos::fill_random(keys, g,
@@ -147,21 +148,24 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
   double sum_after        = 0.0;
   unsigned int sort_fails = 0;
 
-  Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys), sum_before);
+  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
+                          sum<ExecutionSpace, KeyType>(keys), sum_before);
 
-  Kokkos::sort(keys, force_kokkos);
+  Kokkos::sort(exec, keys, force_kokkos);
 
-  Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys), sum_after);
-  Kokkos::parallel_reduce(
-      n - 1, is_sorted_struct<ExecutionSpace, KeyType>(keys), sort_fails);
+  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
+                          sum<ExecutionSpace, KeyType>(keys), sum_after);
+  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n - 1),
+                          is_sorted_struct<ExecutionSpace, KeyType>(keys),
+                          sort_fails);
 
   double ratio   = sum_before / sum_after;
   double epsilon = 1e-10;
   unsigned int equal_sum =
       (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 1 : 0;
 
-  ASSERT_EQ(sort_fails, 0);
-  ASSERT_EQ(equal_sum, 1);
+  ASSERT_EQ(sort_fails, 0u);
+  ASSERT_EQ(equal_sum, 1u);
 }
 
 template <class ExecutionSpace, typename KeyType>
@@ -177,8 +181,10 @@ void test_3D_sort_impl(unsigned int n) {
   double sum_after        = 0.0;
   unsigned int sort_fails = 0;
 
-  Kokkos::parallel_reduce(keys.extent(0), sum3D<ExecutionSpace, KeyType>(keys),
-                          sum_before);
+  ExecutionSpace exec;
+  Kokkos::parallel_reduce(
+      Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0)),
+      sum3D<ExecutionSpace, KeyType>(keys), sum_before);
 
   int bin_1d = 1;
   while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2;
@@ -189,15 +195,17 @@ void test_3D_sort_impl(unsigned int n) {
   using BinOp = Kokkos::BinOp3D<KeyViewType>;
   BinOp bin_op(bin_max, min, max);
   Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
-  Sorter.create_permute_vector();
-  Sorter.template sort<KeyViewType>(keys);
-
-  Kokkos::parallel_reduce(keys.extent(0), sum3D<ExecutionSpace, KeyType>(keys),
-                          sum_after);
-  Kokkos::parallel_reduce(keys.extent(0) - 1,
-                          bin3d_is_sorted_struct<ExecutionSpace, KeyType>(
-                              keys, bin_1d, min[0], max[0]),
-                          sort_fails);
+  Sorter.create_permute_vector(exec);
+  Sorter.sort(exec, keys);
+
+  Kokkos::parallel_reduce(
+      Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0)),
+      sum3D<ExecutionSpace, KeyType>(keys), sum_after);
+  Kokkos::parallel_reduce(
+      Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0) - 1),
+      bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys, bin_1d, min[0],
+                                                      max[0]),
+      sort_fails);
 
   double ratio   = sum_before / sum_after;
   double epsilon = 1e-10;
@@ -207,8 +215,8 @@ void test_3D_sort_impl(unsigned int n) {
   if (sort_fails)
     printf("3D Sort Sum: %f %f Fails: %u\n", sum_before, sum_after, sort_fails);
 
-  ASSERT_EQ(sort_fails, 0);
-  ASSERT_EQ(equal_sum, 1);
+  ASSERT_EQ(sort_fails, 0u);
+  ASSERT_EQ(equal_sum, 1u);
 }
 
 //----------------------------------------------------------------------------
@@ -229,36 +237,36 @@ void test_dynamic_view_sort_impl(unsigned int n) {
   KeyViewType keys_view("KeysTmp", n);
 
   // Test sorting array with all numbers equal
-  Kokkos::deep_copy(keys_view, KeyType(1));
+  ExecutionSpace exec;
+  Kokkos::deep_copy(exec, keys_view, KeyType(1));
   Kokkos::deep_copy(keys, keys_view);
-  Kokkos::sort(keys, 0 /* begin */, n /* end */);
+  Kokkos::sort(exec, keys, 0 /* begin */, n /* end */);
 
   Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
   Kokkos::fill_random(keys_view, g,
                       Kokkos::Random_XorShift64_Pool<
                           ExecutionSpace>::generator_type::MAX_URAND);
 
-  ExecutionSpace().fence();
+  exec.fence();
   Kokkos::deep_copy(keys, keys_view);
-  // ExecutionSpace().fence();
 
   double sum_before       = 0.0;
   double sum_after        = 0.0;
   unsigned int sort_fails = 0;
 
-  Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys_view),
-                          sum_before);
+  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
+                          sum<ExecutionSpace, KeyType>(keys_view), sum_before);
 
-  Kokkos::sort(keys, 0 /* begin */, n /* end */);
+  Kokkos::sort(exec, keys, 0 /* begin */, n /* end */);
 
-  ExecutionSpace().fence();  // Need this fence to prevent BusError with Cuda
+  exec.fence();  // Need this fence to prevent BusError with Cuda
   Kokkos::deep_copy(keys_view, keys);
-  // ExecutionSpace().fence();
 
-  Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys_view),
-                          sum_after);
-  Kokkos::parallel_reduce(
-      n - 1, is_sorted_struct<ExecutionSpace, KeyType>(keys_view), sort_fails);
+  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
+                          sum<ExecutionSpace, KeyType>(keys_view), sum_after);
+  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n - 1),
+                          is_sorted_struct<ExecutionSpace, KeyType>(keys_view),
+                          sort_fails);
 
   double ratio   = sum_before / sum_after;
   double epsilon = 1e-10;
@@ -271,8 +279,8 @@ void test_dynamic_view_sort_impl(unsigned int n) {
               << std::endl;
   }
 
-  ASSERT_EQ(sort_fails, 0);
-  ASSERT_EQ(equal_sum, 1);
+  ASSERT_EQ(sort_fails, 0u);
+  ASSERT_EQ(equal_sum, 1u);
 }
 
 //----------------------------------------------------------------------------
@@ -301,9 +309,10 @@ void test_issue_1160_impl() {
   for (int i = 0; i < 10; ++i) {
     h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i));
   }
-  Kokkos::deep_copy(element_, h_element);
-  Kokkos::deep_copy(x_, h_x);
-  Kokkos::deep_copy(v_, h_v);
+  ExecutionSpace exec;
+  Kokkos::deep_copy(exec, element_, h_element);
+  Kokkos::deep_copy(exec, x_, h_x);
+  Kokkos::deep_copy(exec, v_, h_v);
 
   using KeyViewType = decltype(element_);
   using BinOp       = Kokkos::BinOp1D<KeyViewType>;
@@ -316,15 +325,16 @@ void test_issue_1160_impl() {
 
   Kokkos::BinSort<KeyViewType, BinOp> Sorter(element_, begin, end, binner,
                                              false);
-  Sorter.create_permute_vector();
-  Sorter.sort(element_, begin, end);
+  Sorter.create_permute_vector(exec);
+  Sorter.sort(exec, element_, begin, end);
 
-  Sorter.sort(x_, begin, end);
-  Sorter.sort(v_, begin, end);
+  Sorter.sort(exec, x_, begin, end);
+  Sorter.sort(exec, v_, begin, end);
 
-  Kokkos::deep_copy(h_element, element_);
-  Kokkos::deep_copy(h_x, x_);
-  Kokkos::deep_copy(h_v, v_);
+  Kokkos::deep_copy(exec, h_element, element_);
+  Kokkos::deep_copy(exec, h_x, x_);
+  Kokkos::deep_copy(exec, h_v, v_);
+  exec.fence();
 
   ASSERT_EQ(h_element(0), 9);
   ASSERT_EQ(h_element(1), 8);
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4036112b4976c089bf6affae9a72106bdd7ab92c
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp
@@ -0,0 +1,293 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_Numeric.hpp>
+#include <utility>
+#include <numeric>
+
+namespace Test {
+namespace stdalgos {
+namespace AdjacentDifference {
+
+namespace KE = Kokkos::Experimental;
+
+template <class DestViewType>
+void fill_view(DestViewType dest_view, const std::string& name) {
+  // we need to be careful because dest_view might not be deep copyable
+  // for instance strided layout
+
+  using value_type      = typename DestViewType::value_type;
+  const std::size_t ext = dest_view.extent(0);
+  auto aux_view =
+      create_deep_copyable_compatible_view_with_same_extent(dest_view);
+  auto aux_v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    aux_v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    aux_v_h(0) = static_cast<value_type>(1);
+    aux_v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    aux_v_h(0) = static_cast<value_type>(2);
+    aux_v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      aux_v_h(i) = static_cast<value_type>(i) * 2;
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      aux_v_h(i) = static_cast<value_type>(i) * 3;
+    }
+    aux_v_h(5) = static_cast<value_type>(-15);
+  }
+
+  else if (name == "medium-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      aux_v_h(i) = static_cast<value_type>(i) * 2;
+    }
+  }
+
+  else if (name == "medium-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      aux_v_h(i) = static_cast<value_type>(i) * 2;
+    }
+    aux_v_h(4) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "large-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      aux_v_h(i) = static_cast<value_type>(-100) + static_cast<value_type>(i);
+    }
+  }
+
+  else if (name == "large-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      aux_v_h(i) = static_cast<value_type>(-100) + static_cast<value_type>(i);
+    }
+    aux_v_h(156) = static_cast<value_type>(-250);
+
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, aux_v_h);
+  CopyFunctor<decltype(aux_view), DestViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class TestViewType, class... Args>
+auto compute_gold(TestViewType test_view, const std::string& name,
+                  Args... args /* copy on purpose */) {
+  // we need to be careful because test_view might not be deep copyable
+  // for instance strided layout
+
+  const std::size_t ext = test_view.extent(0);
+
+  // create a deep copyable clone of test_view
+  auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto test_view_dc_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
+
+  // create gold deep copyable view
+  auto gold_view =
+      create_deep_copyable_compatible_view_with_same_extent(test_view);
+  auto gold_view_h = create_mirror_view(Kokkos::HostSpace(), gold_view);
+
+  // compute gold solution on host and deep copy to device
+  if (name == "empty") {
+    return gold_view;
+  } else {
+    using value_type = typename TestViewType::value_type;
+    std::vector<value_type> tmp(ext);
+    for (std::size_t i = 0; i < ext; ++i) {
+      tmp[i] = test_view_dc_h(i);
+    }
+    // run adj-diff on tmp directly
+    std::adjacent_difference(tmp.begin(), tmp.end(), tmp.begin(),
+                             std::forward<Args>(args)...);
+
+    // copy from tmp to gold_h
+    for (std::size_t i = 0; i < ext; ++i) {
+      gold_view_h(i) = tmp[i];
+    }
+    // deep_copy to device
+    Kokkos::deep_copy(gold_view, gold_view_h);
+    return gold_view;
+  }
+}
+
+template <class TestViewType, class GoldViewType>
+void verify_data(TestViewType test_view, GoldViewType gold) {
+  // we need to be careful because test_view might not be deep copyable
+  // for instance strided layout
+
+  auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto test_view_dc_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
+  // gold is deep_copyable for sure
+  const auto gold_h = create_mirror_view_and_copy(Kokkos::HostSpace(), gold);
+
+  for (std::size_t i = 0; i < test_view.extent(0); ++i) {
+    EXPECT_TRUE(gold_h(i) == test_view_dc_h(i));
+  }
+}
+
+template <class ValueType1, class ValueType2 = ValueType1,
+          class RetType = ValueType2>
+struct CustomBinaryOpFunctor {
+  KOKKOS_INLINE_FUNCTION
+  RetType operator()(const ValueType1& a, const ValueType2& b) const {
+    return a * b;
+  }
+};
+
+template <class ValueType1, class ValueType2 = ValueType1,
+          class RetType = ValueType2>
+struct DefaultBinaryOpFunctor {
+  KOKKOS_INLINE_FUNCTION
+  RetType operator()(const ValueType1& a, const ValueType2& b) const {
+    return a - b;
+  }
+};
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info,
+                         Args... args /* copy on purpose */) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+
+  auto view_from =
+      create_view<ValueType>(Tag{}, view_ext, "adj_diff_from_view");
+  fill_view(view_from, name);
+
+  const auto gold = compute_gold(view_from, name, args...);
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "adj_diff_dest_view");
+    auto res1 = KE::adjacent_difference(exespace(), KE::cbegin(view_from),
+                                        KE::cend(view_from),
+                                        KE::begin(view_dest), args...);
+    EXPECT_TRUE(res1 == KE::end(view_dest));
+    verify_data(view_dest, gold);
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "adj_diff_dest_view");
+    auto res2 = KE::adjacent_difference(
+        "label", exespace(), KE::cbegin(view_from), KE::cend(view_from),
+        KE::begin(view_dest), args...);
+    EXPECT_TRUE(res2 == KE::end(view_dest));
+    verify_data(view_dest, gold);
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "adj_diff_dest_view");
+    auto res3 =
+        KE::adjacent_difference(exespace(), view_from, view_dest, args...);
+    EXPECT_TRUE(res3 == KE::end(view_dest));
+    verify_data(view_dest, gold);
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "adj_diff_dest_view");
+    auto res4 = KE::adjacent_difference("label", exespace(), view_from,
+                                        view_dest, args...);
+    EXPECT_TRUE(res4 == KE::end(view_dest));
+    verify_data(view_dest, gold);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType, class... Args>
+void run_all_scenarios(Args... args /* copy on purpose */) {
+  // if (0 < sizeof...(args)) {
+  //   std::cout << "adjacent_difference: " << view_tag_to_string(Tag{})
+  //             << ", custom binary op, all overloads \n";
+  // } else {
+  //   std::cout << "adjacent_difference: " << view_tag_to_string(Tag{})
+  //             << ", default binary op, all overloads \n";
+  // }
+
+  for (const auto& it : default_scenarios) {
+    run_single_scenario<Tag, ValueType>(it, args...);
+  }
+}
+
+TEST(std_algorithms_numerics_ops_test, adjecent_difference) {
+  using value_type = double;
+
+  run_all_scenarios<DynamicTag, value_type>();
+  run_all_scenarios<StridedTwoTag, value_type>();
+  run_all_scenarios<StridedThreeTag, value_type>();
+
+  using custom_binary_op = CustomBinaryOpFunctor<value_type>;
+  run_all_scenarios<DynamicTag, value_type>(custom_binary_op{});
+  run_all_scenarios<StridedTwoTag, value_type>(custom_binary_op{});
+  run_all_scenarios<StridedThreeTag, value_type>(custom_binary_op{});
+}
+
+}  // namespace AdjacentDifference
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6433a9cf635c8e0b10ff3da43d045d901df88a4b
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp
@@ -0,0 +1,325 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace AdjacentFind {
+
+namespace KE = Kokkos::Experimental;
+
+// impl is here for std because it is only avail from c++>=17
+template <class InputIterator, class OutputIterator, class BinaryPredicate>
+auto my_unique_copy(InputIterator first, InputIterator last,
+                    OutputIterator result, BinaryPredicate pred) {
+  if (first != last) {
+    typename OutputIterator::value_type t(*first);
+    *result = t;
+    ++result;
+    while (++first != last) {
+      if (!pred(t, *first)) {
+        t       = *first;
+        *result = t;
+        ++result;
+      }
+    }
+  }
+  return result;
+}
+
+template <class InputIterator, class OutputIterator>
+auto my_unique_copy(InputIterator first, InputIterator last,
+                    OutputIterator result) {
+  using value_type = typename OutputIterator::value_type;
+  using func_t     = IsEqualFunctor<value_type>;
+  return my_unique_copy(first, last, result, func_t());
+}
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  // make bounds tight so that it is likely we get
+  // consecutive equal elements
+  UnifDist() : m_dist(2, 8) { m_gen.seed(345823); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  // make bounds tight so that it is likely we get
+  // consecutive equal elements
+  UnifDist() : m_dist(2, 8) { m_gen.seed(345823); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(2);
+    v_h(3)  = static_cast<value_type>(3);
+    v_h(4)  = static_cast<value_type>(2);
+    v_h(5)  = static_cast<value_type>(5);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(6);
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(8);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+  }
+
+  else if (name == "medium") {
+    // beginning just contains increasing values
+    for (std::size_t i = 0; i < 1000; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+
+    // then use random
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 1000; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else if (name == "large-a") {
+    // put equal elements at the end
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+    v_h(ext - 3) = static_cast<value_type>(44);
+    v_h(ext - 2) = static_cast<value_type>(44);
+    v_h(ext - 1) = static_cast<value_type>(44);
+  }
+
+  else if (name == "large-b") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class IteratorType, class BinaryPredicate>
+IteratorType my_std_adjacent_find(IteratorType first, IteratorType last,
+                                  BinaryPredicate p) {
+  if (first == last) {
+    return last;
+  }
+  IteratorType next = first;
+  ++next;
+  for (; next != last; ++next, ++first) {
+    if (p(*first, *next)) {
+      return first;
+    }
+  }
+  return last;
+}
+
+template <class IteratorType>
+IteratorType my_std_adjacent_find(IteratorType first, IteratorType last) {
+  using value_type = typename IteratorType::value_type;
+  return my_std_adjacent_find(first, last, IsEqualFunctor<value_type>());
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name) {
+  std::cout << "adjacent_find: default predicate: " << name << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << '\n';
+}
+
+template <class Tag, class ValueType, class Predicate>
+void print_scenario_details(const std::string& name, Predicate pred) {
+  (void)pred;
+  std::cout << "adjacent_find: custom  predicate: " << name << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << '\n';
+}
+
+template <class DiffType, class ViewType, class... Args>
+void verify(DiffType my_diff, ViewType view, Args... args) {
+  auto view_dc = create_deep_copyable_compatible_clone(view);
+  auto view_h  = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
+  auto std_r =
+      my_std_adjacent_find(KE::cbegin(view_h), KE::cend(view_h), args...);
+  const auto std_diff = std_r - KE::cbegin(view_h);
+
+  EXPECT_TRUE(my_diff == std_diff);
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info, Args... args) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, args...);
+
+  auto view = create_view<ValueType>(Tag{}, view_ext, "adjacent_find_view");
+  fill_view(view, name);
+
+  {
+    auto res_it        = KE::adjacent_find(exespace(), KE::cbegin(view),
+                                    KE::cend(view), args...);
+    const auto my_diff = res_it - KE::cbegin(view);
+    verify(my_diff, view, args...);
+  }
+
+  {
+    auto res_it = KE::adjacent_find("label", exespace(), KE::cbegin(view),
+                                    KE::cend(view), args...);
+    const auto my_diff = res_it - KE::cbegin(view);
+    verify(my_diff, view, args...);
+  }
+
+  {
+    auto res_it        = KE::adjacent_find(exespace(), view, args...);
+    const auto my_diff = res_it - KE::begin(view);
+    verify(my_diff, view, args...);
+  }
+
+  {
+    auto res_it        = KE::adjacent_find("label", exespace(), view, args...);
+    const auto my_diff = res_it - KE::begin(view);
+    verify(my_diff, view, args...);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 11},
+      {"small-b", 13},       {"medium", 21103},     {"large-a", 101513},
+      {"large-b", 100111}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+
+    using func_t = IsEqualFunctor<ValueType>;
+    run_single_scenario<Tag, ValueType>(it, func_t());
+  }
+}
+
+TEST(std_algorithms_nonmod_seq_ops, adjacent_find) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace AdjacentFind
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..65b6000962d273ec710f5b5452f6d81cea5aa02e
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp
@@ -0,0 +1,183 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace AllAnyNoneOf {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void test_all_of(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+  const auto equals_zero  = EqualsValFunctor<value_t>(0);
+
+  view_host_space_t expected("all_of_expected", view.extent(0));
+  compare_views(expected, view);
+
+  // reference result
+  EXPECT_TRUE(std::all_of(KE::begin(expected), KE::end(expected), equals_zero));
+
+  // pass iterators
+  EXPECT_TRUE(
+      KE::all_of(exespace(), KE::begin(view), KE::end(view), equals_zero));
+  // pass view
+  EXPECT_TRUE(KE::all_of(exespace(), view, equals_zero));
+
+  fill_views_inc(view, expected);
+
+  if (view.extent(0) > 1) {
+    // reference result
+    EXPECT_FALSE(
+        std::all_of(KE::begin(expected), KE::end(expected), equals_zero));
+
+    // pass const iterators
+    EXPECT_FALSE(
+        KE::all_of(exespace(), KE::cbegin(view), KE::cend(view), equals_zero));
+    // pass view
+    EXPECT_FALSE(KE::all_of("label", exespace(), view, equals_zero));
+  }
+}
+
+template <class ViewType>
+void test_any_of(const ViewType view) {
+  using value_t              = typename ViewType::value_type;
+  using view_host_space_t    = Kokkos::View<value_t*, Kokkos::HostSpace>;
+  const auto not_equals_zero = NotEqualsZeroFunctor<value_t>();
+
+  view_host_space_t expected("any_of_expected", view.extent(0));
+  compare_views(expected, view);
+
+  // reference result
+  EXPECT_FALSE(
+      std::any_of(KE::begin(expected), KE::end(expected), not_equals_zero));
+
+  // pass iterators
+  EXPECT_FALSE(
+      KE::any_of(exespace(), KE::begin(view), KE::end(view), not_equals_zero));
+  // pass view
+  EXPECT_FALSE(KE::any_of(exespace(), view, not_equals_zero));
+
+  fill_views_inc(view, expected);
+
+  if (view.extent(0) > 1) {
+    // reference result
+    EXPECT_TRUE(
+        std::any_of(KE::begin(expected), KE::end(expected), not_equals_zero));
+
+    // pass const iterators
+    EXPECT_TRUE(KE::any_of(exespace(), KE::cbegin(view), KE::cend(view),
+                           not_equals_zero));
+    // pass view
+    EXPECT_TRUE(KE::any_of("label", exespace(), view, not_equals_zero));
+  }
+}
+
+template <class ViewType>
+void test_none_of(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+  const auto is_positive  = IsPositiveFunctor<value_t>();
+
+  view_host_space_t expected("none_of_expected", view.extent(0));
+  compare_views(expected, view);
+
+  // reference result
+  EXPECT_TRUE(
+      std::none_of(KE::begin(expected), KE::end(expected), is_positive));
+
+  // pass iterators
+  EXPECT_TRUE(
+      KE::none_of(exespace(), KE::begin(view), KE::end(view), is_positive));
+  // pass view
+  EXPECT_TRUE(KE::none_of(exespace(), view, is_positive));
+
+  fill_views_inc(view, expected);
+
+  if (view.extent(0) > 1) {
+    // reference result
+    EXPECT_FALSE(
+        std::none_of(KE::begin(expected), KE::end(expected), is_positive));
+
+    // pass const iterators
+    EXPECT_FALSE(
+        KE::none_of(exespace(), KE::cbegin(view), KE::cend(view), is_positive));
+    // pass view
+    EXPECT_FALSE(KE::none_of("label", exespace(), view, is_positive));
+  }
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  for (const auto& scenario : default_scenarios) {
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "all_of");
+      test_all_of(view);
+    }
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "any_of");
+      test_any_of(view);
+    }
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "none_of");
+      test_none_of(view);
+    }
+  }
+}
+
+TEST(std_algorithms_all_any_none_of_test, test) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedTwoTag, int>();
+  run_all_scenarios<StridedThreeTag, unsigned>();
+}
+
+}  // namespace AllAnyNoneOf
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..871cce155b4d43239a1306f051f439359ad3b99b
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp
@@ -0,0 +1,57 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+
+namespace Test {
+namespace stdalgos {
+
+std::string view_tag_to_string(DynamicTag) { return "dynamic_view"; }
+
+std::string view_tag_to_string(StridedTwoTag) { return "stride2_view"; }
+
+std::string view_tag_to_string(StridedThreeTag) { return "stride3_view"; }
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6d2b65a567d8551cbaf67c946dc3122a6c44c04d
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp
@@ -0,0 +1,255 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_COMMON_HPP
+#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_COMMON_HPP
+
+#include <gtest/gtest.h>
+#include <TestStdAlgorithmsHelperFunctors.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <numeric>
+#include <random>
+
+namespace Test {
+namespace stdalgos {
+
+using exespace = Kokkos::DefaultExecutionSpace;
+
+struct DynamicTag {};
+struct StridedTwoTag {};
+struct StridedThreeTag {};
+
+const std::map<std::string, std::size_t> default_scenarios = {
+    {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+    {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+    {"medium-a", 1003},    {"medium-b", 1003}, {"large-a", 101513},
+    {"large-b", 101513}};
+
+// see cpp file for these functions
+std::string view_tag_to_string(DynamicTag);
+std::string view_tag_to_string(StridedTwoTag);
+std::string view_tag_to_string(StridedThreeTag);
+
+template <class ValueType>
+auto create_view(DynamicTag, std::size_t ext, const std::string label) {
+  using view_t = Kokkos::View<ValueType*>;
+  view_t view{label + "_" + view_tag_to_string(DynamicTag{}), ext};
+  return view;
+}
+
+template <class ValueType>
+auto create_view(StridedTwoTag, std::size_t ext, const std::string label) {
+  using view_t = Kokkos::View<ValueType*, Kokkos::LayoutStride>;
+  Kokkos::LayoutStride layout{ext, 2};
+  view_t view{label + "_" + view_tag_to_string(DynamicTag{}), layout};
+  return view;
+}
+
+template <class ValueType>
+auto create_view(StridedThreeTag, std::size_t ext, const std::string label) {
+  using view_t = Kokkos::View<ValueType*, Kokkos::LayoutStride>;
+  Kokkos::LayoutStride layout{ext, 3};
+  view_t view{label + "_" + view_tag_to_string(DynamicTag{}), layout};
+  return view;
+}
+
+template <class ViewType>
+auto create_deep_copyable_compatible_view_with_same_extent(ViewType view) {
+  const std::size_t ext      = view.extent(0);
+  using view_value_type      = typename ViewType::value_type;
+  using view_exespace        = typename ViewType::execution_space;
+  using view_deep_copyable_t = Kokkos::View<view_value_type*, view_exespace>;
+  view_deep_copyable_t view_dc("view_dc", ext);
+  return view_dc;
+}
+
+template <class ViewType>
+auto create_deep_copyable_compatible_clone(ViewType view) {
+  auto view_dc    = create_deep_copyable_compatible_view_with_same_extent(view);
+  using view_dc_t = decltype(view_dc);
+  CopyFunctor<ViewType, view_dc_t> F1(view, view_dc);
+  Kokkos::parallel_for("copy", view.extent(0), F1);
+  return view_dc;
+}
+
+template <class ViewType>
+auto create_host_space_copy(ViewType view) {
+  auto view_dc = create_deep_copyable_compatible_clone(view);
+  return create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
+}
+
+// fill the views with sequentially increasing values
+template <class ViewType, class ViewHostType>
+void fill_views_inc(ViewType view, ViewHostType host_view) {
+  namespace KE = Kokkos::Experimental;
+
+  Kokkos::parallel_for(view.extent(0), AssignIndexFunctor<ViewType>(view));
+  std::iota(KE::begin(host_view), KE::end(host_view), 0);
+  // compare_views(expected, view);
+}
+
+template <class ValueType, class ViewType>
+std::enable_if_t<!std::is_same<typename ViewType::traits::array_layout,
+                               Kokkos::LayoutStride>::value>
+verify_values(ValueType expected, const ViewType view) {
+  static_assert(std::is_same<ValueType, typename ViewType::value_type>::value,
+                "Non-matching value types of view and reference value");
+  auto view_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), view);
+  for (std::size_t i = 0; i < view_h.extent(0); i++) {
+    EXPECT_EQ(expected, view_h(i));
+  }
+}
+
+template <class ValueType, class ViewType>
+std::enable_if_t<std::is_same<typename ViewType::traits::array_layout,
+                              Kokkos::LayoutStride>::value>
+verify_values(ValueType expected, const ViewType view) {
+  static_assert(std::is_same<ValueType, typename ViewType::value_type>::value,
+                "Non-matching value types of view and reference value");
+
+  using non_strided_view_t = Kokkos::View<typename ViewType::value_type*>;
+  non_strided_view_t tmpView("tmpView", view.extent(0));
+
+  Kokkos::parallel_for(
+      "_std_algo_copy", view.extent(0),
+      CopyFunctor<ViewType, non_strided_view_t>(view, tmpView));
+  auto view_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), tmpView);
+  for (std::size_t i = 0; i < view_h.extent(0); i++) {
+    EXPECT_EQ(expected, view_h(i));
+  }
+}
+
+template <class ViewType1, class ViewType2>
+std::enable_if_t<!std::is_same<typename ViewType2::traits::array_layout,
+                               Kokkos::LayoutStride>::value>
+compare_views(ViewType1 expected, const ViewType2 actual) {
+  static_assert(std::is_same<typename ViewType1::value_type,
+                             typename ViewType2::value_type>::value,
+                "Non-matching value types of expected and actual view");
+  auto expected_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), expected);
+  auto actual_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), actual);
+
+  for (std::size_t i = 0; i < expected_h.extent(0); i++) {
+    EXPECT_EQ(expected_h(i), actual_h(i));
+  }
+}
+
+template <class ViewType1, class ViewType2>
+std::enable_if_t<std::is_same<typename ViewType2::traits::array_layout,
+                              Kokkos::LayoutStride>::value>
+compare_views(ViewType1 expected, const ViewType2 actual) {
+  static_assert(std::is_same<typename ViewType1::value_type,
+                             typename ViewType2::value_type>::value,
+                "Non-matching value types of expected and actual view");
+
+  using non_strided_view_t = Kokkos::View<typename ViewType2::value_type*>;
+  non_strided_view_t tmp_view("tmp_view", actual.extent(0));
+  Kokkos::parallel_for(
+      "_std_algo_copy", actual.extent(0),
+      CopyFunctor<ViewType2, non_strided_view_t>(actual, tmp_view));
+
+  auto actual_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), tmp_view);
+  auto expected_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), expected);
+
+  for (std::size_t i = 0; i < expected_h.extent(0); i++) {
+    EXPECT_EQ(expected_h(i), actual_h(i));
+  }
+}
+
+template <class ViewType>
+void fill_zero(ViewType a) {
+  const auto functor = FillZeroFunctor<ViewType>(a);
+  ::Kokkos::parallel_for(a.extent(0), std::move(functor));
+}
+
+template <class ViewType1, class ViewType2>
+void fill_zero(ViewType1 a, ViewType2 b) {
+  fill_zero(a);
+  fill_zero(b);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// helpers for testing small views (extent = 10)
+// prefer `default_scenarios` map for creating new tests
+using value_type = double;
+
+struct std_algorithms_test : public ::testing::Test {
+  static constexpr size_t extent = 10;
+
+  using static_view_t = Kokkos::View<value_type[extent]>;
+  static_view_t m_static_view{"std-algo-test-1D-contiguous-view-static"};
+
+  using dyn_view_t = Kokkos::View<value_type*>;
+  dyn_view_t m_dynamic_view{"std-algo-test-1D-contiguous-view-dynamic", extent};
+
+  using strided_view_t = Kokkos::View<value_type*, Kokkos::LayoutStride>;
+  Kokkos::LayoutStride layout{extent, 2};
+  strided_view_t m_strided_view{"std-algo-test-1D-strided-view", layout};
+
+  using view_host_space_t = Kokkos::View<value_type[10], Kokkos::HostSpace>;
+
+  template <class ViewFromType>
+  void copyInputViewToFixtureViews(ViewFromType view) {
+    CopyFunctor<ViewFromType, static_view_t> F1(view, m_static_view);
+    Kokkos::parallel_for("_std_algo_copy1", view.extent(0), F1);
+
+    CopyFunctor<ViewFromType, dyn_view_t> F2(view, m_dynamic_view);
+    Kokkos::parallel_for("_std_algo_copy2", view.extent(0), F2);
+
+    CopyFunctor<ViewFromType, strided_view_t> F3(view, m_strided_view);
+    Kokkos::parallel_for("_std_algo_copy3", view.extent(0), F3);
+  }
+};
+
+}  // namespace stdalgos
+}  // namespace Test
+
+#endif
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2f2172eccc1e4d318a7b77dc8ddd29ebd41b637e
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp
@@ -0,0 +1,553 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <Kokkos_StdAlgorithms.hpp>
+
+namespace Test {
+namespace stdalgos {
+namespace compileonly {
+
+template <class ValueType>
+struct TrivialUnaryFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType a) const { return a; }
+};
+
+template <class ValueType>
+struct TrivialBinaryFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType &a, const ValueType &b) const {
+    return (a + b);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType &a,
+                       const volatile ValueType &b) const {
+    return (a + b);
+  }
+};
+
+template <class ValueType>
+struct TrivialUnaryPredicate {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val) const {
+    (void)val;
+    return true;
+  }
+};
+
+template <class ValueType>
+struct TrivialBinaryPredicate {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val, const ValueType val2) const {
+    (void)val;
+    (void)val2;
+    return true;
+  }
+};
+
+template <class ValueType>
+struct TimesTwoFunctor {
+  KOKKOS_INLINE_FUNCTION
+  void operator()(ValueType &val) const { val *= (ValueType)2; }
+};
+
+template <class ValueType>
+struct TrivialComparator {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType &a, const ValueType &b) const {
+    return a > b;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const volatile ValueType &a,
+                  const volatile ValueType &b) const {
+    return a > b;
+  }
+};
+
+template <class ValueType>
+struct TrivialGenerator {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()() const { return ValueType{}; }
+};
+
+template <class ValueType>
+struct TrivialReduceJoinFunctor {
+  KOKKOS_FUNCTION
+  ValueType operator()(const ValueType &a, const ValueType &b) const {
+    return a + b;
+  }
+
+  KOKKOS_FUNCTION
+  ValueType operator()(const volatile ValueType &a,
+                       const volatile ValueType &b) const {
+    return a + b;
+  }
+};
+
+template <class ValueType>
+struct TrivialTransformReduceUnaryTransformer {
+  KOKKOS_FUNCTION
+  ValueType operator()(const ValueType &a) const { return a; }
+};
+
+template <class ValueType>
+struct TrivialTransformReduceBinaryTransformer {
+  KOKKOS_FUNCTION
+  ValueType operator()(const ValueType &a, const ValueType &b) const {
+    return (a * b);
+  }
+};
+
+// put all code here and don't call from main
+// so that even if one runs the executable,
+// nothing is run anyway
+
+namespace KE     = Kokkos::Experimental;
+using count_type = std::size_t;
+using T          = double;
+Kokkos::View<T *> in1("in1", 10);
+Kokkos::View<T *> in2("in2", 10);
+Kokkos::View<T *> in3("in3", 10);
+Kokkos::DefaultExecutionSpace exe_space;
+std::string const label = "trivial";
+
+//
+// just iterators
+//
+#define TEST_ALGO_MACRO_B1E1(ALGO)                                \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1)); \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1));
+
+#define TEST_ALGO_MACRO_B1E1B2(ALGO)                             \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2));                                \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2));
+
+#define TEST_ALGO_MACRO_B1E1B2E2(ALGO)                           \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), KE::end(in2));                  \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), KE::end(in2));
+
+#define TEST_ALGO_MACRO_B1E1E2(ALGO)                             \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), \
+                 KE::end(in2));                                  \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), KE::end(in2));
+
+#define TEST_ALGO_MACRO_B1E1E2B3(ALGO)                                         \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), KE::end(in2), \
+                 KE::begin(in3));                                              \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), KE::end(in2), \
+                 KE::begin(in3));
+
+#define TEST_ALGO_MACRO_B1E1E1B2(ALGO)                                         \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), KE::end(in1), \
+                 KE::begin(in2));                                              \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), KE::end(in1), \
+                 KE::begin(in2));
+
+//
+// iterators and params
+//
+#define TEST_ALGO_MACRO_B1_VARIAD(ALGO, ...)                     \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), __VA_ARGS__); \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), __VA_ARGS__);
+
+#define TEST_ALGO_MACRO_B1E1_VARIAD(ALGO, ...)                                 \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), __VA_ARGS__); \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), __VA_ARGS__);
+
+#define TEST_ALGO_MACRO_B1E1B2_VARIAD(ALGO, ...)                 \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), __VA_ARGS__);                   \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), __VA_ARGS__);
+
+#define TEST_ALGO_MACRO_B1_ARG_B2(ALGO, ARG)                             \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), ARG, KE::begin(in2)); \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), ARG, KE::begin(in2));
+
+#define TEST_ALGO_MACRO_B1E1B2B3_VARIAD(ALGO, ...)               \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), KE::begin(in3), __VA_ARGS__);   \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), KE::begin(in3), __VA_ARGS__);
+
+#define TEST_ALGO_MACRO_B1E1B2E2_VARIAD(ALGO, ARG)               \
+  (void)KE::ALGO(exe_space, /*--*/ KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), KE::end(in2), ARG);             \
+  (void)KE::ALGO(label, exe_space, KE::begin(in1), KE::end(in1), \
+                 KE::begin(in2), KE::end(in2), ARG);
+
+//
+// views only
+//
+#define TEST_ALGO_MACRO_V1(ALGO)         \
+  (void)KE::ALGO(exe_space, /*--*/ in1); \
+  (void)KE::ALGO(label, exe_space, in1);
+
+#define TEST_ALGO_MACRO_V1V2(ALGO)            \
+  (void)KE::ALGO(exe_space, /*--*/ in1, in2); \
+  (void)KE::ALGO(label, exe_space, in1, in2);
+
+#define TEST_ALGO_MACRO_V1V2V3(ALGO)               \
+  (void)KE::ALGO(exe_space, /*--*/ in1, in2, in3); \
+  (void)KE::ALGO(label, exe_space, in1, in2, in3);
+
+//
+// views and params
+//
+#define TEST_ALGO_MACRO_V1_VARIAD(ALGO, ...)          \
+  (void)KE::ALGO(exe_space, /*--*/ in1, __VA_ARGS__); \
+  (void)KE::ALGO(label, exe_space, in1, __VA_ARGS__);
+
+#define TEST_ALGO_MACRO_V1V2_VARIAD(ALGO, ...)             \
+  (void)KE::ALGO(exe_space, /*--*/ in1, in2, __VA_ARGS__); \
+  (void)KE::ALGO(label, exe_space, in1, in2, __VA_ARGS__);
+
+#define TEST_ALGO_MACRO_V1V2V3_VARIAD(ALGO, ...)                \
+  (void)KE::ALGO(exe_space, /*--*/ in1, in2, in3, __VA_ARGS__); \
+  (void)KE::ALGO(label, exe_space, in1, in2, in3, __VA_ARGS__);
+
+#define TEST_ALGO_MACRO_V1_ARG_V2(ALGO, ARG)       \
+  (void)KE::ALGO(exe_space, /*--*/ in1, ARG, in2); \
+  (void)KE::ALGO(label, exe_space, in1, ARG, in2);
+
+void non_modifying_seq_ops() {
+  TEST_ALGO_MACRO_B1E1_VARIAD(find, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(find, T{});
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(find_if, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(find_if, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(find_if_not, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(find_if_not, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(for_each, TimesTwoFunctor<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(for_each, TimesTwoFunctor<T>());
+
+  TEST_ALGO_MACRO_B1_VARIAD(for_each_n, count_type{}, TimesTwoFunctor<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(for_each_n, count_type{}, TimesTwoFunctor<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(count_if, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(count_if, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(count, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(count, T{});
+
+  TEST_ALGO_MACRO_B1E1B2E2(mismatch);
+  TEST_ALGO_MACRO_B1E1B2E2_VARIAD(mismatch, TrivialBinaryPredicate<T>());
+  TEST_ALGO_MACRO_V1V2(mismatch);
+  TEST_ALGO_MACRO_V1V2_VARIAD(mismatch, TrivialBinaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(all_of, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(all_of, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(any_of, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(any_of, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(none_of, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(none_of, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1B2(equal);
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(equal, TrivialBinaryPredicate<T>());
+  TEST_ALGO_MACRO_V1V2(equal);
+  TEST_ALGO_MACRO_V1V2_VARIAD(equal, TrivialBinaryPredicate<T>());
+  TEST_ALGO_MACRO_B1E1B2E2(equal);
+  TEST_ALGO_MACRO_B1E1B2E2_VARIAD(equal, TrivialBinaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1B2E2(lexicographical_compare);
+  TEST_ALGO_MACRO_B1E1B2E2_VARIAD(lexicographical_compare,
+                                  TrivialComparator<T>());
+  TEST_ALGO_MACRO_V1V2(lexicographical_compare);
+  TEST_ALGO_MACRO_V1V2_VARIAD(lexicographical_compare, TrivialComparator<T>());
+
+  TEST_ALGO_MACRO_B1E1(adjacent_find);
+  TEST_ALGO_MACRO_V1(adjacent_find);
+  TEST_ALGO_MACRO_B1E1_VARIAD(adjacent_find, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(adjacent_find, TrivialBinaryFunctor<T>());
+
+  TEST_ALGO_MACRO_B1E1B2E2(search);
+  TEST_ALGO_MACRO_V1V2(search);
+  TEST_ALGO_MACRO_B1E1B2E2_VARIAD(search, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(search, TrivialBinaryFunctor<T>());
+
+  TEST_ALGO_MACRO_B1E1B2E2(find_first_of);
+  TEST_ALGO_MACRO_V1V2(find_first_of);
+  TEST_ALGO_MACRO_B1E1B2E2_VARIAD(find_first_of, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(find_first_of, TrivialBinaryFunctor<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(search_n, count_type{}, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(search_n, count_type{}, T{});
+  TEST_ALGO_MACRO_B1E1_VARIAD(search_n, count_type{}, T{},
+                              TrivialBinaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(search_n, count_type{}, T{},
+                            TrivialBinaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1B2E2(find_end);
+  TEST_ALGO_MACRO_V1V2(find_end);
+  TEST_ALGO_MACRO_B1E1B2E2_VARIAD(find_end, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(find_end, TrivialBinaryFunctor<T>());
+}
+
+void modifying_seq_ops() {
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(replace_copy, T{}, T{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(replace_copy, T{}, T{});
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(replace_copy_if, TrivialUnaryPredicate<T>(),
+                                T{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(replace_copy_if, TrivialUnaryPredicate<T>(), T{});
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(replace, T{}, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(replace, T{}, T{});
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(replace_if, TrivialUnaryPredicate<T>(), T{});
+  TEST_ALGO_MACRO_V1_VARIAD(replace_if, TrivialUnaryPredicate<T>(), T{});
+
+  TEST_ALGO_MACRO_B1E1B2(copy);
+  TEST_ALGO_MACRO_V1V2(copy);
+
+  TEST_ALGO_MACRO_B1_ARG_B2(copy_n, count_type{});
+  TEST_ALGO_MACRO_V1_ARG_V2(copy_n, count_type{});
+
+  TEST_ALGO_MACRO_B1E1B2(copy_backward);
+  TEST_ALGO_MACRO_V1V2(copy_backward);
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(copy_if, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(copy_if, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(fill, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(fill, T{});
+
+  TEST_ALGO_MACRO_B1_VARIAD(fill_n, count_type{}, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(fill_n, count_type{}, T{});
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(transform, TrivialUnaryFunctor<T>{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(transform, TrivialUnaryFunctor<T>{});
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(transform, TrivialUnaryFunctor<T>{});
+  TEST_ALGO_MACRO_B1E1B2B3_VARIAD(transform, TrivialBinaryFunctor<T>{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(transform, TrivialUnaryFunctor<T>{});
+  TEST_ALGO_MACRO_V1V2V3_VARIAD(transform, TrivialBinaryFunctor<T>{});
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(generate, TrivialGenerator<T>{});
+  TEST_ALGO_MACRO_V1_VARIAD(generate, TrivialGenerator<T>{});
+
+  TEST_ALGO_MACRO_B1_VARIAD(generate_n, count_type{}, TrivialGenerator<T>{});
+  TEST_ALGO_MACRO_V1_VARIAD(generate_n, count_type{}, TrivialGenerator<T>{});
+
+  TEST_ALGO_MACRO_B1E1B2(reverse_copy);
+  TEST_ALGO_MACRO_V1V2(reverse_copy);
+
+  TEST_ALGO_MACRO_B1E1(reverse);
+  TEST_ALGO_MACRO_V1(reverse);
+
+  TEST_ALGO_MACRO_B1E1B2(move);
+  TEST_ALGO_MACRO_V1V2(move);
+
+  TEST_ALGO_MACRO_B1E1E2(move_backward);
+  TEST_ALGO_MACRO_V1V2(move_backward);
+
+  TEST_ALGO_MACRO_B1E1B2(swap_ranges);
+  TEST_ALGO_MACRO_V1V2(swap_ranges);
+
+  TEST_ALGO_MACRO_B1E1(unique);
+  TEST_ALGO_MACRO_V1(unique);
+  TEST_ALGO_MACRO_B1E1_VARIAD(unique, TrivialBinaryPredicate<T>{});
+  TEST_ALGO_MACRO_V1_VARIAD(unique, TrivialBinaryPredicate<T>{});
+
+  TEST_ALGO_MACRO_B1E1B2(unique_copy);
+  TEST_ALGO_MACRO_V1V2(unique_copy);
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(unique_copy, TrivialBinaryPredicate<T>{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(unique_copy, TrivialBinaryPredicate<T>{});
+
+  TEST_ALGO_MACRO_B1E1E2(rotate);
+  TEST_ALGO_MACRO_V1_VARIAD(rotate, count_type{});
+
+  TEST_ALGO_MACRO_B1E1E1B2(rotate_copy);
+  TEST_ALGO_MACRO_V1_ARG_V2(rotate_copy, count_type{});
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(remove_if, TrivialUnaryPredicate<T>{});
+  TEST_ALGO_MACRO_V1_VARIAD(remove_if, TrivialUnaryPredicate<T>{});
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(remove, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(remove, T{});
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(remove_copy, T{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(remove_copy, T{});
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(remove_copy_if, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(remove_copy_if, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(shift_left, count_type{});
+  TEST_ALGO_MACRO_V1_VARIAD(shift_left, count_type{});
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(shift_right, count_type{});
+  TEST_ALGO_MACRO_V1_VARIAD(shift_right, count_type{});
+}
+
+void sorting_ops() {
+  TEST_ALGO_MACRO_B1E1(is_sorted_until);
+  TEST_ALGO_MACRO_V1(is_sorted_until);
+
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  TEST_ALGO_MACRO_B1E1_VARIAD(is_sorted_until, TrivialComparator<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(is_sorted_until, TrivialComparator<T>());
+#endif
+
+  TEST_ALGO_MACRO_B1E1(is_sorted);
+  TEST_ALGO_MACRO_V1(is_sorted);
+
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  TEST_ALGO_MACRO_B1E1_VARIAD(is_sorted, TrivialComparator<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(is_sorted, TrivialComparator<T>());
+#endif
+}
+
+void minmax_ops() {
+  TEST_ALGO_MACRO_B1E1(min_element);
+  TEST_ALGO_MACRO_V1(min_element);
+  TEST_ALGO_MACRO_B1E1(max_element);
+  TEST_ALGO_MACRO_V1(max_element);
+  TEST_ALGO_MACRO_B1E1(minmax_element);
+  TEST_ALGO_MACRO_V1(minmax_element);
+
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  TEST_ALGO_MACRO_B1E1_VARIAD(min_element, TrivialComparator<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(min_element, TrivialComparator<T>());
+  TEST_ALGO_MACRO_B1E1_VARIAD(max_element, TrivialComparator<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(max_element, TrivialComparator<T>());
+  TEST_ALGO_MACRO_B1E1_VARIAD(minmax_element, TrivialComparator<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(minmax_element, TrivialComparator<T>());
+#endif
+}
+
+void partitionig_ops() {
+  TEST_ALGO_MACRO_B1E1_VARIAD(is_partitioned, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(is_partitioned, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1B2B3_VARIAD(partition_copy, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1V2V3_VARIAD(partition_copy, TrivialUnaryPredicate<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(partition_point, TrivialUnaryPredicate<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(partition_point, TrivialUnaryPredicate<T>());
+}
+
+void numeric() {
+  TEST_ALGO_MACRO_B1E1B2(adjacent_difference);
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(adjacent_difference, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2(adjacent_difference);
+  TEST_ALGO_MACRO_V1V2_VARIAD(adjacent_difference, TrivialBinaryFunctor<T>());
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(exclusive_scan, T{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(exclusive_scan, T{});
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(exclusive_scan, T{}, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(exclusive_scan, T{}, TrivialBinaryFunctor<T>());
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(transform_exclusive_scan, T{},
+                                TrivialBinaryFunctor<T>(),
+                                TrivialUnaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(transform_exclusive_scan, T{},
+                              TrivialBinaryFunctor<T>(),
+                              TrivialUnaryFunctor<T>());
+#endif
+
+  TEST_ALGO_MACRO_B1E1B2(inclusive_scan);
+  TEST_ALGO_MACRO_V1V2(inclusive_scan);
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(inclusive_scan, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(inclusive_scan, TrivialBinaryFunctor<T>());
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(inclusive_scan, TrivialBinaryFunctor<T>(), T{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(inclusive_scan, TrivialBinaryFunctor<T>(), T{});
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(transform_inclusive_scan,
+                                TrivialBinaryFunctor<T>(),
+                                TrivialUnaryFunctor<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(transform_inclusive_scan,
+                              TrivialBinaryFunctor<T>(),
+                              TrivialUnaryFunctor<T>());
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(transform_inclusive_scan,
+                                TrivialBinaryFunctor<T>(),
+                                TrivialUnaryFunctor<T>(), T{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(transform_inclusive_scan,
+                              TrivialBinaryFunctor<T>(),
+                              TrivialUnaryFunctor<T>(), T{});
+#endif
+
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  TEST_ALGO_MACRO_B1E1(reduce);
+  TEST_ALGO_MACRO_V1(reduce);
+  TEST_ALGO_MACRO_B1E1_VARIAD(reduce, T{});
+  TEST_ALGO_MACRO_V1_VARIAD(reduce, T{});
+  TEST_ALGO_MACRO_B1E1_VARIAD(reduce, T{}, TrivialReduceJoinFunctor<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(reduce, T{}, TrivialReduceJoinFunctor<T>());
+
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(transform_reduce, T{});
+  TEST_ALGO_MACRO_V1V2_VARIAD(transform_reduce, T{});
+  TEST_ALGO_MACRO_B1E1B2_VARIAD(transform_reduce, T{},
+                                TrivialReduceJoinFunctor<T>(),
+                                TrivialTransformReduceBinaryTransformer<T>());
+  TEST_ALGO_MACRO_V1V2_VARIAD(transform_reduce, T{},
+                              TrivialReduceJoinFunctor<T>(),
+                              TrivialTransformReduceBinaryTransformer<T>());
+
+  TEST_ALGO_MACRO_B1E1_VARIAD(transform_reduce, T{},
+                              TrivialReduceJoinFunctor<T>(),
+                              TrivialTransformReduceUnaryTransformer<T>());
+  TEST_ALGO_MACRO_V1_VARIAD(transform_reduce, T{},
+                            TrivialReduceJoinFunctor<T>(),
+                            TrivialTransformReduceUnaryTransformer<T>());
+#endif
+}
+
+}  // namespace compileonly
+}  // namespace stdalgos
+}  // namespace Test
+
+int main() { return 0; }
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b1981df28115a5938d24abcdeef59ca812aef548
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp
@@ -0,0 +1,113 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+#include <std_algorithms/Kokkos_Constraints.hpp>
+
+namespace Test {
+namespace stdalgos {
+
+TEST(std_algorithms, is_admissible_to_std_algorithms) {
+  namespace KE     = Kokkos::Experimental;
+  using value_type = double;
+
+  static constexpr size_t extent0 = 13;
+  static constexpr size_t extent1 = 18;
+  static constexpr size_t extent2 = 18;
+
+  //-------------
+  // 1d views
+  //-------------
+  using static_view_1d_t = Kokkos::View<value_type[extent0]>;
+  static_view_1d_t static_view_1d{"std-algo-test-1d-contiguous-view-static"};
+
+  using dyn_view_1d_t = Kokkos::View<value_type*>;
+  dyn_view_1d_t dynamic_view_1d{"std-algo-test-1d-contiguous-view-dynamic",
+                                extent0};
+
+  using strided_view_1d_t = Kokkos::View<value_type*, Kokkos::LayoutStride>;
+  Kokkos::LayoutStride layout1d{extent0, 2};
+  strided_view_1d_t strided_view_1d{"std-algo-test-1d-strided-view", layout1d};
+  EXPECT_EQ(layout1d.dimension[0], 13u);
+  EXPECT_EQ(layout1d.stride[0], 2u);
+  // they are admissible
+  KE::Impl::static_assert_is_admissible_to_kokkos_std_algorithms(
+      static_view_1d);
+  KE::Impl::static_assert_is_admissible_to_kokkos_std_algorithms(
+      dynamic_view_1d);
+  KE::Impl::static_assert_is_admissible_to_kokkos_std_algorithms(
+      strided_view_1d);
+
+  //-------------
+  // 2d views
+  //-------------
+  using static_view_2d_t  = Kokkos::View<value_type[extent0][extent1]>;
+  using dyn_view_2d_t     = Kokkos::View<value_type**>;
+  using strided_view_2d_t = Kokkos::View<value_type**, Kokkos::LayoutStride>;
+  // non admissible
+  EXPECT_FALSE(KE::Impl::is_admissible_to_kokkos_std_algorithms<
+               static_view_2d_t>::value);
+  EXPECT_FALSE(
+      KE::Impl::is_admissible_to_kokkos_std_algorithms<dyn_view_2d_t>::value);
+  EXPECT_FALSE(KE::Impl::is_admissible_to_kokkos_std_algorithms<
+               strided_view_2d_t>::value);
+
+  //-------------
+  // 3d views
+  //-------------
+  using static_view_3d_t  = Kokkos::View<value_type[extent0][extent1][extent2]>;
+  using dyn_view_3d_t     = Kokkos::View<value_type***>;
+  using strided_view_3d_t = Kokkos::View<value_type***, Kokkos::LayoutStride>;
+  // non admissible
+  EXPECT_FALSE(KE::Impl::is_admissible_to_kokkos_std_algorithms<
+               static_view_3d_t>::value);
+  EXPECT_FALSE(
+      KE::Impl::is_admissible_to_kokkos_std_algorithms<dyn_view_3d_t>::value);
+  EXPECT_FALSE(KE::Impl::is_admissible_to_kokkos_std_algorithms<
+               strided_view_3d_t>::value);
+}
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f1d078bd725cd64968fbc678e4b1a1ea168e54d5
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp
@@ -0,0 +1,308 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace CopyIf {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType, class PredicateType>
+std::size_t fill_view(ViewType dest_view, const std::string& name,
+                      PredicateType pred) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  std::size_t count = 0;
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+    // 1 is not even, so count is not incremented
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+    count++;
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+    count++;
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+    count++;
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = value_type{-5} + static_cast<value_type>(i + 1);
+      if (pred(v_h(i))) {
+        count++;
+      }
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(22);
+      } else {
+        v_h(i) = static_cast<value_type>(-12);
+      }
+      if (pred(v_h(i))) {
+        count++;
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+      if (pred(v_h(i))) {
+        count++;
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+  return count;
+}
+
+template <class ViewTypeFrom, class ViewTypeTest, class PredType>
+void verify_data(const std::string& name, ViewTypeFrom view_from,
+                 ViewTypeTest view_test, PredType pred) {
+  using value_type = typename ViewTypeTest::value_type;
+
+  //! always careful because views might not be deep copyable
+  auto view_test_dc = create_deep_copyable_compatible_clone(view_test);
+  auto view_test_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_test_dc);
+
+  auto view_from_dc = create_deep_copyable_compatible_clone(view_from);
+  auto view_from_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(0));
+  }
+
+  else if (name == "one-element-b") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2));
+  }
+
+  else if (name == "two-elements-a") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(0));
+  }
+
+  else if (name == "two-elements-b") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(0));
+  }
+
+  else if (name == "small-a") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(-4));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(-2));
+    EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(2));
+    EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(4));
+    EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(0));
+  }
+
+  else if (name == "small-b") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(22));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(-12));
+    EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(22));
+    EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(-12));
+    EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(22));
+    EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(-12));
+    EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(22));
+    EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(-12));
+    EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(22));
+    EXPECT_TRUE(view_test_h(9) == static_cast<value_type>(-12));
+    EXPECT_TRUE(view_test_h(10) == static_cast<value_type>(22));
+    EXPECT_TRUE(view_test_h(11) == static_cast<value_type>(-12));
+    EXPECT_TRUE(view_test_h(12) == static_cast<value_type>(22));
+  }
+
+  else if (name == "medium" || name == "large") {
+    // for (std::size_t i = 0; i < view_from_h.extent(0); ++i){
+    //   std::cout << "i= " << i << " "
+    // 		<< " vf = " << view_from_h(i) << " "
+    // 		<< " vt = " << view_test_h(i) << '\n';
+    // }
+
+    std::size_t count = 0;
+    for (std::size_t i = 0; i < view_from_h.extent(0); ++i) {
+      if (pred(view_from_h(i))) {
+        EXPECT_TRUE(view_test_h(count++) == view_from_h(i));
+      }
+    }
+    // all other entries of test view should be zero
+    for (; count < view_test_h.extent(0); ++count) {
+      // std::cout << count << '\n';
+      EXPECT_TRUE(view_test_h(count) == value_type(0));
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "copy_if: " << name << ", " << view_tag_to_string(Tag{}) << ",
+  // "
+  //           << value_type_to_string(ValueType()) << std::endl;
+
+  auto view_from = create_view<ValueType>(Tag{}, view_ext, "copy_if_from");
+  IsEvenFunctor<ValueType> pred;
+
+  {
+    auto n         = fill_view(view_from, name, pred);
+    auto view_dest = create_view<ValueType>(Tag{}, view_ext, "copy_if_dest");
+    auto rit       = KE::copy_if(exespace(), KE::cbegin(view_from),
+                           KE::cend(view_from), KE::begin(view_dest), pred);
+    verify_data(name, view_from, view_dest, pred);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  {
+    auto n         = fill_view(view_from, name, pred);
+    auto view_dest = create_view<ValueType>(Tag{}, view_ext, "copy_if_dest");
+    auto rit       = KE::copy_if("label", exespace(), KE::cbegin(view_from),
+                           KE::cend(view_from), KE::begin(view_dest), pred);
+    verify_data(name, view_from, view_dest, pred);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  {
+    auto n         = fill_view(view_from, name, pred);
+    auto view_dest = create_view<ValueType>(Tag{}, view_ext, "copy_if_dest");
+    auto rit       = KE::copy_if(exespace(), view_from, view_dest, pred);
+    verify_data(name, view_from, view_dest, pred);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  {
+    auto n         = fill_view(view_from, name, pred);
+    auto view_dest = create_view<ValueType>(Tag{}, view_ext, "copy_if_dest");
+    auto rit = KE::copy_if("label", exespace(), view_from, view_dest, pred);
+    verify_data(name, view_from, view_dest, pred);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 1103},      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, copy_if) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace CopyIf
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dfc7d794ed68e7a1cc217f7d02c147d6903addce
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp
@@ -0,0 +1,142 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace Count {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void test_count(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+
+  view_host_space_t expected("count_expected", view.extent(0));
+  compare_views(expected, view);
+
+  {
+    const value_t count_value = 0;
+    const auto std_result =
+        std::count(KE::cbegin(expected), KE::cend(expected), count_value);
+    EXPECT_EQ(view.extent(0), size_t(std_result));
+
+    // pass const iterators
+    EXPECT_EQ(std_result, KE::count(exespace(), KE::cbegin(view),
+                                    KE::cend(view), count_value));
+    // pass view
+    EXPECT_EQ(std_result, KE::count(exespace(), view, count_value));
+  }
+
+  {
+    const value_t count_value = 13;
+    const auto std_result =
+        std::count(KE::cbegin(expected), KE::cend(expected), count_value);
+
+    // pass iterators
+    EXPECT_EQ(std_result, KE::count("label", exespace(), KE::begin(view),
+                                    KE::end(view), count_value));
+    // pass view
+    EXPECT_EQ(std_result, KE::count("label", exespace(), view, count_value));
+  }
+}
+
+template <class ViewType>
+void test_count_if(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+
+  view_host_space_t expected("count_expected", view.extent(0));
+  compare_views(expected, view);
+
+  // no positive elements (all zeroes)
+  const auto predicate = IsPositiveFunctor<value_type>();
+  EXPECT_EQ(0,
+            std::count_if(KE::begin(expected), KE::end(expected), predicate));
+
+  // pass iterators
+  EXPECT_EQ(
+      0, KE::count_if(exespace(), KE::begin(view), KE::end(view), predicate));
+  // pass view
+  EXPECT_EQ(0, KE::count_if(exespace(), view, predicate));
+
+  fill_views_inc(view, expected);
+
+  const auto std_result =
+      std::count_if(KE::begin(expected), KE::end(expected), predicate);
+  // pass const iterators
+  EXPECT_EQ(std_result, KE::count_if("label", exespace(), KE::cbegin(view),
+                                     KE::cend(view), predicate));
+  // pass view
+  EXPECT_EQ(std_result, KE::count_if("label", exespace(), view, predicate));
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  for (const auto& scenario : default_scenarios) {
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "count");
+      test_count(view);
+    }
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "count");
+      test_count_if(view);
+    }
+  }
+}
+
+TEST(std_algorithms_count_test, test) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedTwoTag, int>();
+  run_all_scenarios<StridedThreeTag, unsigned>();
+}
+
+}  // namespace Count
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..78edff4230db7c9bb5d88eba34448782443c1278
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp
@@ -0,0 +1,150 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace Equal {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void test_equal(const ViewType view) {
+  auto copy = create_deep_copyable_compatible_clone(view);
+
+  // pass iterators
+  EXPECT_TRUE(
+      KE::equal(exespace(), KE::begin(view), KE::end(view), KE::begin(copy)));
+  // pass views
+  EXPECT_TRUE(KE::equal(exespace(), view, copy));
+
+  // modify copy - make the last element different
+  const auto extent = view.extent(0);
+  if (extent > 0) {
+    KE::fill(exespace(), KE::end(copy) - 1, KE::end(copy), 1);
+
+    // pass const iterators
+    EXPECT_FALSE(KE::equal(exespace(), KE::cbegin(view), KE::cend(view),
+                           KE::cbegin(copy)));
+    // pass views
+    EXPECT_FALSE(KE::equal("label", exespace(), view, copy));
+  }
+}
+
+template <class ViewType>
+void test_equal_custom_comparator(const ViewType view) {
+  using value_t = typename ViewType::value_type;
+  const auto p  = CustomEqualityComparator<value_t>();
+  auto copy     = create_deep_copyable_compatible_clone(view);
+
+  // pass iterators
+  EXPECT_TRUE(KE::equal(exespace(), KE::begin(view), KE::end(view),
+                        KE::begin(copy), p));
+  // pass views
+  EXPECT_TRUE(KE::equal(exespace(), view, copy, p));
+
+  // modify copy - make the last element different
+  const auto extent = view.extent(0);
+  if (extent > 0) {
+    KE::fill(exespace(), KE::end(copy) - 1, KE::end(copy), 1);
+
+    // pass const iterators
+    EXPECT_FALSE(KE::equal("label", exespace(), KE::cbegin(view),
+                           KE::cend(view), KE::cbegin(copy), p));
+    // pass views
+    EXPECT_FALSE(KE::equal(exespace(), view, copy, p));
+  }
+}
+
+template <class ViewType>
+void test_equal_4_iterators(const ViewType view) {
+  using value_t = typename ViewType::value_type;
+  const auto p  = CustomEqualityComparator<value_t>();
+  auto copy     = create_deep_copyable_compatible_clone(view);
+
+  // pass iterators
+  EXPECT_TRUE(KE::equal(exespace(), KE::begin(view), KE::end(view),
+                        KE::begin(copy), KE::end(copy)));
+  // pass const and non-const iterators, custom comparator
+  EXPECT_TRUE(KE::equal("label", exespace(), KE::cbegin(view), KE::cend(view),
+                        KE::begin(copy), KE::end(copy), p));
+
+  const auto extent = view.extent(0);
+  if (extent > 0) {
+    // use different length ranges, pass const iterators
+    EXPECT_FALSE(KE::equal(exespace(), KE::cbegin(view), KE::cend(view),
+                           KE::cbegin(copy), KE::cend(copy) - 1));
+
+    // modify copy - make the last element different
+    KE::fill(exespace(), KE::end(copy) - 1, KE::end(copy), 1);
+    // pass const iterators
+    EXPECT_FALSE(KE::equal(exespace(), KE::cbegin(view), KE::cend(view),
+                           KE::cbegin(copy), KE::cend(copy)));
+  }
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  for (const auto& scenario : default_scenarios) {
+    auto view = create_view<ValueType>(Tag{}, scenario.second, "equal");
+    test_equal(view);
+    test_equal_custom_comparator(view);
+    test_equal_4_iterators(view);
+  }
+}
+
+TEST(std_algorithms_equal_test, test) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedTwoTag, int>();
+  run_all_scenarios<StridedThreeTag, unsigned>();
+}
+
+}  // namespace Equal
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..99c921323beaa4b623451c739aa96b757b6836cc
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp
@@ -0,0 +1,381 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_Numeric.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace EScan {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(0.05, 1.2) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(1, 3) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_zero(ViewType view) {
+  Kokkos::parallel_for(view.extent(0), FillZeroFunctor<ViewType>(view));
+}
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  UnifDist<value_type> randObj;
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+    v_h(5) = static_cast<value_type>(-2);
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+// I had to write my own because std::exclusive_scan is ONLY found with
+// std=c++17
+template <class it1, class it2, class ValType, class BopType>
+void my_host_exclusive_scan(it1 first, it1 last, it2 dest, ValType init,
+                            BopType bop) {
+  const auto num_elements = last - first;
+  if (num_elements > 0) {
+    while (first < last - 1) {
+      *(dest++) = init;
+      init      = bop(*first++, init);
+    }
+    *dest = init;
+  }
+}
+
+template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
+void verify_data(ViewType1 data_view,  // contains data
+                 ViewType2 test_view,  // the view to test
+                 ValueType init_value, BinaryOp bop) {
+  //! always careful because views might not be deep copyable
+
+  auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
+  auto data_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
+
+  using gold_view_value_type = typename ViewType2::value_type;
+  Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
+      "goldh", data_view.extent(0));
+  my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
+                         KE::begin(gold_h), init_value, bop);
+
+  auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto test_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
+  if (test_view_h.extent(0) > 0) {
+    for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
+      // std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
+      //           << gold_h(i) << " " << test_view_h(i) << " "
+      //           << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+      if (std::is_same<gold_view_value_type, int>::value) {
+        EXPECT_TRUE(gold_h(i) == test_view_h(i));
+      } else {
+        const auto error = std::abs(gold_h(i) - test_view_h(i));
+        if (error > 1e-10) {
+          std::cout << i << " " << std::setprecision(15) << data_view_h(i)
+                    << " " << gold_h(i) << " " << test_view_h(i) << " "
+                    << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+        }
+        EXPECT_TRUE(error < 1e-10);
+      }
+    }
+  }
+}
+
+template <class ValueType>
+struct MultiplyFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a * b);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType& a,
+                       const volatile ValueType& b) const {
+    return (a * b);
+  }
+};
+
+template <class ValueType>
+struct SumFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a + b);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType& a,
+                       const volatile ValueType& b) const {
+    return (a + b);
+  }
+};
+
+std::string value_type_to_string(int) { return "int"; }
+
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario_default_op(const InfoType& scenario_info,
+                                    ValueType init_value) {
+  using default_op           = SumFunctor<ValueType>;
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "exclusive_scan default op: " << name << ", "
+  //           << view_tag_to_string(Tag{}) << ", "
+  //           << value_type_to_string(ValueType()) << ", "
+  //           << "init = " << init_value << std::endl;
+
+  auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
+  auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
+  fill_view(view_from, name);
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest),
+                                init_value);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, default_op());
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest),
+                                init_value);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, default_op());
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, default_op());
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
+                                init_value);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, default_op());
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType, class InfoType, class BinaryOp>
+void run_single_scenario_custom_op(const InfoType& scenario_info,
+                                   ValueType init_value, BinaryOp bop) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "exclusive_scan custom op: " << name << ", "
+  //           << view_tag_to_string(Tag{}) << ", "
+  //           << value_type_to_string(ValueType()) << ", "
+  //           << "init = " << init_value << std::endl;
+
+  auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
+  auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
+  fill_view(view_from, name);
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest),
+                                init_value, bop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest),
+                                init_value, bop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r =
+        KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
+                                init_value, bop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_exclusive_scan_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+      {"medium", 1103},      {"large", 10513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario_default_op<Tag, ValueType>(it, ValueType{0});
+    run_single_scenario_default_op<Tag, ValueType>(it, ValueType{1});
+    run_single_scenario_default_op<Tag, ValueType>(it, ValueType{-2});
+    run_single_scenario_default_op<Tag, ValueType>(it, ValueType{3});
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+    // custom multiply op is only run for small views otherwise it overflows
+    if (it.first == "small-a" || it.first == "small-b") {
+      using custom_bop_t = MultiplyFunctor<ValueType>;
+      run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
+                                                    custom_bop_t());
+      run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
+                                                    custom_bop_t());
+      run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
+                                                    custom_bop_t());
+      run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
+                                                    custom_bop_t());
+    }
+
+    using custom_bop_t = SumFunctor<ValueType>;
+    run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
+                                                  custom_bop_t());
+    run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
+                                                  custom_bop_t());
+    run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
+                                                  custom_bop_t());
+    run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
+                                                  custom_bop_t());
+#endif
+  }
+}
+
+TEST(std_algorithms_numeric_ops_test, exclusive_scan) {
+  run_exclusive_scan_all_scenarios<DynamicTag, double>();
+  run_exclusive_scan_all_scenarios<StridedThreeTag, double>();
+  run_exclusive_scan_all_scenarios<DynamicTag, int>();
+  run_exclusive_scan_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace EScan
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..357e733dc2b7c9e8d6132319cfb9e44422e48e8b
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp
@@ -0,0 +1,191 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <iterator>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace Find {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void test_find(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+
+  view_host_space_t expected("count_expected", view.extent(0));
+  compare_views(expected, view);
+  constexpr value_t find_value = 13;
+
+  // value not found, return last
+  EXPECT_EQ(KE::end(expected),
+            std::find(KE::begin(expected), KE::end(expected), find_value));
+
+  // pass const iterators, returns const iterator
+  EXPECT_EQ(KE::cend(view),
+            KE::find(exespace(), KE::cbegin(view), KE::cend(view), find_value));
+  // pass view, returns iterator
+  EXPECT_EQ(KE::end(view), KE::find(exespace(), view, find_value));
+
+  fill_views_inc(view, expected);
+
+  auto std_result =
+      std::find(KE::begin(expected), KE::end(expected), find_value);
+  auto distance = std::distance(KE::begin(expected), std_result);
+
+  // pass iterators, returns iterator
+  EXPECT_EQ(KE::begin(view) + distance,
+            KE::find(exespace(), KE::begin(view), KE::end(view), find_value));
+  // pass view, returns iterator
+  EXPECT_EQ(KE::begin(view) + distance, KE::find(exespace(), view, find_value));
+}
+
+template <class ViewType>
+void test_find_if(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+
+  view_host_space_t expected("count_expected", view.extent(0));
+  compare_views(expected, view);
+
+  const auto not_equals_zero = NotEqualsZeroFunctor<value_type>();
+
+  // value not found, return last
+  EXPECT_EQ(
+      KE::end(expected),
+      std::find_if(KE::begin(expected), KE::end(expected), not_equals_zero));
+
+  // pass iterators, returns iterator
+  EXPECT_EQ(KE::end(view), KE::find_if(exespace(), KE::begin(view),
+                                       KE::end(view), not_equals_zero));
+  // pass view, returns iterator
+  EXPECT_EQ(KE::end(view), KE::find_if(exespace(), view, not_equals_zero));
+
+  fill_views_inc(view, expected);
+
+  constexpr value_t find_value = 13;
+  const auto equals_val        = EqualsValFunctor<value_type>(find_value);
+  auto std_result =
+      std::find_if(KE::begin(expected), KE::end(expected), equals_val);
+  auto distance = std::distance(KE::begin(expected), std_result);
+
+  // pass const iterators, returns const iterator
+  EXPECT_EQ(
+      KE::cbegin(view) + distance,
+      KE::find_if(exespace(), KE::cbegin(view), KE::cend(view), equals_val));
+  // pass view, returns iterator
+  EXPECT_EQ(KE::begin(view) + distance,
+            KE::find_if(exespace(), view, equals_val));
+}
+
+template <class ViewType>
+void test_find_if_not(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+
+  view_host_space_t expected("count_expected", view.extent(0));
+  compare_views(expected, view);
+
+  const auto not_equals_zero = NotEqualsZeroFunctor<value_type>();
+
+  // first value matches
+  EXPECT_EQ(KE::begin(expected),
+            std::find_if_not(KE::begin(expected), KE::end(expected),
+                             not_equals_zero));
+
+  // pass iterators, returns iterator
+  EXPECT_EQ(KE::begin(view), KE::find_if_not(exespace(), KE::begin(view),
+                                             KE::end(view), not_equals_zero));
+  // pass view, returns iterator
+  EXPECT_EQ(KE::begin(view),
+            KE::find_if_not(exespace(), view, not_equals_zero));
+
+  fill_views_inc(view, expected);
+
+  const auto equals_zero = EqualsValFunctor<value_type>(0);
+  auto std_result =
+      std::find_if_not(KE::begin(expected), KE::end(expected), equals_zero);
+  auto distance = std::distance(KE::begin(expected), std_result);
+
+  // pass const iterators, returns const iterator
+  EXPECT_EQ(KE::cbegin(view) + distance,
+            KE::find_if_not(exespace(), KE::cbegin(view), KE::cend(view),
+                            equals_zero));
+  // pass view, returns const iterator
+  EXPECT_EQ(KE::begin(view) + distance,
+            KE::find_if_not(exespace(), view, equals_zero));
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  for (const auto& scenario : default_scenarios) {
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "find");
+      test_find(view);
+    }
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "find_if");
+      test_find_if(view);
+    }
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "find_if_not");
+      test_find_if_not(view);
+    }
+  }
+}
+
+TEST(std_algorithms_find_test, test) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedTwoTag, int>();
+  run_all_scenarios<StridedThreeTag, unsigned>();
+}
+
+}  // namespace Find
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b4685ced723199d19303660ce7f0a0d57b4ec3b2
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp
@@ -0,0 +1,387 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace FindEnd {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(0, 20) { m_gen.seed(1034343); }
+  UnifDist(int a, int b) : m_dist(a, b) { m_gen.seed(234343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "three-elements-a") {
+    v_h(0) = static_cast<value_type>(-1);
+    v_h(1) = static_cast<value_type>(2);
+    v_h(2) = static_cast<value_type>(2);
+  }
+
+  else if (name == "three-elements-b") {
+    v_h(0) = static_cast<value_type>(3);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(3);
+  }
+
+  else if (name == "four-elements-a") {
+    v_h(0) = static_cast<value_type>(-1);
+    v_h(1) = static_cast<value_type>(2);
+    v_h(2) = static_cast<value_type>(2);
+    v_h(3) = static_cast<value_type>(4);
+  }
+
+  else if (name == "four-elements-b") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(1);
+    v_h(3) = static_cast<value_type>(1);
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(4);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(-1);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(1);
+    v_h(7)  = static_cast<value_type>(2);
+    v_h(8)  = static_cast<value_type>(2);
+    v_h(9)  = static_cast<value_type>(4);
+    v_h(10) = static_cast<value_type>(1);
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(2);
+    v_h(2)  = static_cast<value_type>(3);
+    v_h(3)  = static_cast<value_type>(1);
+    v_h(4)  = static_cast<value_type>(-1);
+    v_h(5)  = static_cast<value_type>(-2);
+    v_h(6)  = static_cast<value_type>(0);
+    v_h(7)  = static_cast<value_type>(1);
+    v_h(8)  = static_cast<value_type>(2);
+    v_h(9)  = static_cast<value_type>(2);
+    v_h(10) = static_cast<value_type>(5);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+  }
+
+  else {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType>
+auto create_seq(ViewType data_view, std::size_t seq_extent) {
+  // we need to specify a sequence that we search for
+  // within the original view/range.
+  // to do this, rather than doing something purely random,
+  // we use the view with the data, and select a subsequence.
+
+  auto data_view_h            = create_host_space_copy(data_view);
+  const auto data_view_extent = data_view.extent(0);
+
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+  using seq_view_t = Kokkos::View<value_type*, exe_space>;
+  seq_view_t seq_view("seq_view", seq_extent);
+  auto seq_view_h = create_mirror_view(Kokkos::HostSpace(), seq_view);
+
+  // when the target sequence is of same size as view, just fill
+  // sequeunce with all values of the view
+  if (seq_extent == data_view_extent) {
+    for (std::size_t i = 0; i < seq_extent; ++i) {
+      seq_view_h(i) = data_view_h(i);
+    }
+  } else {
+    // if target sequence to fill is smaller, then we need to pick
+    // a starting point to copy data from to make the the sequence.
+    // we pick randomly between 0 and data_view_extent - seq_extent.
+    // and fill the sequeunce data with the values copied from data view.
+
+    using dist_type = std::uniform_int_distribution<int>;
+    std::random_device r;
+    // from this:
+    // https://stackoverflow.com/questions/34490599/c11-how-to-set-seed-using-random
+    std::seed_seq seed{r(), r(), r(), r(), r(), r()};
+    std::mt19937 gen(seed);
+    dist_type dist(0, data_view_extent - seq_extent);
+    const auto start = dist(gen);
+    // std::cout << "start= " << start << "\n";
+    for (std::size_t i = 0; i < seq_extent; ++i) {
+      seq_view_h(i) = data_view_h(start + i);
+      // std::cout << "i= " << i << " " << seq_view_h(i) << "\n";
+    }
+  }
+
+  Kokkos::deep_copy(seq_view, seq_view_h);
+  return seq_view;
+}
+
+// search is only avai from c++17, so I have to put it here
+template <class ForwardIt1, class ForwardIt2, class BinaryPredicate>
+ForwardIt1 my_std_search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first,
+                         ForwardIt2 s_last, BinaryPredicate p) {
+  for (;; ++first) {
+    ForwardIt1 it = first;
+    for (ForwardIt2 s_it = s_first;; ++it, ++s_it) {
+      if (s_it == s_last) {
+        return first;
+      }
+      if (it == last) {
+        return last;
+      }
+      if (!p(*it, *s_it)) {
+        break;
+      }
+    }
+  }
+}
+
+// only avai from c++17, so I have to put it here
+template <class ForwardIt1, class ForwardIt2, class BinaryPredicate>
+ForwardIt1 my_std_find_end(ForwardIt1 first, ForwardIt1 last,
+                           ForwardIt2 s_first, ForwardIt2 s_last,
+                           BinaryPredicate p) {
+  if (s_first == s_last) {
+    return last;
+  }
+
+  ForwardIt1 result = last;
+  while (true) {
+    ForwardIt1 new_result = my_std_search(first, last, s_first, s_last, p);
+    if (new_result == last) {
+      break;
+    } else {
+      result = new_result;
+      first  = result;
+      ++first;
+    }
+  }
+  return result;
+}
+
+template <class ForwardIt1, class ForwardIt2>
+ForwardIt1 my_std_find_end(ForwardIt1 first, ForwardIt1 last,
+                           ForwardIt2 s_first, ForwardIt2 s_last) {
+  using value_type1 = typename ForwardIt1::value_type;
+  using value_type2 = typename ForwardIt2::value_type;
+
+  using pred_t = IsEqualFunctor<value_type1, value_type2>;
+  return my_std_find_end(first, last, s_first, s_last, pred_t());
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name, std::size_t seq_ext) {
+  std::cout << "find_end: default predicate: " << name << ", "
+            << "find_end_seq_ext = " << seq_ext << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class Predicate>
+void print_scenario_details(const std::string& name, std::size_t seq_ext,
+                            Predicate pred) {
+  (void)pred;
+  std::cout << "find_end: custom  predicate: " << name << ", "
+            << "find_end_seq_ext = " << seq_ext << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext,
+                         Args... args) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, seq_ext, args...);
+
+  auto view = create_view<ValueType>(Tag{}, view_ext, "find_end_test_view");
+  fill_view(view, name);
+  auto s_view = create_seq(view, seq_ext);
+
+  // run std
+  auto view_h   = create_host_space_copy(view);
+  auto s_view_h = create_host_space_copy(s_view);
+  auto stdrit =
+      my_std_find_end(KE::cbegin(view_h), KE::cend(view_h),
+                      KE::cbegin(s_view_h), KE::cend(s_view_h), args...);
+
+  {
+    auto myrit = KE::find_end(exespace(), KE::cbegin(view), KE::cend(view),
+                              KE::cbegin(s_view), KE::cend(s_view), args...);
+    const auto mydiff  = myrit - KE::cbegin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    // std::cout << "result : " << mydiff << " " << stddiff << std::endl;
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit =
+        KE::find_end("label", exespace(), KE::cbegin(view), KE::cend(view),
+                     KE::cbegin(s_view), KE::cend(s_view), args...);
+    const auto mydiff  = myrit - KE::cbegin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit         = KE::find_end(exespace(), view, s_view, args...);
+    const auto mydiff  = myrit - KE::begin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit = KE::find_end("label", exespace(), view, s_view, args...);
+    const auto mydiff  = myrit - KE::begin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {{"empty", 0},
+                                                        {"one-element-a", 1},
+                                                        {"one-element-b", 1},
+                                                        {"two-elements-a", 2},
+                                                        {"two-elements-b", 2},
+                                                        {"three-elements-a", 3},
+                                                        {"three-elements-b", 3},
+                                                        {"four-elements-a", 4},
+                                                        {"four-elements-b", 4},
+                                                        {"small-a", 11},
+                                                        {"small-b", 13},
+                                                        {"medium-a", 11103},
+                                                        {"medium-b", 21103},
+                                                        {"large-a", 101513},
+                                                        {"large-b", 100111}};
+
+  const std::vector<std::size_t> seq_extents = {
+      0, 1, 2, 3, 4, 5, 8, 11, 15, 31, 113, 523, 1035, 11103};
+
+  // for each scenario we want to run "find_end"
+  // for a set of sequences of various extents
+  for (const auto& it : scenarios) {
+    for (const auto& it2 : seq_extents) {
+      // only run if view is larger or equal than sequence
+      if (it.second >= it2) {
+        run_single_scenario<Tag, ValueType>(it, it2);
+
+        using func_t = IsEqualFunctor<ValueType>;
+        run_single_scenario<Tag, ValueType>(it, it2, func_t());
+      }
+    }
+  }
+}
+
+TEST(std_algorithms_non_mod_seq_ops, find_end) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace FindEnd
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bd6ea0300a6ff80f6d9a0c3af7139668280f6311
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp
@@ -0,0 +1,303 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace FindFirstOf {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+  UnifDist(int a, int b) : m_dist(a, b) { m_gen.seed(514343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "three-elements-a") {
+    v_h(0) = static_cast<value_type>(-1);
+    v_h(1) = static_cast<value_type>(2);
+    v_h(2) = static_cast<value_type>(3);
+  }
+
+  else if (name == "three-elements-b") {
+    v_h(0) = static_cast<value_type>(3);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(-4);
+  }
+
+  else if (name == "four-elements-a") {
+    v_h(0) = static_cast<value_type>(-1);
+    v_h(1) = static_cast<value_type>(2);
+    v_h(2) = static_cast<value_type>(2);
+    v_h(3) = static_cast<value_type>(4);
+  }
+
+  else if (name == "four-elements-b") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(1);
+    v_h(3) = static_cast<value_type>(1);
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(4);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(-1);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(1);
+    v_h(7)  = static_cast<value_type>(2);
+    v_h(8)  = static_cast<value_type>(2);
+    v_h(9)  = static_cast<value_type>(4);
+    v_h(10) = static_cast<value_type>(1);
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(2);
+    v_h(2)  = static_cast<value_type>(3);
+    v_h(3)  = static_cast<value_type>(1);
+    v_h(4)  = static_cast<value_type>(-1);
+    v_h(5)  = static_cast<value_type>(-2);
+    v_h(6)  = static_cast<value_type>(0);
+    v_h(7)  = static_cast<value_type>(1);
+    v_h(8)  = static_cast<value_type>(2);
+    v_h(9)  = static_cast<value_type>(2);
+    v_h(10) = static_cast<value_type>(5);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+  }
+
+  else {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType>
+auto create_seq_for_find_first_of(ViewType data_view, std::size_t seq_extent) {
+  (void)data_view;
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+  using seq_view_t = Kokkos::View<value_type*, exe_space>;
+  seq_view_t seq_view("seq_view", seq_extent);
+  auto seq_view_h = create_mirror_view(Kokkos::HostSpace(), seq_view);
+
+  UnifDist<value_type> randObj(-10, -10);
+  for (std::size_t i = 0; i < seq_extent; ++i) {
+    seq_view_h(i) = randObj();
+  }
+
+  Kokkos::deep_copy(seq_view, seq_view_h);
+  return seq_view;
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name, std::size_t seq_ext) {
+  std::cout << "find_first_of: default predicate: " << name << ", "
+            << "seach_seq_ext = " << seq_ext << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class Predicate>
+void print_scenario_details(const std::string& name, std::size_t seq_ext,
+                            Predicate pred) {
+  (void)pred;
+  std::cout << "find_first_of: custom  predicate: " << name << ", "
+            << "seach_seq_ext = " << seq_ext << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext,
+                         Args... args) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, seq_ext, args...);
+
+  auto view =
+      create_view<ValueType>(Tag{}, view_ext, "find_first_of_test_view");
+  fill_view(view, name);
+  auto s_view = create_seq_for_find_first_of(view, seq_ext);
+
+  // run std
+  auto view_h   = create_host_space_copy(view);
+  auto s_view_h = create_host_space_copy(s_view);
+  auto stdrit =
+      std::find_first_of(KE::cbegin(view_h), KE::cend(view_h),
+                         KE::cbegin(s_view_h), KE::cend(s_view_h), args...);
+
+  {
+    auto myrit =
+        KE::find_first_of(exespace(), KE::cbegin(view), KE::cend(view),
+                          KE::cbegin(s_view), KE::cend(s_view), args...);
+    const auto mydiff  = myrit - KE::cbegin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit =
+        KE::find_first_of("label", exespace(), KE::cbegin(view), KE::cend(view),
+                          KE::cbegin(s_view), KE::cend(s_view), args...);
+    const auto mydiff  = myrit - KE::cbegin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit         = KE::find_first_of(exespace(), view, s_view, args...);
+    const auto mydiff  = myrit - KE::begin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit = KE::find_first_of("label", exespace(), view, s_view, args...);
+    const auto mydiff  = myrit - KE::begin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {{"empty", 0},
+                                                        {"one-element-a", 1},
+                                                        {"one-element-b", 1},
+                                                        {"two-elements-a", 2},
+                                                        {"two-elements-b", 2},
+                                                        {"three-elements-a", 3},
+                                                        {"three-elements-b", 3},
+                                                        {"four-elements-a", 4},
+                                                        {"four-elements-b", 4},
+                                                        {"small-a", 11},
+                                                        {"small-b", 13},
+                                                        {"medium-a", 11103},
+                                                        {"medium-b", 21103},
+                                                        {"large-a", 101513},
+                                                        {"large-b", 100111}};
+
+  const std::vector<std::size_t> seq_extents = {0,  1,  2,  3,   4,   5,   8,
+                                                11, 20, 31, 113, 523, 1035};
+
+  // for each scenario we want to run "find_first_of"
+  // for a set of sequences of various extents
+  for (const auto& it : scenarios) {
+    for (const auto& it2 : seq_extents) {
+      run_single_scenario<Tag, ValueType>(it, it2);
+
+      using func_t = IsEqualFunctor<ValueType>;
+      run_single_scenario<Tag, ValueType>(it, it2, func_t());
+    }
+  }
+}
+
+TEST(std_algorithms_non_mod_seq_ops, find_first_of) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace FindFirstOf
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c8cec00edc60a50986502f4c8730a60d01c7cf82
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp
@@ -0,0 +1,167 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace ForEach {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void test_for_each(const ViewType view) {
+  using value_t           = typename ViewType::value_type;
+  using view_host_space_t = Kokkos::View<value_t*, Kokkos::HostSpace>;
+
+  view_host_space_t expected("for_each_expected", view.extent(0));
+  compare_views(expected, view);
+
+  const auto mod_functor = IncrementElementWiseFunctor<value_t>();
+
+  // pass view, functor takes non-const ref
+  KE::for_each("label", exespace(), view, mod_functor);
+  std::for_each(KE::begin(expected), KE::end(expected), mod_functor);
+  compare_views(expected, view);
+
+  // pass iterators, functor takes non-const ref
+  KE::for_each(exespace(), KE::begin(view), KE::end(view), mod_functor);
+  std::for_each(KE::begin(expected), KE::end(expected), mod_functor);
+  compare_views(expected, view);
+
+  const auto non_mod_functor = NoOpNonMutableFunctor<value_t>();
+
+  // pass view, functor takes const ref
+  KE::for_each(exespace(), view, non_mod_functor);
+  std::for_each(KE::begin(expected), KE::end(expected), non_mod_functor);
+  compare_views(expected, view);
+
+  // pass const iterators, functor takes const ref
+  KE::for_each(exespace(), KE::cbegin(view), KE::cend(view), non_mod_functor);
+  std::for_each(KE::begin(expected), KE::end(expected), non_mod_functor);
+  compare_views(expected, view);
+
+#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
+  const auto mod_lambda = KOKKOS_LAMBDA(value_t & i) { ++i; };
+
+  // pass view, lambda takes non-const ref
+  KE::for_each(exespace(), view, mod_lambda);
+  std::for_each(KE::begin(expected), KE::end(expected), mod_lambda);
+  compare_views(expected, view);
+
+  // pass iterators, lambda takes non-const ref
+  KE::for_each(exespace(), KE::begin(view), KE::end(view), mod_lambda);
+  std::for_each(KE::begin(expected), KE::end(expected), mod_lambda);
+  compare_views(expected, view);
+
+  const auto non_mod_lambda = KOKKOS_LAMBDA(const value_t& i) { (void)i; };
+
+  // pass view, lambda takes const ref
+  KE::for_each(exespace(), view, non_mod_lambda);
+  std::for_each(KE::cbegin(expected), KE::cend(expected), non_mod_lambda);
+  compare_views(expected, view);
+
+  // pass const iterators, lambda takes const ref
+  KE::for_each(exespace(), KE::cbegin(view), KE::cend(view), non_mod_lambda);
+  std::for_each(KE::cbegin(expected), KE::cend(expected), non_mod_lambda);
+  compare_views(expected, view);
+#endif
+}
+
+// std::for_each_n is C++17, so we cannot compare results directly
+template <class ViewType>
+void test_for_each_n(const ViewType view) {
+  using value_t       = typename ViewType::value_type;
+  const std::size_t n = view.extent(0);
+
+  const auto non_mod_functor = NoOpNonMutableFunctor<value_t>();
+
+  // pass const iterators, functor takes const ref
+  EXPECT_EQ(KE::cbegin(view) + n,
+            KE::for_each_n(exespace(), KE::cbegin(view), n, non_mod_functor));
+  verify_values(value_t{0}, view);
+
+  // pass view, functor takes const ref
+  EXPECT_EQ(KE::begin(view) + n,
+            KE::for_each_n(exespace(), view, n, non_mod_functor));
+  verify_values(value_t{0}, view);
+
+  // pass iterators, functor takes non-const ref
+  const auto mod_functor = IncrementElementWiseFunctor<value_t>();
+  EXPECT_EQ(KE::begin(view) + n,
+            KE::for_each_n(exespace(), KE::begin(view), n, mod_functor));
+  verify_values(value_t{1}, view);
+
+  // pass view, functor takes non-const ref
+  EXPECT_EQ(KE::begin(view) + n,
+            KE::for_each_n("label", exespace(), view, n, mod_functor));
+  verify_values(value_t{2}, view);
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  for (const auto& scenario : default_scenarios) {
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "for_each");
+      test_for_each(view);
+    }
+    {
+      auto view = create_view<ValueType>(Tag{}, scenario.second, "for_each_n");
+      test_for_each_n(view);
+    }
+  }
+}
+
+TEST(std_algorithms_for_each_test, test) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedTwoTag, int>();
+  run_all_scenarios<StridedThreeTag, unsigned>();
+}
+
+}  // namespace ForEach
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef366c56e78786ff13592e0ea6663e55be845100
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp
@@ -0,0 +1,188 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_HELPERS_FUNCTORS_HPP
+#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_HELPERS_FUNCTORS_HPP
+
+#include <Kokkos_Core.hpp>
+#include <type_traits>
+
+namespace Test {
+namespace stdalgos {
+
+template <class ViewTypeFrom, class ViewTypeTo>
+struct CopyFunctor {
+  ViewTypeFrom m_view_from;
+  ViewTypeTo m_view_to;
+
+  CopyFunctor() = delete;
+
+  CopyFunctor(const ViewTypeFrom view_from, const ViewTypeTo view_to)
+      : m_view_from(view_from), m_view_to(view_to) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int i) const { m_view_to(i) = m_view_from(i); }
+};
+
+template <class ItTypeFrom, class ViewTypeTo>
+struct CopyFromIteratorFunctor {
+  ItTypeFrom m_it_from;
+  ViewTypeTo m_view_to;
+
+  CopyFromIteratorFunctor(const ItTypeFrom it_from, const ViewTypeTo view_to)
+      : m_it_from(it_from), m_view_to(view_to) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int) const { m_view_to() = *m_it_from; }
+};
+
+template <class ValueType>
+struct IncrementElementWiseFunctor {
+  KOKKOS_INLINE_FUNCTION
+  void operator()(ValueType& val) const { ++val; }
+};
+
+template <class ViewType>
+struct FillZeroFunctor {
+  ViewType m_view;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int index) const {
+    m_view(index) = static_cast<typename ViewType::value_type>(0);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  FillZeroFunctor(ViewType viewIn) : m_view(viewIn) {}
+};
+
+template <class ValueType>
+struct NoOpNonMutableFunctor {
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const ValueType& val) const { (void)val; }
+};
+
+template <class ViewType>
+struct AssignIndexFunctor {
+  ViewType m_view;
+
+  AssignIndexFunctor(ViewType view) : m_view(view) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int i) const { m_view(i) = typename ViewType::value_type(i); }
+};
+
+template <class ValueType>
+struct IsEvenFunctor {
+  static_assert(std::is_integral<ValueType>::value,
+                "IsEvenFunctor uses operator%, so ValueType must be int");
+
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val) const { return (val % 2 == 0); }
+};
+
+template <class ValueType>
+struct IsPositiveFunctor {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val) const { return (val > 0); }
+};
+
+template <class ValueType>
+struct IsNegativeFunctor {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val) const { return (val < 0); }
+};
+
+template <class ValueType>
+struct NotEqualsZeroFunctor {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val) const { return val != 0; }
+};
+
+template <class ValueType>
+struct EqualsValFunctor {
+  const ValueType m_value;
+
+  EqualsValFunctor(ValueType value) : m_value(value) {}
+
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val) const { return val == m_value; }
+};
+
+template <class ValueType1, class ValueType2>
+struct CustomLessThanComparator {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType1& a, const ValueType2& b) const {
+    return a < b;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const volatile ValueType1& a,
+                  const volatile ValueType1& b) const {
+    return a < b;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  CustomLessThanComparator() {}
+};
+
+template <class ValueType>
+struct CustomEqualityComparator {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType& a, const ValueType& b) const {
+    return a == b;
+  }
+};
+
+template <class ValueType1, class ValueType2 = ValueType1>
+struct IsEqualFunctor {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType1& a, const ValueType2& b) const {
+    return (a == b);
+  }
+};
+
+}  // namespace stdalgos
+}  // namespace Test
+
+#endif
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0f90623a34d2c60eb5c0236f7ab729923f515d4e
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp
@@ -0,0 +1,390 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_Numeric.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace IncScan {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(0.05, 1.2) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(1, 3) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_zero(ViewType view) {
+  Kokkos::parallel_for(view.extent(0), FillZeroFunctor<ViewType>(view));
+}
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  UnifDist<value_type> randObj;
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+    v_h(5) = static_cast<value_type>(-2);
+  }
+
+  else if (name == "medium-a" || name == "medium-b" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+// my own because std::inclusive_scan is ONLY found with std=c++17
+template <class it1, class it2, class BinOp>
+void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop) {
+  if (first != last) {
+    auto init = *first;
+    *dest     = init;
+    while (++first < last) {
+      init      = bop(*first, init);
+      *(++dest) = init;
+    }
+  }
+}
+
+template <class it1, class it2, class BinOp, class ValType>
+void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop,
+                            ValType init) {
+  if (first != last) {
+    init  = bop(*first, init);
+    *dest = init;
+    while (++first < last) {
+      init      = bop(*first, init);
+      *(++dest) = init;
+    }
+  }
+}
+
+template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
+void verify_data(ViewType1 data_view,  // contains data
+                 ViewType2 test_view,  // the view to test
+                 BinaryOp bop, Args... args /* copy on purpose */) {
+  //! always careful because views might not be deep copyable
+
+  auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
+  auto data_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
+
+  using gold_view_value_type = typename ViewType2::value_type;
+  Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
+      "goldh", data_view.extent(0));
+  my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
+                         KE::begin(gold_h), bop, args...);
+
+  auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto test_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
+
+  const auto ext = test_view_h.extent(0);
+  if (ext > 0) {
+    for (std::size_t i = 0; i < ext; ++i) {
+      // std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
+      //           << gold_h(i) << " " << test_view_h(i) << " "
+      //           << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+
+      if (std::is_same<gold_view_value_type, int>::value) {
+        EXPECT_TRUE(gold_h(i) == test_view_h(i));
+      } else {
+        const auto error = std::abs(gold_h(i) - test_view_h(i));
+        if (error > 1e-10) {
+          std::cout << i << " " << std::setprecision(15) << data_view_h(i)
+                    << " " << gold_h(i) << " " << test_view_h(i) << " "
+                    << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+        }
+        EXPECT_TRUE(error < 1e-10);
+      }
+    }
+    // std::cout << " last el: " << test_view_h(ext-1) << std::endl;
+  }
+}
+
+template <class ValueType>
+struct MultiplyFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a * b);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType& a,
+                       const volatile ValueType& b) const {
+    return (a * b);
+  }
+};
+
+template <class ValueType>
+struct SumFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a + b);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType& a,
+                       const volatile ValueType& b) const {
+    return (a + b);
+  }
+};
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario_default_op(const InfoType& scenario_info) {
+  using default_op           = SumFunctor<ValueType>;
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "inclusive_scan default op: " << name << ", "
+  //           << view_tag_to_string(Tag{}) << ", "
+  //           << value_type_to_string(ValueType()) << std::endl;
+
+  auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
+  auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
+  fill_view(view_from, name);
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest));
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, default_op());
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest));
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, default_op());
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan(exespace(), view_from, view_dest);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, default_op());
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, default_op());
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType, class InfoType, class BinaryOp,
+          class... Args>
+void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop,
+                                   Args... args /* copy on purpose */) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+
+  // if (1 == sizeof...(Args)) {
+  //   std::cout << "inclusive_scan custom op and init value: " << name << ", "
+  //             << view_tag_to_string(Tag{}) << ", "
+  //             << value_type_to_string(ValueType()) << ", " << std::endl;
+  // } else {
+  //   std::cout << "inclusive_scan custom op: " << name << ", "
+  //             << view_tag_to_string(Tag{}) << ", "
+  //             << value_type_to_string(ValueType()) << ", " << std::endl;
+  // }
+
+  auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
+  auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
+  fill_view(view_from, name);
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest), bop,
+                                args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, bop, args...);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest), bop,
+                                args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, bop, args...);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, bop, args...);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop,
+                                args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, bop, args...);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_inclusive_scan_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+      {"medium-a", 313},     {"medium-b", 1103}, {"large", 10513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario_default_op<Tag, ValueType>(it);
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+    // the sum custom op is always run
+    using sum_binary_op = SumFunctor<ValueType>;
+    sum_binary_op sbop;
+    run_single_scenario_custom_op<Tag, ValueType>(it, sbop);
+    run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{0});
+    run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{1});
+    run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{-2});
+    run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{3});
+
+    // custom multiply only for small views to avoid overflows
+    if (it.first == "small-a" || it.first == "small-b") {
+      using mult_binary_op = MultiplyFunctor<ValueType>;
+      mult_binary_op mbop;
+      run_single_scenario_custom_op<Tag, ValueType>(it, mbop);
+      run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{0});
+      run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{1});
+      run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{-2});
+      run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{3});
+    }
+#endif
+  }
+}
+
+TEST(std_algorithms_numeric_ops_test, inclusive_scan) {
+  run_inclusive_scan_all_scenarios<DynamicTag, double>();
+  run_inclusive_scan_all_scenarios<StridedThreeTag, double>();
+  run_inclusive_scan_all_scenarios<DynamicTag, int>();
+  run_inclusive_scan_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace IncScan
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..acfb4c3f420cb043ce9453897763511947db1efc
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp
@@ -0,0 +1,222 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_SortingOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace IsSorted {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+    v_h(5) = static_cast<value_type>(-15);
+  }
+
+  else if (name == "medium-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+  }
+
+  else if (name == "medium-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+    v_h(4) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "large-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(-100) + static_cast<value_type>(i);
+    }
+  }
+
+  else if (name == "large-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(-100) + static_cast<value_type>(i);
+    }
+    v_h(156) = static_cast<value_type>(-250);
+
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+bool compute_gold(const std::string& name) {
+  if (name == "empty") {
+    return true;
+  } else if (name == "one-element") {
+    return true;
+  } else if (name == "two-elements-a") {
+    return true;
+  } else if (name == "two-elements-b") {
+    return false;
+  } else if (name == "small-a") {
+    return true;
+  } else if (name == "small-b") {
+    return false;
+  } else if (name == "medium-a") {
+    return true;
+  } else if (name == "medium-b") {
+    return false;
+  } else if (name == "large-a") {
+    return true;
+  } else if (name == "large-b") {
+    return false;
+  } else {
+    throw std::runtime_error("invalid choice");
+  }
+}
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+
+  // std::cout << "is-sorted: " << name << ", " << view_tag_to_string(Tag{})
+  //           << std::endl;
+
+  auto view = create_view<ValueType>(Tag{}, view_ext, "is_sorted");
+  fill_view(view, name);
+  const auto gold = compute_gold(name);
+
+  std::vector<bool> resultsA(4);
+  resultsA[0] = KE::is_sorted(exespace(), KE::cbegin(view), KE::cend(view));
+  resultsA[1] =
+      KE::is_sorted("label", exespace(), KE::cbegin(view), KE::cend(view));
+  resultsA[2]     = KE::is_sorted(exespace(), view);
+  resultsA[3]     = KE::is_sorted("label", exespace(), view);
+  const auto allA = std::all_of(resultsA.cbegin(), resultsA.cend(),
+                                [=](bool v) { return v == gold; });
+  EXPECT_TRUE(allA);
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+  CustomLessThanComparator<ValueType, ValueType> comp;
+  std::vector<bool> resultsB(4);
+  resultsB[0] =
+      KE::is_sorted(exespace(), KE::cbegin(view), KE::cend(view), comp);
+  resultsB[1]     = KE::is_sorted("label", exespace(), KE::cbegin(view),
+                              KE::cend(view), comp);
+  resultsB[2]     = KE::is_sorted(exespace(), view, comp);
+  resultsB[3]     = KE::is_sorted("label", exespace(), view, comp);
+  const auto allB = std::all_of(resultsB.cbegin(), resultsB.cend(),
+                                [=](bool v) { return v == gold; });
+  EXPECT_TRUE(allB);
+#endif
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_is_sorted_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+      {"medium-a", 1003},    {"medium-b", 1003}, {"large-a", 101513},
+      {"large-b", 101513}};
+
+  std::cout << "is_sorted: " << view_tag_to_string(Tag{})
+            << ", all overloads \n";
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_sorting_ops_test, is_sorted) {
+  run_is_sorted_all_scenarios<DynamicTag, double>();
+  run_is_sorted_all_scenarios<StridedTwoTag, double>();
+  run_is_sorted_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace IsSorted
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3860fecfc637c39282386b05981adb1a8791697c
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp
@@ -0,0 +1,225 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_SortingOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace IsSortedUntil {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+    v_h(5) = static_cast<value_type>(15);
+  }
+
+  else if (name == "medium-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+  }
+
+  else if (name == "medium-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+    v_h(4) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "large-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(-100) + static_cast<value_type>(i);
+    }
+  }
+
+  else if (name == "large-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(-100) + static_cast<value_type>(i);
+    }
+    v_h(156) = static_cast<value_type>(-250);
+
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType>
+auto compute_gold(ViewType view, const std::string& name) {
+  if (name == "empty") {
+    return KE::end(view);
+  } else if (name == "one-element") {
+    return KE::end(view);
+  } else if (name == "two-elements-a") {
+    return KE::end(view);
+  } else if (name == "two-elements-b") {
+    return KE::begin(view) + 1;
+  } else if (name == "small-a") {
+    return KE::end(view);
+  } else if (name == "small-b") {
+    return KE::begin(view) + 6;
+  } else if (name == "medium-a") {
+    return KE::end(view);
+  } else if (name == "medium-b") {
+    return KE::begin(view) + 4;
+  } else if (name == "large-a") {
+    return KE::end(view);
+  } else if (name == "large-b") {
+    return KE::begin(view) + 156;
+  } else {
+    throw std::runtime_error("invalid choice");
+  }
+}
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+
+  // std::cout << "is-sorted-until: " << name << ", " <<
+  // view_tag_to_string(Tag{})
+  //           << std::endl;
+
+  auto view = create_view<ValueType>(Tag{}, view_ext, "is_sorted_until");
+  fill_view(view, name);
+  const auto gold = compute_gold(view, name);
+
+  auto r1 = KE::is_sorted_until(exespace(), KE::begin(view), KE::end(view));
+  auto r2 =
+      KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view));
+  auto r3 = KE::is_sorted_until(exespace(), view);
+  auto r4 = KE::is_sorted_until("label", exespace(), view);
+  EXPECT_TRUE(r1 == gold);
+  EXPECT_TRUE(r2 == gold);
+  EXPECT_TRUE(r3 == gold);
+  EXPECT_TRUE(r4 == gold);
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+  CustomLessThanComparator<ValueType, ValueType> comp;
+  auto r5 =
+      KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp);
+  auto r6 = KE::is_sorted_until("label", exespace(), KE::cbegin(view),
+                                KE::cend(view), comp);
+  auto r7 = KE::is_sorted_until(exespace(), view, comp);
+  auto r8 = KE::is_sorted_until("label", exespace(), view, comp);
+#endif
+
+  EXPECT_TRUE(r1 == gold);
+  EXPECT_TRUE(r2 == gold);
+  EXPECT_TRUE(r3 == gold);
+  EXPECT_TRUE(r4 == gold);
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_is_sorted_until_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+      {"medium-a", 1003},    {"medium-b", 1003}, {"large-a", 101513},
+      {"large-b", 101513}};
+
+  std::cout << "is_sorted_until: " << view_tag_to_string(Tag{})
+            << ", all overloads \n";
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_sorting_ops_test, is_sorted_until) {
+  run_is_sorted_until_all_scenarios<DynamicTag, double>();
+  run_is_sorted_until_all_scenarios<StridedTwoTag, double>();
+  run_is_sorted_until_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace IsSortedUntil
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c0c3e4cc8639b36dd80977440a7078a9669cc4f
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp
@@ -0,0 +1,184 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace LexicographicalCompare {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType1, class ViewType2>
+void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) {
+  auto host_copy_1 = create_host_space_copy(view_1);
+  auto host_copy_2 = create_host_space_copy(view_2);
+
+  auto first_1 = KE::begin(view_1);
+  auto last_1  = KE::end(view_1);
+  auto first_2 = KE::begin(view_2);
+  auto last_2  = KE::end(view_2);
+
+  auto h_first_1 = KE::begin(host_copy_1);
+  auto h_last_1  = KE::end(host_copy_1);
+  auto h_first_2 = KE::begin(host_copy_2);
+  auto h_last_2  = KE::end(host_copy_2);
+
+  {
+    // default comparator
+    auto std_result =
+        std::lexicographical_compare(h_first_1, h_last_1, h_first_2, h_last_2);
+
+    // pass iterators
+    EXPECT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1,
+                                                      last_1, first_2, last_2));
+    EXPECT_EQ(std_result,
+              KE::lexicographical_compare("label", exespace(), first_1, last_1,
+                                          first_2, last_2));
+
+    // pass views
+    EXPECT_EQ(std_result,
+              KE::lexicographical_compare(exespace(), view_1, view_2));
+    EXPECT_EQ(std_result,
+              KE::lexicographical_compare("label", exespace(), view_1, view_2));
+  }
+
+  {
+    // custom comparator
+    using value_t_1 = typename ViewType1::value_type;
+    using value_t_2 = typename ViewType2::value_type;
+    const auto custom_comparator =
+        CustomLessThanComparator<value_t_1, value_t_2>();
+    auto std_result = std::lexicographical_compare(
+        h_first_1, h_last_1, h_first_2, h_last_2, custom_comparator);
+
+    // pass iterators
+    EXPECT_EQ(std_result,
+              KE::lexicographical_compare(exespace(), first_1, last_1, first_2,
+                                          last_2, custom_comparator));
+    EXPECT_EQ(std_result,
+              KE::lexicographical_compare("label", exespace(), first_1, last_1,
+                                          first_2, last_2, custom_comparator));
+
+    // pass views
+    EXPECT_EQ(std_result, KE::lexicographical_compare(
+                              exespace(), view_1, view_2, custom_comparator));
+    EXPECT_EQ(std_result,
+              KE::lexicographical_compare("label", exespace(), view_1, view_2,
+                                          custom_comparator));
+  }
+
+  {
+    // empty vs non-empty
+    auto std_result =
+        std::lexicographical_compare(h_first_1, h_first_1, h_first_2, h_last_2);
+    EXPECT_EQ(std_result, KE::lexicographical_compare(
+                              exespace(), first_1, first_1, first_2, last_2));
+  }
+
+  {
+    // pass shorter range
+    if (view_1.extent(0) > 1) {
+      auto std_result = std::lexicographical_compare(h_first_1, h_last_1 - 1,
+                                                     h_first_2, h_last_2);
+      EXPECT_EQ(std_result,
+                KE::lexicographical_compare(exespace(), first_1, last_1 - 1,
+                                            first_2, last_2));
+    }
+  }
+
+  {
+    // first element smaller
+    if (view_1.extent(0) > 1) {
+      KE::fill(exespace(), first_1, first_1 + 1, 1);
+      KE::fill(exespace(), first_2, first_2 + 1, 2);
+
+      EXPECT_TRUE(KE::lexicographical_compare(exespace(), first_1, last_1,
+                                              first_2, last_2));
+    }
+  }
+
+  {
+    // first element bigger, last element smaller
+    if (view_1.extent(0) > 2) {
+      KE::fill(exespace(), first_1, first_1 + 1, 2);
+      KE::fill(exespace(), first_2, first_2 + 1, 1);
+
+      KE::fill(exespace(), last_1 - 1, last_1, 1);
+      KE::fill(exespace(), last_2 - 1, last_2, 2);
+
+      EXPECT_FALSE(KE::lexicographical_compare(exespace(), first_1, last_1,
+                                               first_2, last_2));
+    }
+  }
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  for (const auto& scenario : default_scenarios) {
+    auto view1 = create_view<ValueType>(Tag{}, scenario.second,
+                                        "lexicographical_compare_1");
+    auto view2 = create_view<ValueType>(Tag{}, scenario.second,
+                                        "lexicographical_compare_2");
+
+    test_lexicographical_compare(view1, view2);
+  }
+}
+
+TEST(std_algorithms_lexicographical_compare_test, test) {
+// FIXME: should this disable only custom comparator tests?
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedTwoTag, int>();
+  run_all_scenarios<StridedThreeTag, unsigned>();
+#endif
+}
+
+}  // namespace LexicographicalCompare
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c13cdac0b1d5891e5dacf66cd17db7b99cd44e6c
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp
@@ -0,0 +1,492 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp>
+
+namespace KE = Kokkos::Experimental;
+
+namespace Test {
+namespace stdalgos {
+
+struct std_algorithms_min_max_element_test : std_algorithms_test {
+  const int m_number_of_filling_cases = 5;
+
+  void fillFixtureViews(int case_number) {
+    static_view_t tmpView("tmpView");
+    auto tmp_view_h = Kokkos::create_mirror_view(Kokkos::HostSpace(), tmpView);
+    if (case_number == 1) {
+      tmp_view_h(0) = 0;
+      tmp_view_h(1) = 0;
+      tmp_view_h(2) = 0;
+      tmp_view_h(3) = 2;
+      tmp_view_h(4) = 2;
+      tmp_view_h(5) = 1;
+      tmp_view_h(6) = 1;
+      tmp_view_h(7) = 1;
+      tmp_view_h(8) = 1;
+      tmp_view_h(9) = 0;
+    }
+
+    else if (case_number == 2) {
+      tmp_view_h(0) = 1;
+      tmp_view_h(1) = 2;
+      tmp_view_h(2) = 3;
+      tmp_view_h(3) = 4;
+      tmp_view_h(4) = 5;
+      tmp_view_h(5) = 6;
+      tmp_view_h(6) = 7;
+      tmp_view_h(7) = 8;
+      tmp_view_h(8) = 9;
+      tmp_view_h(9) = 10;
+    }
+
+    else if (case_number == 3) {
+      tmp_view_h(0) = 8;
+      tmp_view_h(1) = 8;
+      tmp_view_h(2) = -1;
+      tmp_view_h(3) = -1;
+      tmp_view_h(4) = 5;
+      tmp_view_h(5) = 5;
+      tmp_view_h(6) = 5;
+      tmp_view_h(7) = 8;
+      tmp_view_h(8) = 2;
+      tmp_view_h(9) = 1;
+    }
+
+    else if (case_number == 4) {
+      tmp_view_h(0) = 2;
+      tmp_view_h(1) = 2;
+      tmp_view_h(2) = 2;
+      tmp_view_h(3) = 2;
+      tmp_view_h(4) = 2;
+      tmp_view_h(5) = 2;
+      tmp_view_h(6) = 2;
+      tmp_view_h(7) = 2;
+      tmp_view_h(8) = 2;
+      tmp_view_h(9) = 2;
+    }
+
+    else if (case_number == 5) {
+      tmp_view_h(0) = 1;
+      tmp_view_h(1) = 2;
+      tmp_view_h(2) = 3;
+      tmp_view_h(3) = 4;
+      tmp_view_h(4) = 5;
+      tmp_view_h(5) = 12;
+      tmp_view_h(6) = 5;
+      tmp_view_h(7) = 4;
+      tmp_view_h(8) = 3;
+      tmp_view_h(9) = 2;
+    }
+
+    else {
+    }
+
+    Kokkos::deep_copy(tmpView, tmp_view_h);
+    copyInputViewToFixtureViews(tmpView);
+  }
+
+  Kokkos::pair<int, value_type> goldSolutionMaxElement(int caseNumber) {
+    // returns {indexOfMaxElem, maxValue}
+    if (caseNumber == 1) {
+      return {3, 2};
+    } else if (caseNumber == 2) {
+      return {9, 10};
+    } else if (caseNumber == 3) {
+      return {0, 8};
+    } else if (caseNumber == 4) {
+      return {0, 2};
+    } else if (caseNumber == 5) {
+      return {5, 12};
+    } else {
+      return {};
+    }
+  }
+
+  Kokkos::pair<int, value_type> goldSolutionMinElement(int caseNumber) {
+    // returns {indexOfMinElem, minValue}
+    if (caseNumber == 1) {
+      return {0, 0};
+    } else if (caseNumber == 2) {
+      return {0, 1};
+    } else if (caseNumber == 3) {
+      return {2, -1};
+    } else if (caseNumber == 4) {
+      return {0, 2};
+    } else if (caseNumber == 5) {
+      return {0, 1};
+    } else {
+      return {};
+    }
+  }
+
+  Kokkos::pair<Kokkos::pair<int, value_type>, Kokkos::pair<int, value_type>>
+  goldSolutionMinMaxElement(int caseNumber) {
+    // returns {{indexOfMinElem, minValue}, {indexOfMaxElem, maxValue}}
+    // remember that for min it finds the first smallest element
+    // remember that for max it finds the last biggest element
+    if (caseNumber == 1) {
+      return {{0, 0}, {4, 2}};
+    } else if (caseNumber == 2) {
+      return {{0, 1}, {9, 10}};
+    } else if (caseNumber == 3) {
+      return {{2, -1}, {7, 8}};
+    } else if (caseNumber == 4) {
+      return {{0, 2}, {9, 2}};
+    } else if (caseNumber == 5) {
+      return {{0, 1}, {5, 12}};
+    } else {
+      return {};
+    }
+  }
+
+  template <class ViewType>
+  void test_max_element_non_trivial_data(ViewType view);
+  template <class ViewType>
+  void test_min_element_non_trivial_data(ViewType view);
+  template <class ViewType>
+  void test_minmax_element_non_trivial_data(ViewType view);
+
+  template <class ViewType>
+  void test_max_element_non_trivial_data_custom_comp(ViewType view);
+  template <class ViewType>
+  void test_min_element_non_trivial_data_custom_comp(ViewType view);
+  template <class ViewType>
+  void test_minmax_element_non_trivial_data_custom_comp(ViewType view);
+};
+
+template <class IndexType, class ValueType, class ItType, class TestedViewType>
+void std_algo_min_max_test_verify(Kokkos::pair<IndexType, ValueType> goldPair,
+                                  const ItType result,
+                                  TestedViewType testedView) {
+  // check that iterator is pointing to right element
+  EXPECT_EQ(result - KE::begin(testedView), goldPair.first);
+
+  // create a view for the result to copy into it the iterator's value
+  using result_view_t = Kokkos::View<int>;
+  result_view_t resultView("result");
+  CopyFromIteratorFunctor<ItType, result_view_t> cf(result, resultView);
+  Kokkos::parallel_for("_std_algo_copy", 1, cf);
+  auto result_v_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), resultView);
+
+  // use the host mirror of the result view to check that the values match
+  EXPECT_EQ(result_v_h(), goldPair.second);
+}
+
+template <class GoldSolutionType, class ItType, class TestedViewType>
+void std_algo_min_max_test_verify(const GoldSolutionType& goldSolution,
+                                  const ItType itMin, const ItType itMax,
+                                  TestedViewType testedView) {
+  std_algo_min_max_test_verify(goldSolution.first, itMin, testedView);
+  std_algo_min_max_test_verify(goldSolution.second, itMax, testedView);
+}
+
+template <class ViewType>
+void test_max_element_trivial_data(ViewType view) {
+  /* if we pass empty range, should return last */
+  auto result = KE::max_element(exespace(), KE::cbegin(view), KE::cbegin(view));
+  EXPECT_TRUE(result == KE::cbegin(view));
+
+  /* if we pass empty range, should return last */
+  auto it0     = KE::cbegin(view) + 3;
+  auto it1     = it0;
+  auto result2 = KE::max_element(exespace(), it0, it1);
+  EXPECT_TRUE(result2 == it1);
+}
+
+template <class ViewType>
+void test_min_element_trivial_data(ViewType view) {
+  /* if we pass empty range, should return last */
+  auto result = KE::min_element(exespace(), KE::cbegin(view), KE::cbegin(view));
+  EXPECT_TRUE(result == KE::cbegin(view));
+
+  /* if we pass empty range, should return last */
+  auto it0     = KE::cbegin(view) + 3;
+  auto it1     = it0;
+  auto result2 = KE::min_element(exespace(), it0, it1);
+  EXPECT_TRUE(result2 == it1);
+}
+
+template <class ViewType>
+void test_minmax_element_empty_range(ViewType view) {
+  auto result =
+      KE::minmax_element(exespace(), KE::cbegin(view), KE::cbegin(view));
+  EXPECT_TRUE(result.first == KE::cbegin(view));
+  EXPECT_TRUE(result.second == KE::cbegin(view));
+  auto it0     = KE::cbegin(view) + 3;
+  auto it1     = it0;
+  auto result2 = KE::minmax_element(exespace(), it0, it1);
+  EXPECT_TRUE(result2.first == it1);
+  EXPECT_TRUE(result2.second == it1);
+}
+
+template <class ViewType>
+void std_algorithms_min_max_element_test::test_max_element_non_trivial_data(
+    ViewType view) {
+  for (int id = 1; id <= m_number_of_filling_cases; ++id) {
+    fillFixtureViews(id);
+    const auto gold_solution = goldSolutionMaxElement(id);
+    // API accepting view
+    {
+      const auto result = KE::max_element(exespace(), view);
+      std_algo_min_max_test_verify(gold_solution, result, view);
+      const auto result2 = KE::max_element("MYCUSTOMLABEL1", exespace(), view);
+      std_algo_min_max_test_verify(gold_solution, result2, view);
+    }
+    // API accepting iterators
+    {
+      const auto result =
+          KE::max_element(exespace(), KE::begin(view), KE::end(view));
+      std_algo_min_max_test_verify(gold_solution, result, view);
+      const auto result2 = KE::max_element("MYCUSTOMLABEL2", exespace(),
+                                           KE::begin(view), KE::end(view));
+      std_algo_min_max_test_verify(gold_solution, result2, view);
+    }
+  }
+}
+
+template <class ViewType>
+void std_algorithms_min_max_element_test::test_min_element_non_trivial_data(
+    ViewType view) {
+  for (int id = 1; id <= m_number_of_filling_cases; ++id) {
+    fillFixtureViews(id);
+    const auto goldPair = goldSolutionMinElement(id);
+    // API accepting view
+    {
+      const auto result = KE::min_element(exespace(), view);
+      std_algo_min_max_test_verify(goldPair, result, view);
+      const auto result2 = KE::min_element("MYCUSTOMLABEL1", exespace(), view);
+      std_algo_min_max_test_verify(goldPair, result2, view);
+    }
+    // API accepting iterators
+    {
+      const auto result =
+          KE::min_element(exespace(), KE::begin(view), KE::end(view));
+      std_algo_min_max_test_verify(goldPair, result, view);
+      const auto result2 = KE::min_element("MYCUSTOMLABEL2", exespace(),
+                                           KE::begin(view), KE::end(view));
+      std_algo_min_max_test_verify(goldPair, result2, view);
+    }
+  }
+}
+
+template <class ViewType>
+void std_algorithms_min_max_element_test::test_minmax_element_non_trivial_data(
+    ViewType view) {
+  for (int id = 1; id <= m_number_of_filling_cases; ++id) {
+    fillFixtureViews(id);
+    const auto gold = goldSolutionMinMaxElement(id);
+    {
+      auto result = KE::minmax_element(exespace(), view);
+      std_algo_min_max_test_verify(gold, result.first, result.second, view);
+      const auto result2 =
+          KE::minmax_element("MYCUSTOMLABEL1", exespace(), view);
+      std_algo_min_max_test_verify(gold, result2.first, result2.second, view);
+    }
+    {
+      const auto result =
+          KE::minmax_element(exespace(), KE::begin(view), KE::end(view));
+      std_algo_min_max_test_verify(gold, result.first, result.second, view);
+      const auto result2 = KE::minmax_element("MYCUSTOMLABEL2", exespace(),
+                                              KE::begin(view), KE::end(view));
+      std_algo_min_max_test_verify(gold, result2.first, result2.second, view);
+    }
+  }
+}
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+template <class ViewType>
+void std_algorithms_min_max_element_test::
+    test_max_element_non_trivial_data_custom_comp(ViewType view) {
+  for (int id = 1; id <= m_number_of_filling_cases; ++id) {
+    fillFixtureViews(id);
+    const auto goldPair = goldSolutionMaxElement(id);
+    CustomLessThanComparator<value_type, value_type> comp;
+    // API accepting view
+    {
+      const auto result = KE::max_element(exespace(), view, comp);
+      std_algo_min_max_test_verify(goldPair, result, view);
+      const auto result2 =
+          KE::max_element("MYCUSTOMLABEL3", exespace(), view, comp);
+      std_algo_min_max_test_verify(goldPair, result2, view);
+    }
+    // API accepting iterators
+    {
+      const auto result =
+          KE::max_element(exespace(), KE::begin(view), KE::end(view), comp);
+      std_algo_min_max_test_verify(goldPair, result, view);
+      const auto result2 = KE::max_element(
+          "MYCUSTOMLABEL4", exespace(), KE::begin(view), KE::end(view), comp);
+      std_algo_min_max_test_verify(goldPair, result2, view);
+    }
+  }
+}
+
+template <class ViewType>
+void std_algorithms_min_max_element_test::
+    test_min_element_non_trivial_data_custom_comp(ViewType view) {
+  for (int id = 1; id <= m_number_of_filling_cases; ++id) {
+    fillFixtureViews(id);
+    const auto goldPair = goldSolutionMinElement(id);
+    CustomLessThanComparator<value_type, value_type> comp;
+    // API accepting view
+    {
+      const auto result = KE::min_element(exespace(), view, comp);
+      std_algo_min_max_test_verify(goldPair, result, view);
+      const auto result2 =
+          KE::min_element("MYCUSTOMLABEL3", exespace(), view, comp);
+      std_algo_min_max_test_verify(goldPair, result2, view);
+    }
+    // API accepting iterators
+    {
+      const auto result =
+          KE::min_element(exespace(), KE::begin(view), KE::end(view), comp);
+      std_algo_min_max_test_verify(goldPair, result, view);
+      const auto result2 = KE::min_element(
+          "MYCUSTOMLABEL4", exespace(), KE::begin(view), KE::end(view), comp);
+      std_algo_min_max_test_verify(goldPair, result2, view);
+    }
+  }
+}
+
+template <class ViewType>
+void std_algorithms_min_max_element_test::
+    test_minmax_element_non_trivial_data_custom_comp(ViewType view) {
+  for (int id = 1; id <= m_number_of_filling_cases; ++id) {
+    fillFixtureViews(id);
+    const auto goldPair = goldSolutionMinMaxElement(id);
+    CustomLessThanComparator<value_type, value_type> comp;
+    {
+      const auto result = KE::minmax_element(exespace(), view, comp);
+      std_algo_min_max_test_verify(goldPair, result.first, result.second, view);
+      const auto result2 =
+          KE::minmax_element("MYCUSTOMLABEL3", exespace(), view, comp);
+      std_algo_min_max_test_verify(goldPair, result2.first, result2.second,
+                                   view);
+    }
+    {
+      const auto result =
+          KE::minmax_element(exespace(), KE::begin(view), KE::end(view), comp);
+      std_algo_min_max_test_verify(goldPair, result.first, result.second, view);
+      const auto result2 = KE::minmax_element(
+          "MYCUSTOMLABEL4", exespace(), KE::begin(view), KE::end(view), comp);
+      std_algo_min_max_test_verify(goldPair, result2.first, result2.second,
+                                   view);
+    }
+  }
+}
+#endif
+
+// trivial case
+TEST_F(std_algorithms_min_max_element_test, min_element_empty_range) {
+  test_min_element_trivial_data(m_static_view);
+  test_min_element_trivial_data(m_dynamic_view);
+  test_min_element_trivial_data(m_strided_view);
+}
+
+TEST_F(std_algorithms_min_max_element_test, max_element_empty_range) {
+  test_max_element_trivial_data(m_static_view);
+  test_max_element_trivial_data(m_dynamic_view);
+  test_max_element_trivial_data(m_strided_view);
+}
+
+// non-trivial data
+TEST_F(std_algorithms_min_max_element_test, min_element_non_trivial_data) {
+  test_min_element_non_trivial_data(m_static_view);
+  test_min_element_non_trivial_data(m_dynamic_view);
+  test_min_element_non_trivial_data(m_strided_view);
+}
+
+TEST_F(std_algorithms_min_max_element_test, max_element_non_trivial_data) {
+  test_max_element_non_trivial_data(m_static_view);
+  test_max_element_non_trivial_data(m_dynamic_view);
+  test_max_element_non_trivial_data(m_strided_view);
+}
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+// non-trivial data, custom comp
+TEST_F(std_algorithms_min_max_element_test,
+       min_element_non_trivial_data_custom_comp) {
+  test_min_element_non_trivial_data_custom_comp(m_static_view);
+  test_min_element_non_trivial_data_custom_comp(m_dynamic_view);
+  test_min_element_non_trivial_data_custom_comp(m_strided_view);
+}
+
+TEST_F(std_algorithms_min_max_element_test,
+       max_element_non_trivial_data_custom_comp) {
+  test_max_element_non_trivial_data_custom_comp(m_static_view);
+  test_max_element_non_trivial_data_custom_comp(m_dynamic_view);
+  test_max_element_non_trivial_data_custom_comp(m_strided_view);
+}
+#endif
+
+#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_CLANG) && \
+    (KOKKOS_COMPILER_CLANG >= 1300)
+TEST_F(std_algorithms_min_max_element_test, minmax_element_empty_range) {
+  test_minmax_element_empty_range(m_static_view);
+  test_minmax_element_empty_range(m_dynamic_view);
+  test_minmax_element_empty_range(m_strided_view);
+}
+
+TEST_F(std_algorithms_min_max_element_test, minmax_element_non_trivial_data) {
+  test_minmax_element_non_trivial_data(m_static_view);
+  test_minmax_element_non_trivial_data(m_dynamic_view);
+  test_minmax_element_non_trivial_data(m_strided_view);
+}
+#endif
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+// OpenMPTarget does not yet support custom comparator
+TEST_F(std_algorithms_min_max_element_test,
+       minmax_element_non_trivial_data_custom_comp) {
+  test_minmax_element_non_trivial_data_custom_comp(m_static_view);
+  test_minmax_element_non_trivial_data_custom_comp(m_dynamic_view);
+  test_minmax_element_non_trivial_data_custom_comp(m_strided_view);
+}
+#endif
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f13fe071d554599be6735820075da0e848463b81
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp
@@ -0,0 +1,228 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <iterator>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
+#include <algorithm>
+#include <numeric>
+
+namespace Test {
+namespace stdalgos {
+namespace Mismatch {
+
+namespace KE = Kokkos::Experimental;
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(std::size_t ext1, std::size_t ext2,
+                            const std::string& flag) {
+  std::cout << "mismatch: "
+            << "ext1 = " << ext1 << ", "
+            << "ext2 = " << ext2 << ", " << flag << ", "
+            << view_tag_to_string(Tag{}) << ", "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ViewType, class... Args>
+void run_single_scenario(ViewType view1, ViewType view2,
+                         const std::string& flag, Args... args) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+  using aux_view_t = Kokkos::View<value_type*, exe_space>;
+
+  const std::size_t ext1 = view1.extent(0);
+  const std::size_t ext2 = view2.extent(0);
+  // print_scenario_details<Tag, value_type>(ext1, ext2, flag);
+
+  aux_view_t aux_view1("aux_view1", ext1);
+  auto v1_h = create_mirror_view(Kokkos::HostSpace(), aux_view1);
+  aux_view_t aux_view2("aux_view2", ext2);
+  auto v2_h = create_mirror_view(Kokkos::HostSpace(), aux_view2);
+
+  // note that the checks ext1>0 and ext2>0 are there
+  // otherwise we get an error for CUDA NVCC DEBUG CI
+
+  // view is is always filled with 8's
+  if (ext1 > 0) {
+    for (std::size_t i = 0; i < ext1; ++i) {
+      v1_h(i) = static_cast<value_type>(8);
+    }
+  }
+
+  if (flag == "fill-to-match") {
+    if (ext2 > 0) {
+      for (std::size_t i = 0; i < ext2; ++i) {
+        v2_h(i) = static_cast<value_type>(8);
+      }
+    }
+  }
+
+  else if (flag == "fill-to-mismatch") {
+    // need to make them mismatch, so we fill
+    // with same value and only modifify the
+    // second view arbitrarily at middle point
+
+    if (ext2 > 0) {
+      for (std::size_t i = 0; i < ext2; ++i) {
+        v2_h(i) = static_cast<value_type>(8);
+      }
+
+      // make them mismatch at middle
+      v2_h(ext2 / 2) = -5;
+    }
+  } else {
+    throw std::runtime_error("Kokkos: stdalgo: test: mismatch: Invalid string");
+  }
+
+  Kokkos::deep_copy(aux_view1, v1_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view1, view1);
+  Kokkos::parallel_for("copy1", view1.extent(0), F1);
+
+  Kokkos::deep_copy(aux_view2, v2_h);
+  CopyFunctor<aux_view_t, ViewType> F2(aux_view2, view2);
+  Kokkos::parallel_for("copy2", view2.extent(0), F2);
+
+  // run the std::mismatch on a host copy of the data
+  auto view1_h         = create_host_space_copy(view1);
+  auto view2_h         = create_host_space_copy(view2);
+  auto f1_h            = KE::cbegin(view1_h);
+  auto l1_h            = KE::cend(view1_h);
+  auto f2_h            = KE::cbegin(view2_h);
+  auto l2_h            = KE::cend(view2_h);
+  auto std_res         = std::mismatch(f1_h, l1_h, f2_h, l2_h, args...);
+  const auto std_diff1 = std_res.first - f1_h;
+  const auto std_diff2 = std_res.second - f2_h;
+
+  {
+    // check our overloads with iterators
+    auto f1      = KE::cbegin(view1);
+    auto l1      = KE::cend(view1);
+    auto f2      = KE::cbegin(view2);
+    auto l2      = KE::cend(view2);
+    auto my_res1 = KE::mismatch(exespace(), f1, l1, f2, l2, args...);
+    auto my_res2 = KE::mismatch("label", exespace(), f1, l1, f2, l2, args...);
+    const auto my_diff11 = my_res1.first - f1;
+    const auto my_diff12 = my_res1.second - f2;
+    const auto my_diff21 = my_res2.first - f1;
+    const auto my_diff22 = my_res2.second - f2;
+    EXPECT_TRUE(my_diff11 == std_diff1);
+    EXPECT_TRUE(my_diff12 == std_diff2);
+    EXPECT_TRUE(my_diff21 == std_diff1);
+    EXPECT_TRUE(my_diff22 == std_diff2);
+  }
+
+  {
+    // check our overloads with views
+    auto my_res1 = KE::mismatch(exespace(), view1, view2, args...);
+    auto my_res2 = KE::mismatch("label", exespace(), view1, view2, args...);
+    const auto my_diff11 = my_res1.first - KE::begin(view1);
+    const auto my_diff12 = my_res1.second - KE::begin(view2);
+    const auto my_diff21 = my_res2.first - KE::begin(view1);
+    const auto my_diff22 = my_res2.second - KE::begin(view2);
+    EXPECT_TRUE(my_diff11 == std_diff1);
+    EXPECT_TRUE(my_diff12 == std_diff2);
+    EXPECT_TRUE(my_diff21 == std_diff1);
+    EXPECT_TRUE(my_diff22 == std_diff2);
+  }
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  using vecs_t = std::vector<std::string>;
+
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},  {"one-element", 1}, {"two-elements", 2},
+      {"small", 11}, {"medium", 21103},  {"large", 101513}};
+
+  for (const auto& scenario : scenarios) {
+    {
+      const std::size_t view1_ext = scenario.second;
+      auto view1 = create_view<ValueType>(Tag{}, view1_ext, "mismatch_view_1");
+
+      // for each view1 scenario, I want to test the case of a
+      // second view that is smaller, equal size and greater than the view1
+      const vecs_t view2cases = (scenario.first != "empty")
+                                    ? vecs_t({"smaller", "equalsize", "larger"})
+                                    : vecs_t({"equalsize", "larger"});
+
+      for (auto it2 : view2cases) {
+        std::size_t view2_ext = view1_ext;
+
+        // modify extent of view2 based on what we want
+        if (std::string(it2) == "smaller") {
+          view2_ext -= 1;
+        } else if (std::string(it2) == "larger") {
+          view2_ext += 3;
+        }
+
+        auto view2 =
+            create_view<ValueType>(Tag{}, view2_ext, "mismatch_view_2");
+
+        // and now we want to test both the case view1 and view2 match,
+        // as well as the case where they don't match
+        for (const auto& it3 : {"fill-to-match", "fill-to-mismatch"}) {
+          // run to use default predicate
+          run_single_scenario<Tag>(view1, view2, it3);
+
+          // run using an arbitrary predicate
+          using predicate_type = IsEqualFunctor<ValueType>;
+          run_single_scenario<Tag>(view1, view2, it3, predicate_type());
+        }
+      }
+    }
+  }
+}
+
+TEST(std_algorithms_mismatch_test, test) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace Mismatch
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..44acb477395ba25ee70fba3d4a33ed856c952158
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp
@@ -0,0 +1,209 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_ModifyingOperations.hpp>
+
+namespace Test {
+namespace stdalgos {
+namespace ModOps {
+
+namespace KE = Kokkos::Experimental;
+
+// ------------
+// move
+// ------------
+struct MyMovableType {
+  int m_value = 11;
+
+  MyMovableType() = default;
+  MyMovableType(MyMovableType&& other) {
+    if (this != &other) {
+      m_value       = other.m_value;
+      other.m_value = -2;
+    }
+  }
+
+  MyMovableType& operator=(MyMovableType&& other) {
+    if (this != &other) {
+      m_value       = other.m_value;
+      other.m_value = -4;
+    }
+    return *this;
+  }
+};
+
+TEST(std_algorithms_mod_ops_test, move) {
+  MyMovableType a;
+  using move_t = decltype(KE::move(a));
+  static_assert(std::is_rvalue_reference<move_t>::value, "");
+
+  // move constr
+  MyMovableType b(KE::move(a));
+  EXPECT_TRUE(b.m_value == 11);
+  EXPECT_TRUE(a.m_value == -2);
+
+  // move assign
+  MyMovableType c;
+  c = KE::move(b);
+  EXPECT_TRUE(c.m_value == 11);
+  EXPECT_TRUE(b.m_value == -4);
+}
+
+template <class ViewType>
+struct StdAlgoModSeqOpsTestMove {
+  ViewType m_view;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int index) const {
+    typename ViewType::value_type a{11};
+    using move_t = decltype(KE::move(a));
+    static_assert(std::is_rvalue_reference<move_t>::value, "");
+    m_view(index) = KE::move(a);
+  }
+
+  StdAlgoModSeqOpsTestMove(ViewType view) : m_view(view) {}
+};
+
+TEST(std_algorithms_mod_ops_test, move_within_parfor) {
+  using view_t = Kokkos::View<double*>;
+  view_t a("a", 10);
+
+  StdAlgoModSeqOpsTestMove<view_t> fnc(a);
+  Kokkos::parallel_for(a.extent(0), fnc);
+  auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
+  for (std::size_t i = 0; i < a.extent(0); ++i) {
+    EXPECT_DOUBLE_EQ(a_h(0), 11.);
+  }
+}
+
+// ------------
+// swap
+// ------------
+TEST(std_algorithms_mod_ops_test, swap) {
+  {
+    int a = 1;
+    int b = 2;
+    KE::swap(a, b);
+    EXPECT_TRUE(a == 2);
+    EXPECT_TRUE(b == 1);
+  }
+
+  {
+    double a = 3.;
+    double b = 1.;
+    KE::swap(a, b);
+    EXPECT_DOUBLE_EQ(a, 1.);
+    EXPECT_DOUBLE_EQ(b, 3.);
+  }
+}
+
+template <class ViewType>
+struct StdAlgoModSeqOpsTestSwap {
+  ViewType m_view;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int index) const {
+    typename ViewType::value_type newval{11};
+    KE::swap(m_view(index), newval);
+  }
+
+  StdAlgoModSeqOpsTestSwap(ViewType aIn) : m_view(aIn) {}
+};
+
+TEST(std_algorithms_mod_ops_test, swap_within_parfor) {
+  auto a = create_view<double>(stdalgos::DynamicTag{}, 10, "a");
+  StdAlgoModSeqOpsTestSwap<decltype(a)> fnc(a);
+  Kokkos::parallel_for(a.extent(0), fnc);
+  auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
+  for (std::size_t i = 0; i < a.extent(0); ++i) {
+    EXPECT_DOUBLE_EQ(a_h(0), 11.);
+  }
+}
+
+// ------------
+// iter_swap
+// ------------
+template <class ViewType>
+void test_iter_swap(ViewType view) {
+  /* fill view */
+  auto F = AssignIndexFunctor<ViewType>(view);
+  Kokkos::parallel_for(view.extent(0), std::move(F));
+
+  /* call iter_swap */
+  auto it1 = KE::begin(view);
+  KE::iter_swap(it1, it1 + 3);
+  KE::iter_swap(it1 + 4, it1 + 6);
+
+  /* check result */
+  using value_type = typename ViewType::value_type;
+  auto a_dc        = create_deep_copyable_compatible_clone(view);
+  auto a_h         = create_mirror_view_and_copy(Kokkos::HostSpace(), a_dc);
+  EXPECT_TRUE(view.extent(0) == 10);
+  EXPECT_TRUE(a_h(0) == value_type(3));
+  EXPECT_TRUE(a_h(1) == value_type(1));
+  EXPECT_TRUE(a_h(2) == value_type(2));
+  EXPECT_TRUE(a_h(3) == value_type(0));
+  EXPECT_TRUE(a_h(4) == value_type(6));
+  EXPECT_TRUE(a_h(5) == value_type(5));
+  EXPECT_TRUE(a_h(6) == value_type(4));
+  EXPECT_TRUE(a_h(7) == value_type(7));
+  EXPECT_TRUE(a_h(8) == value_type(8));
+  EXPECT_TRUE(a_h(9) == value_type(9));
+}
+
+TEST(std_algorithms_mod_ops_test, iter_swap_static_view) {
+  auto a = create_view<double>(stdalgos::DynamicTag{}, 10, "a");
+  test_iter_swap(a);
+
+  auto a1 = create_view<double>(stdalgos::StridedTwoTag{}, 10, "a1");
+  test_iter_swap(a1);
+
+  auto a2 = create_view<double>(stdalgos::StridedThreeTag{}, 10, "a2");
+  test_iter_swap(a2);
+}
+
+}  // namespace ModOps
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a8883d48ef1064fca68560fcd4155f4d815eac5
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp
@@ -0,0 +1,429 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include "std_algorithms/Kokkos_BeginEnd.hpp"
+
+namespace KE = Kokkos::Experimental;
+
+namespace Test {
+namespace stdalgos {
+
+struct std_algorithms_mod_seq_ops_test : std_algorithms_test {
+ public:
+  virtual void SetUp() {
+    Kokkos::parallel_for(m_static_view.extent(0),
+                         AssignIndexFunctor<static_view_t>(m_static_view));
+  }
+};
+
+//----------------------------------------------------------------------------
+
+TEST_F(std_algorithms_mod_seq_ops_test, copy) {
+  auto result = KE::copy(exespace(), KE::begin(m_static_view),
+                         KE::end(m_static_view), KE::begin(m_strided_view));
+  EXPECT_EQ(KE::end(m_strided_view), result);
+  compare_views(m_static_view, m_strided_view);
+
+  auto result2 = KE::copy(exespace(), KE::begin(m_strided_view),
+                          KE::end(m_strided_view), KE::begin(m_dynamic_view));
+  EXPECT_EQ(KE::end(m_dynamic_view), result2);
+  compare_views(m_dynamic_view, m_strided_view);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, copy_view) {
+  EXPECT_EQ(KE::end(m_dynamic_view),
+            KE::copy(exespace(), m_static_view, m_dynamic_view));
+  compare_views(m_static_view, m_dynamic_view);
+
+  EXPECT_EQ(KE::end(m_strided_view),
+            KE::copy(exespace(), m_dynamic_view, m_strided_view));
+  compare_views(m_dynamic_view, m_strided_view);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, copy_n) {
+  constexpr std::size_t n = 5;
+  view_host_space_t expected("copy_n_expected");
+  expected(0) = 0;
+  expected(1) = 1;
+  expected(2) = 2;
+  expected(3) = 3;
+  expected(4) = 4;
+  expected(5) = 0;
+  expected(6) = 0;
+  expected(7) = 0;
+  expected(8) = 0;
+  expected(9) = 0;
+
+  // pass iterators
+  auto first = KE::begin(m_static_view);
+  auto dest  = KE::begin(m_dynamic_view);
+  EXPECT_EQ(dest + n, KE::copy_n(exespace(), first, n, dest));
+  compare_views(expected, m_dynamic_view);
+
+  // pass views
+  EXPECT_EQ(KE::begin(m_strided_view) + n,
+            KE::copy_n(exespace(), m_static_view, n, m_strided_view));
+  compare_views(expected, m_strided_view);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, copy_backward) {
+  auto first = KE::begin(m_static_view);
+  auto last  = KE::end(m_static_view);
+  auto dest  = KE::end(m_dynamic_view);
+
+  // pass iterators
+  EXPECT_EQ(KE::begin(m_dynamic_view),
+            KE::copy_backward(exespace(), first, last, dest));
+  compare_views(m_static_view, m_dynamic_view);
+
+  // pass views
+  EXPECT_EQ(KE::begin(m_strided_view),
+            KE::copy_backward(exespace(), m_static_view, m_strided_view));
+  compare_views(m_static_view, m_strided_view);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, reverse_copy) {
+  view_host_space_t expected("reverse_copy_expected");
+  expected(0) = 9;
+  expected(1) = 8;
+  expected(2) = 7;
+  expected(3) = 6;
+  expected(4) = 5;
+  expected(5) = 4;
+  expected(6) = 3;
+  expected(7) = 2;
+  expected(8) = 1;
+  expected(9) = 0;
+
+  auto first = KE::begin(m_static_view);
+  auto last  = KE::end(m_static_view);
+  auto dest  = KE::begin(m_dynamic_view);
+
+  EXPECT_EQ(KE::end(m_dynamic_view),
+            KE::reverse_copy(exespace(), first, last, dest));
+  compare_views(expected, m_dynamic_view);
+
+  EXPECT_EQ(KE::end(m_strided_view),
+            KE::reverse_copy(exespace(), m_static_view, m_strided_view));
+  compare_views(expected, m_strided_view);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, fill) {
+  constexpr auto fill_value = 1.0;
+  view_host_space_t expected("fill_n_expected");
+  expected(0) = 0;
+  expected(1) = 0;
+  expected(2) = 0;
+  expected(3) = 0;
+  expected(4) = 0;
+  expected(5) = 0;
+  expected(6) = 0;
+  expected(7) = 0;
+  expected(8) = fill_value;
+  expected(9) = fill_value;
+
+  // pass iterators
+  KE::fill(exespace(), KE::begin(m_dynamic_view) + 8, KE::end(m_dynamic_view),
+           fill_value);
+  compare_views(expected, m_dynamic_view);
+
+  // pass view
+  KE::fill(exespace(), m_strided_view, fill_value);
+  verify_values(fill_value, m_strided_view);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, fill_n) {
+  constexpr auto fill_n_value     = 100.0;
+  constexpr auto fill_n_new_value = 200.0;
+
+  // fill all elements
+  // pass iterator
+  EXPECT_EQ(KE::end(m_static_view),
+            KE::fill_n(exespace(), KE::begin(m_static_view),
+                       m_static_view.extent(0), fill_n_value));
+  verify_values(fill_n_value, m_static_view);
+
+  // pass view
+  EXPECT_EQ(KE::end(m_strided_view),
+            KE::fill_n(exespace(), m_strided_view, m_strided_view.extent(0),
+                       fill_n_value));
+  verify_values(fill_n_value, m_strided_view);
+
+  // fill zero elements
+  // pass view
+  EXPECT_EQ(KE::begin(m_dynamic_view),
+            KE::fill_n(exespace(), m_dynamic_view, 0, fill_n_new_value));
+
+  // fill single element
+  // pass iterator
+  EXPECT_EQ(
+      KE::begin(m_static_view) + 1,
+      KE::fill_n(exespace(), KE::begin(m_static_view), 1, fill_n_new_value));
+
+  view_host_space_t expected("fill_n_expected");
+  expected(0) = fill_n_new_value;
+  expected(1) = fill_n_value;
+  expected(2) = fill_n_value;
+  expected(3) = fill_n_value;
+  expected(4) = fill_n_value;
+  expected(5) = fill_n_value;
+  expected(6) = fill_n_value;
+  expected(7) = fill_n_value;
+  expected(8) = fill_n_value;
+  expected(9) = fill_n_value;
+  compare_views(expected, m_static_view);
+}
+
+struct TransformFunctor {
+  KOKKOS_INLINE_FUNCTION
+  value_type operator()(const value_type& val) const {
+    (void)val;
+    return static_cast<value_type>(-1);
+  }
+};
+
+TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_unary_op) {
+  view_host_space_t gold_source("transform_expected");
+  gold_source(0) = 0;
+  gold_source(1) = 1;
+  gold_source(2) = 2;
+  gold_source(3) = 3;
+  gold_source(4) = 4;
+  gold_source(5) = 5;
+  gold_source(6) = 6;
+  gold_source(7) = 7;
+  gold_source(8) = 8;
+  gold_source(9) = 9;
+
+  // transform static view, store results in dynamic view
+  auto r1 = KE::transform(exespace(), KE::begin(m_static_view),
+                          KE::end(m_static_view), KE::begin(m_dynamic_view),
+                          TransformFunctor());
+  EXPECT_EQ(r1, KE::end(m_dynamic_view));
+  compare_views(gold_source, m_static_view);
+  verify_values(-1., m_dynamic_view);
+
+  // transform dynamic view, store results in strided view
+  auto r2 = KE::transform(exespace(), m_dynamic_view, m_strided_view,
+                          TransformFunctor());
+  EXPECT_EQ(r2, KE::end(m_strided_view));
+  verify_values(-1., m_dynamic_view);
+  verify_values(-1., m_strided_view);
+
+  // transform strided view, store results in static view
+  auto r3 = KE::transform(exespace(), m_strided_view, m_static_view,
+                          TransformFunctor());
+  EXPECT_EQ(r3, KE::end(m_static_view));
+  verify_values(-1., m_static_view);
+  verify_values(-1., m_strided_view);
+}
+
+struct TransformBinaryFunctor {
+  KOKKOS_INLINE_FUNCTION
+  value_type operator()(const value_type& val1, const value_type& val2) const {
+    return val1 + val2;
+  }
+};
+
+TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_binary_op) {
+  view_host_space_t expected("transform_expected");
+  expected(0) = 0;
+  expected(1) = 1;
+  expected(2) = 2;
+  expected(3) = 3;
+  expected(4) = 4;
+  expected(5) = 5;
+  expected(6) = 6;
+  expected(7) = 7;
+  expected(8) = 8;
+  expected(9) = 9;
+
+  auto r1 = KE::transform(exespace(), KE::begin(m_static_view),
+                          KE::end(m_static_view), KE::begin(m_dynamic_view),
+                          KE::begin(m_strided_view), TransformBinaryFunctor());
+  EXPECT_EQ(r1, KE::end(m_strided_view));
+  compare_views(expected, m_strided_view);
+
+  expected(0) = 0;
+  expected(1) = 2;
+  expected(2) = 4;
+  expected(3) = 6;
+  expected(4) = 8;
+  expected(5) = 10;
+  expected(6) = 12;
+  expected(7) = 14;
+  expected(8) = 16;
+  expected(9) = 18;
+  auto r2 = KE::transform("label", exespace(), m_static_view, m_strided_view,
+                          m_dynamic_view, TransformBinaryFunctor());
+  EXPECT_EQ(r2, KE::end(m_dynamic_view));
+  compare_views(expected, m_dynamic_view);
+}
+
+constexpr value_type generated_value = 2.0;
+
+struct GenerateFunctor {
+  KOKKOS_INLINE_FUNCTION
+  value_type operator()() const { return generated_value; }
+};
+
+// cuda illegal instruction error appears for this one:
+// constexpr int generate_f() { return generated_value; }
+
+TEST_F(std_algorithms_mod_seq_ops_test, generate) {
+  // view + functor
+  KE::generate(exespace(), m_static_view, GenerateFunctor());
+  verify_values(generated_value, m_static_view);
+
+  // iterators + functor
+  KE::generate(exespace(), KE::begin(m_strided_view), KE::end(m_strided_view),
+               GenerateFunctor());
+  verify_values(generated_value, m_strided_view);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, generate_n) {
+  // iterator + functor
+  EXPECT_EQ(KE::end(m_static_view),
+            KE::generate_n(exespace(), KE::begin(m_static_view),
+                           m_static_view.extent(0), GenerateFunctor()));
+  verify_values(generated_value, m_static_view);
+
+  // view + functor
+  EXPECT_EQ(KE::end(m_dynamic_view),
+            KE::generate_n(exespace(), m_dynamic_view, m_dynamic_view.extent(0),
+                           GenerateFunctor()));
+  verify_values(generated_value, m_dynamic_view);
+
+  // view + functor, negative n
+  EXPECT_EQ(KE::begin(m_strided_view),
+            KE::generate_n(exespace(), m_strided_view, -1, GenerateFunctor()));
+}
+
+// -----------------
+// test swap_ranges
+// -----------------
+template <class ViewType>
+struct StdModOpsSwapRangesFillFunctorA {
+  ViewType m_view;
+  StdModOpsSwapRangesFillFunctorA(ViewType view) : m_view(view) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int i) const { m_view(i) = i; }
+};
+
+template <class ViewType>
+struct StdModOpsSwapRangesFillFunctorB {
+  ViewType m_view;
+  StdModOpsSwapRangesFillFunctorB(ViewType view) : m_view(view) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int i) const { m_view(i) = 100 - i; }
+};
+
+template <class ViewType>
+void test_swap_ranges(ViewType view) {
+  const auto ext = view.extent(0);
+
+  /* fill view_a */
+  auto FA = StdModOpsSwapRangesFillFunctorA<ViewType>(view);
+  Kokkos::parallel_for(ext, std::move(FA));
+
+  /* fill view_b */
+  using static_view_type = std_algorithms_test::static_view_t;
+  static_view_type viewB("viewB");
+  auto FB = StdModOpsSwapRangesFillFunctorB<static_view_type>(viewB);
+  Kokkos::parallel_for(ext, std::move(FB));
+
+  /* call swap_ranges */
+  auto first1 = KE::begin(view) + 2;
+  auto last1  = first1 + 4;
+  auto first2 = KE::begin(viewB) + 1;
+  auto r      = KE::swap_ranges(exespace(), first1, last1, first2);
+  EXPECT_EQ(r, first2 + 4);
+
+  /* check VIEW_A */
+  static_view_type checkViewA("tmp");
+  using cp_func_a_t = CopyFunctor<ViewType, static_view_type>;
+  parallel_for(ext, cp_func_a_t(view, checkViewA));
+  auto cvA_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewA);
+  EXPECT_TRUE(cvA_h(0) == 0);
+  EXPECT_TRUE(cvA_h(1) == 1);
+  EXPECT_TRUE(cvA_h(2) == 99);
+  EXPECT_TRUE(cvA_h(3) == 98);
+  EXPECT_TRUE(cvA_h(4) == 97);
+  EXPECT_TRUE(cvA_h(5) == 96);
+  EXPECT_TRUE(cvA_h(6) == 6);
+  EXPECT_TRUE(cvA_h(7) == 7);
+  EXPECT_TRUE(cvA_h(8) == 8);
+  EXPECT_TRUE(cvA_h(9) == 9);
+
+  /* check viewB */
+  static_view_type checkViewB("tmpB");
+  using cp_func_b_t = CopyFunctor<static_view_type, static_view_type>;
+  Kokkos::parallel_for(ext, cp_func_b_t(viewB, checkViewB));
+  auto cvB_h =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewB);
+  EXPECT_TRUE(cvB_h(0) == 100);
+  EXPECT_TRUE(cvB_h(1) == 2);
+  EXPECT_TRUE(cvB_h(2) == 3);
+  EXPECT_TRUE(cvB_h(3) == 4);
+  EXPECT_TRUE(cvB_h(4) == 5);
+  EXPECT_TRUE(cvB_h(5) == 95);
+  EXPECT_TRUE(cvB_h(6) == 94);
+  EXPECT_TRUE(cvB_h(7) == 93);
+  EXPECT_TRUE(cvB_h(8) == 92);
+  EXPECT_TRUE(cvB_h(9) == 91);
+}
+
+TEST_F(std_algorithms_mod_seq_ops_test, swap_ranges) {
+  test_swap_ranges(m_static_view);
+  test_swap_ranges(m_dynamic_view);
+  test_swap_ranges(m_strided_view);
+}
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..51f74220ce3f0c34f87c0c5d1266664bdc4e93c9
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp
@@ -0,0 +1,716 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_Numeric.hpp>
+
+namespace KE = Kokkos::Experimental;
+
+namespace Test {
+namespace stdalgos {
+
+struct CustomValueType {
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType(){};
+
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType(value_type val) : value(val){};
+
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType(const CustomValueType& other) { this->value = other.value; }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& operator()() { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  const value_type& operator()() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType& operator+=(const CustomValueType& other) {
+    this->value += other.value;
+    return *this;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType& operator=(const CustomValueType& other) {
+    this->value = other.value;
+    return *this;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType operator+(const CustomValueType& other) const {
+    CustomValueType result;
+    result.value = this->value + other.value;
+    return result;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType operator*(const CustomValueType& other) const {
+    CustomValueType result;
+    result.value = this->value * other.value;
+    return result;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  bool operator==(const CustomValueType& other) const {
+    return this->value == other.value;
+  }
+
+  //
+  // volatile overloads needed for the kokkos reductions
+  //
+  // note the void return
+  KOKKOS_INLINE_FUNCTION
+  void operator+=(const volatile CustomValueType& other) volatile {
+    this->value += other.value;
+  }
+
+  // note the void return
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const CustomValueType& other) volatile {
+    this->value = other.value;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  CustomValueType operator+(const volatile CustomValueType& other) const
+      volatile {
+    CustomValueType result;
+    result.value = this->value + other.value;
+    return result;
+  }
+
+ private:
+  value_type value = {};
+};
+
+template <class ValueType>
+struct TimesTwoUnaryTransformFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a) const { return (a * 2.); }
+};
+
+template <class ValueType>
+struct MultiplyAndHalveBinaryTransformFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a * b) * 0.5;
+  }
+};
+
+template <class ValueType>
+struct SumJoinFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return a + b;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType& a,
+                       const volatile ValueType& b) const {
+    return a + b;
+  }
+};
+
+struct std_algorithms_numerics_test : public ::testing::Test {
+  Kokkos::LayoutStride layout{20, 2};
+
+  // value_type
+  using static_view_t  = Kokkos::View<value_type[20]>;
+  using dyn_view_t     = Kokkos::View<value_type*>;
+  using strided_view_t = Kokkos::View<value_type*, Kokkos::LayoutStride>;
+
+  static_view_t m_static_view{"std-algo-test-1D-contiguous-view-static"};
+  dyn_view_t m_dynamic_view{"std-algo-test-1D-contiguous-view-dyn", 20};
+  strided_view_t m_strided_view{"std-algo-test-1D-strided-view", layout};
+
+  // custom scalar (cs)
+  using static_view_cs_t = Kokkos::View<CustomValueType[20]>;
+  using dyn_view_cs_t    = Kokkos::View<CustomValueType*>;
+  using strided_view_cs_t =
+      Kokkos::View<CustomValueType*, Kokkos::LayoutStride>;
+
+  static_view_cs_t m_static_view_cs{
+      "std-algo-test-1D-contiguous-view-static-custom-scalar"};
+  dyn_view_cs_t m_dynamic_view_cs{
+      "std-algo-test-1D-contiguous-view-dyn-custom_scalar", 20};
+  strided_view_cs_t m_strided_view_cs{
+      "std-algo-test-1D-strided-view-custom-scalar", layout};
+
+  template <class ViewFromType, class ViewToType>
+  void copyPodViewToCustom(ViewFromType v_from, ViewToType v_to) {
+    for (std::size_t i = 0; i < v_from.extent(0); ++i) {
+      v_to(i)() = v_from(i);
+    }
+  }
+
+  void fillFixtureViews() {
+    static_view_t tmpView("tmpView");
+    static_view_cs_t tmpViewCs("tmpViewCs");
+    auto tmp_view_h = Kokkos::create_mirror_view(Kokkos::HostSpace(), tmpView);
+    auto tmp_view_cs_h =
+        Kokkos::create_mirror_view(Kokkos::HostSpace(), tmpViewCs);
+    tmp_view_h(0)  = 0.;
+    tmp_view_h(1)  = 0.;
+    tmp_view_h(2)  = 0.;
+    tmp_view_h(3)  = 2.;
+    tmp_view_h(4)  = 2.;
+    tmp_view_h(5)  = 1.;
+    tmp_view_h(6)  = 1.;
+    tmp_view_h(7)  = 1.;
+    tmp_view_h(8)  = 1.;
+    tmp_view_h(9)  = 0.;
+    tmp_view_h(10) = -2.;
+    tmp_view_h(11) = -2.;
+    tmp_view_h(12) = 0.;
+    tmp_view_h(13) = 2.;
+    tmp_view_h(14) = 2.;
+    tmp_view_h(15) = 1.;
+    tmp_view_h(16) = 1.;
+    tmp_view_h(17) = 1.;
+    tmp_view_h(18) = 1.;
+    tmp_view_h(19) = 0.;
+
+    copyPodViewToCustom(tmp_view_h, tmp_view_cs_h);
+
+    Kokkos::deep_copy(tmpView, tmp_view_h);
+    Kokkos::deep_copy(tmpViewCs, tmp_view_cs_h);
+
+    CopyFunctor<static_view_t, static_view_t> F1(tmpView, m_static_view);
+    Kokkos::parallel_for("_std_algo_copy1", 20, F1);
+
+    CopyFunctor<static_view_t, dyn_view_t> F2(tmpView, m_dynamic_view);
+    Kokkos::parallel_for("_std_algo_copy2", 20, F2);
+
+    CopyFunctor<static_view_t, strided_view_t> F3(tmpView, m_strided_view);
+    Kokkos::parallel_for("_std_algo_copy3", 20, F3);
+
+    CopyFunctor<static_view_cs_t, static_view_cs_t> F4(tmpViewCs,
+                                                       m_static_view_cs);
+    Kokkos::parallel_for("_std_algo_copy4", 20, F4);
+
+    CopyFunctor<static_view_cs_t, dyn_view_cs_t> F5(tmpViewCs,
+                                                    m_dynamic_view_cs);
+    Kokkos::parallel_for("_std_algo_copy5", 20, F5);
+
+    CopyFunctor<static_view_cs_t, strided_view_cs_t> F6(tmpViewCs,
+                                                        m_strided_view_cs);
+    Kokkos::parallel_for("_std_algo_copy6", 20, F6);
+  }
+};
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+
+// -------------------------------------------------------------------
+// test default case of transform_reduce
+//
+// test for both POD types and custom scalar types
+// -------------------------------------------------------------------
+template <class ExecutionSpace, class ViewType1, class ViewType2,
+          class ValueType>
+void run_and_check_transform_reduce_default(ViewType1 first_view,
+                                            ViewType2 second_view,
+                                            ValueType init_value,
+                                            ValueType result_value) {
+  // trivial cases
+  const auto r1 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(first_view),
+                                       KE::cbegin(first_view),
+                                       KE::cbegin(second_view), init_value);
+
+  const auto r2 = KE::transform_reduce(
+      "MYLABEL", ExecutionSpace(), KE::cbegin(first_view),
+      KE::cbegin(first_view), KE::cbegin(second_view), init_value);
+  EXPECT_TRUE(r1 == init_value);
+  EXPECT_TRUE(r2 == init_value);
+
+  // non-trivial cases
+  const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(first_view),
+                                       KE::cend(first_view),
+                                       KE::cbegin(second_view), init_value);
+
+  const auto r4 = KE::transform_reduce(
+      "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
+      KE::cbegin(second_view), init_value);
+
+  const auto r5 = KE::transform_reduce(ExecutionSpace(), first_view,
+                                       second_view, init_value);
+  const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view,
+                                       second_view, init_value);
+
+  EXPECT_TRUE(r3 == result_value);
+  EXPECT_TRUE(r4 == result_value);
+  EXPECT_TRUE(r5 == result_value);
+  EXPECT_TRUE(r6 == result_value);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       transform_reduce_default_functors_using_pod_value_type) {
+  fillFixtureViews();
+  const value_type init0 = 0.;
+  const value_type init5 = 5.;
+  const value_type gold0 = 32.;
+  const value_type gold5 = 37.;
+
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view, m_dynamic_view, init0, gold0);
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view, m_dynamic_view, init5, gold5);
+
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view, m_strided_view, init0, gold0);
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view, m_strided_view, init5, gold5);
+
+  run_and_check_transform_reduce_default<exespace>(
+      m_dynamic_view, m_strided_view, init0, gold0);
+  run_and_check_transform_reduce_default<exespace>(
+      m_dynamic_view, m_strided_view, init5, gold5);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       transform_reduce_default_functors_using_custom_value_type) {
+  fillFixtureViews();
+  const CustomValueType init0{0.};
+  const CustomValueType init5{5.};
+  const CustomValueType gold0{32.};
+  const CustomValueType gold5{37.};
+
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view_cs, m_dynamic_view_cs, init0, gold0);
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view_cs, m_dynamic_view_cs, init5, gold5);
+
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view_cs, m_strided_view_cs, init0, gold0);
+  run_and_check_transform_reduce_default<exespace>(
+      m_static_view_cs, m_strided_view_cs, init5, gold5);
+
+  run_and_check_transform_reduce_default<exespace>(
+      m_dynamic_view_cs, m_strided_view_cs, init0, gold0);
+  run_and_check_transform_reduce_default<exespace>(
+      m_dynamic_view_cs, m_strided_view_cs, init5, gold5);
+}
+
+// -------------------------------------------------------------------
+// transform_reduce for custom joiner and custom transform op
+// test for both POD types and custom scalar types
+//
+// test overload1 accepting two intervals
+//
+// Note that in the std, the reducer is called BinaryReductionOp
+// but in the Kokkos naming convention, it corresponds to a "joiner"
+// that knows how to join two values.
+// the "joiner" is assumed to be commutative:
+//
+// https://en.cppreference.com/w/cpp/algorithm/transform_reduce
+//
+// -------------------------------------------------------------------
+
+template <class ExecutionSpace, class ViewType1, class ViewType2,
+          class ValueType, class... Args>
+void run_and_check_transform_reduce_overloadA(ViewType1 first_view,
+                                              ViewType2 second_view,
+                                              ValueType init_value,
+                                              ValueType result_value,
+                                              Args&&... args) {
+  // trivial cases
+  const auto r1 = KE::transform_reduce(
+      ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view),
+      KE::cbegin(second_view), init_value, std::forward<Args>(args)...);
+
+  const auto r2 =
+      KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(first_view),
+                           KE::cbegin(first_view), KE::cbegin(second_view),
+                           init_value, std::forward<Args>(args)...);
+
+  EXPECT_TRUE(r1 == init_value);
+  EXPECT_TRUE(r2 == init_value);
+
+  // non trivial cases
+  const auto r3 = KE::transform_reduce(
+      ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
+      KE::cbegin(second_view), init_value, std::forward<Args>(args)...);
+
+  const auto r4 = KE::transform_reduce(
+      "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
+      KE::cbegin(second_view), init_value, std::forward<Args>(args)...);
+
+  const auto r5 =
+      KE::transform_reduce(ExecutionSpace(), first_view, second_view,
+                           init_value, std::forward<Args>(args)...);
+  const auto r6 =
+      KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view,
+                           init_value, std::forward<Args>(args)...);
+
+  EXPECT_TRUE(r3 == result_value);
+  EXPECT_TRUE(r4 == result_value);
+  EXPECT_TRUE(r5 == result_value);
+  EXPECT_TRUE(r6 == result_value);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       transform_reduce_custom_functors_overloadA_using_pod_value_type) {
+  using joiner_type = SumJoinFunctor<value_type>;
+  using transf_type = MultiplyAndHalveBinaryTransformFunctor<value_type>;
+
+  const value_type init0 = 0.;
+  const value_type init5 = 5.;
+  const value_type gold0 = 16.;
+  const value_type gold5 = 21.;
+
+  fillFixtureViews();
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view, m_dynamic_view, init0, gold0, joiner_type(),
+      transf_type());
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view, m_dynamic_view, init5, gold5, joiner_type(),
+      transf_type());
+
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view, m_strided_view, init0, gold0, joiner_type(),
+      transf_type());
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view, m_strided_view, init5, gold5, joiner_type(),
+      transf_type());
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_dynamic_view, m_strided_view, init0, gold0, joiner_type(),
+      transf_type());
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_dynamic_view, m_strided_view, init5, gold5, joiner_type(),
+      transf_type());
+}
+
+TEST_F(std_algorithms_numerics_test,
+       transform_reduce_custom_functors_overloadA_using_custom_value_type) {
+  using joiner_type = SumJoinFunctor<CustomValueType>;
+  using transf_type = MultiplyAndHalveBinaryTransformFunctor<CustomValueType>;
+
+  const CustomValueType init0{0.};
+  const CustomValueType init5{5.};
+  const CustomValueType gold0{16.};
+  const CustomValueType gold5{21.};
+
+  fillFixtureViews();
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view_cs, m_dynamic_view_cs, init0, gold0, joiner_type(),
+      transf_type());
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view_cs, m_dynamic_view_cs, init5, gold5, joiner_type(),
+      transf_type());
+
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view_cs, m_strided_view_cs, init0, gold0, joiner_type(),
+      transf_type());
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_static_view_cs, m_strided_view_cs, init5, gold5, joiner_type(),
+      transf_type());
+
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_dynamic_view_cs, m_strided_view_cs, init0, gold0, joiner_type(),
+      transf_type());
+  run_and_check_transform_reduce_overloadA<exespace>(
+      m_dynamic_view_cs, m_strided_view_cs, init5, gold5, joiner_type(),
+      transf_type());
+}
+
+// -------------------------------------------------------------------
+// transform_reduce for custom joiner and custom transform op
+// test for both POD types and custom scalar types
+//
+// test overload1 accepting single interval/view
+//
+// Note that in the std, the reducer is called BinaryReductionOp
+// but in the Kokkos naming convention, it corresponds to a "joiner"
+// that knows how to join two values.
+// the "joiner" is assumed to be commutative:
+//
+// https://en.cppreference.com/w/cpp/algorithm/transform_reduce
+//
+// -------------------------------------------------------------------
+
+template <class ExecutionSpace, class ViewType, class ValueType, class... Args>
+void run_and_check_transform_reduce_overloadB(ViewType view,
+                                              ValueType init_value,
+                                              ValueType result_value,
+                                              Args&&... args) {
+  // trivial
+  const auto r1 =
+      KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view),
+                           init_value, std::forward<Args>(args)...);
+
+  const auto r2 = KE::transform_reduce("MYLABEL", ExecutionSpace(),
+                                       KE::cbegin(view), KE::cbegin(view),
+                                       init_value, std::forward<Args>(args)...);
+
+  EXPECT_TRUE(r1 == init_value);
+  EXPECT_TRUE(r2 == init_value);
+
+  // non trivial
+  const auto r3 =
+      KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view),
+                           init_value, std::forward<Args>(args)...);
+
+  const auto r4 = KE::transform_reduce("MYLABEL", ExecutionSpace(),
+                                       KE::cbegin(view), KE::cend(view),
+                                       init_value, std::forward<Args>(args)...);
+  const auto r5 = KE::transform_reduce(ExecutionSpace(), view, init_value,
+                                       std::forward<Args>(args)...);
+
+  const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view,
+                                       init_value, std::forward<Args>(args)...);
+
+  EXPECT_TRUE(r3 == result_value);
+  EXPECT_TRUE(r4 == result_value);
+  EXPECT_TRUE(r5 == result_value);
+  EXPECT_TRUE(r6 == result_value);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       transform_reduce_custom_functors_overloadB_using_pod_value_type) {
+  using joiner_type = SumJoinFunctor<value_type>;
+  using transf_type = TimesTwoUnaryTransformFunctor<value_type>;
+
+  const value_type init0 = 0.;
+  const value_type init5 = 5.;
+  const value_type gold0 = 24.;
+  const value_type gold5 = 29.;
+
+  fillFixtureViews();
+  run_and_check_transform_reduce_overloadB<exespace>(
+      m_static_view, init0, gold0, joiner_type(), transf_type());
+  run_and_check_transform_reduce_overloadB<exespace>(
+      m_dynamic_view, init5, gold5, joiner_type(), transf_type());
+  run_and_check_transform_reduce_overloadB<exespace>(
+      m_strided_view, init0, gold0, joiner_type(), transf_type());
+}
+
+TEST_F(std_algorithms_numerics_test,
+       transform_reduce_custom_functors_overloadB_using_custom_value_type) {
+  using joiner_type = SumJoinFunctor<CustomValueType>;
+  using transf_type = TimesTwoUnaryTransformFunctor<CustomValueType>;
+
+  const CustomValueType init0{0.};
+  const CustomValueType init5{5.};
+  const CustomValueType gold0{24.};
+  const CustomValueType gold5{29.};
+
+  fillFixtureViews();
+  run_and_check_transform_reduce_overloadB<exespace>(
+      m_static_view_cs, init0, gold0, joiner_type(), transf_type());
+  run_and_check_transform_reduce_overloadB<exespace>(
+      m_dynamic_view_cs, init5, gold5, joiner_type(), transf_type());
+  run_and_check_transform_reduce_overloadB<exespace>(
+      m_strided_view_cs, init0, gold0, joiner_type(), transf_type());
+}
+
+// -------------------------------------------------------------------
+// test reduce overload1
+//
+// test for both POD types and custom scalar types
+// -------------------------------------------------------------------
+template <class ExecutionSpace, class ViewType, class ValueType>
+void run_and_check_reduce_overloadA(ViewType view, ValueType non_trivial_result,
+                                    ValueType trivial_result) {
+  // trivial cases
+  const auto r1 =
+      KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view));
+  const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
+                             KE::cbegin(view));
+  EXPECT_TRUE(r1 == trivial_result);
+  EXPECT_TRUE(r2 == trivial_result);
+
+  // non trivial cases
+  const auto r3 =
+      KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view));
+  const auto r4 =
+      KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cend(view));
+  const auto r5 = KE::reduce(ExecutionSpace(), view);
+  const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view);
+
+  EXPECT_TRUE(r3 == non_trivial_result);
+  EXPECT_TRUE(r4 == non_trivial_result);
+  EXPECT_TRUE(r5 == non_trivial_result);
+  EXPECT_TRUE(r6 == non_trivial_result);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       reduce_default_functors_overloadA_using_pod_value_type) {
+  fillFixtureViews();
+  const value_type trivial_gold     = 0.;
+  const value_type non_trivial_gold = 12.;
+  run_and_check_reduce_overloadA<exespace>(m_static_view, non_trivial_gold,
+                                           trivial_gold);
+  run_and_check_reduce_overloadA<exespace>(m_dynamic_view, non_trivial_gold,
+                                           trivial_gold);
+  run_and_check_reduce_overloadA<exespace>(m_strided_view, non_trivial_gold,
+                                           trivial_gold);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       reduce_default_functors_overloadA_using_custom_value_type) {
+  fillFixtureViews();
+  const CustomValueType trivial_gold{0.};
+  const CustomValueType non_trivial_gold{12.};
+  run_and_check_reduce_overloadA<exespace>(m_static_view_cs, non_trivial_gold,
+                                           trivial_gold);
+  run_and_check_reduce_overloadA<exespace>(m_dynamic_view_cs, non_trivial_gold,
+                                           trivial_gold);
+  run_and_check_reduce_overloadA<exespace>(m_strided_view_cs, non_trivial_gold,
+                                           trivial_gold);
+}
+
+// -------------------------------------------------------------------
+// test reduce overload2 with init value
+//
+// test for both POD types and custom scalar types
+// -------------------------------------------------------------------
+template <class ExecutionSpace, class ViewType, class ValueType>
+void run_and_check_reduce_overloadB(ViewType view, ValueType result_value,
+                                    ValueType init_value) {
+  // trivial cases
+  const auto r1 = KE::reduce(ExecutionSpace(), KE::cbegin(view),
+                             KE::cbegin(view), init_value);
+  const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
+                             KE::cbegin(view), init_value);
+  EXPECT_TRUE(r1 == init_value);
+  EXPECT_TRUE(r2 == init_value);
+
+  // non trivial cases
+  const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view),
+                             init_value);
+  const auto r4 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
+                             KE::cend(view), init_value);
+  const auto r5 = KE::reduce(ExecutionSpace(), view, init_value);
+  const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view, init_value);
+
+  EXPECT_TRUE(r3 == result_value);
+  EXPECT_TRUE(r4 == result_value);
+  EXPECT_TRUE(r5 == result_value);
+  EXPECT_TRUE(r6 == result_value);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       reduce_default_functors_overloadB_using_pod_value_type) {
+  fillFixtureViews();
+  const value_type init = 5.;
+  const value_type gold = 17.;
+  run_and_check_reduce_overloadB<exespace>(m_static_view, gold, init);
+  run_and_check_reduce_overloadB<exespace>(m_dynamic_view, gold, init);
+  run_and_check_reduce_overloadB<exespace>(m_strided_view, gold, init);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       reduce_default_functors_overloadB_using_custom_value_type) {
+  fillFixtureViews();
+  const CustomValueType init{5.};
+  const CustomValueType gold{17.};
+  run_and_check_reduce_overloadB<exespace>(m_static_view_cs, gold, init);
+  run_and_check_reduce_overloadB<exespace>(m_dynamic_view_cs, gold, init);
+  run_and_check_reduce_overloadB<exespace>(m_strided_view_cs, gold, init);
+}
+
+// -------------------------------------------------------------------
+// test reduce overload3 with init value
+//
+// test for both POD types and custom scalar types
+// -------------------------------------------------------------------
+template <class ExecutionSpace, class ViewType, class ValueType, class BinaryOp>
+void run_and_check_reduce_overloadC(ViewType view, ValueType result_value,
+                                    ValueType init_value, BinaryOp joiner) {
+  // trivial cases
+  const auto r1 = KE::reduce(ExecutionSpace(), KE::cbegin(view),
+                             KE::cbegin(view), init_value, joiner);
+  const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
+                             KE::cbegin(view), init_value, joiner);
+  EXPECT_TRUE(r1 == init_value);
+  EXPECT_TRUE(r2 == init_value);
+
+  // non trivial cases
+  const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view),
+                             init_value, joiner);
+  const auto r4 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
+                             KE::cend(view), init_value, joiner);
+  const auto r5 = KE::reduce(ExecutionSpace(), view, init_value, joiner);
+  const auto r6 =
+      KE::reduce("MYLABEL", ExecutionSpace(), view, init_value, joiner);
+
+  EXPECT_TRUE(r3 == result_value);
+  EXPECT_TRUE(r4 == result_value);
+  EXPECT_TRUE(r5 == result_value);
+  EXPECT_TRUE(r6 == result_value);
+}
+
+TEST_F(std_algorithms_numerics_test,
+       reduce_custom_functors_using_pod_value_type) {
+  using joiner_type = SumJoinFunctor<value_type>;
+
+  fillFixtureViews();
+  const value_type init = 5.;
+  const value_type gold = 17.;
+  run_and_check_reduce_overloadC<exespace>(m_static_view, gold, init,
+                                           joiner_type());
+  run_and_check_reduce_overloadC<exespace>(m_dynamic_view, gold, init,
+                                           joiner_type());
+  run_and_check_reduce_overloadC<exespace>(m_strided_view, gold, init,
+                                           joiner_type());
+}
+
+TEST_F(std_algorithms_numerics_test,
+       reduce_custom_functors_using_custom_value_type) {
+  using joiner_type = SumJoinFunctor<CustomValueType>;
+
+  fillFixtureViews();
+  const CustomValueType init{5.};
+  const CustomValueType gold{17.};
+  run_and_check_reduce_overloadC<exespace>(m_static_view_cs, gold, init,
+                                           joiner_type());
+  run_and_check_reduce_overloadC<exespace>(m_dynamic_view_cs, gold, init,
+                                           joiner_type());
+  run_and_check_reduce_overloadC<exespace>(m_strided_view_cs, gold, init,
+                                           joiner_type());
+}
+
+#endif  // not defined KOKKOS_ENABLE_OPENMPTARGET
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3fb4f9d15f405c310c2ea4393f1a42e713b8c10d
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp
@@ -0,0 +1,303 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_PartitioningOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace PartitionCopy {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = value_type{-5} + static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(22);
+    }
+  }
+
+  else if (name == "small-c") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(-13);
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeFrom, class ResultType, class ViewTypeDestTrue,
+          class ViewTypeDestFalse, class PredType>
+void verify_data(const std::string& name, ResultType my_result,
+                 ViewTypeFrom view_from, ViewTypeDestTrue view_dest_true,
+                 ViewTypeDestFalse view_dest_false, PredType pred) {
+  using value_type = typename ViewTypeFrom::value_type;
+  static_assert(
+      std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value,
+      "");
+  static_assert(
+      std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value,
+      "");
+
+  const std::size_t ext = view_from.extent(0);
+
+  // create host clone of view_from and run std::partition_copy on it
+  auto view_from_h = create_host_space_copy(view_from);
+  std::vector<value_type> std_vec_true(ext, 0);
+  std::vector<value_type> std_vec_false(ext, 0);
+  auto std_result =
+      std::partition_copy(KE::cbegin(view_from_h), KE::cend(view_from_h),
+                          std_vec_true.begin(), std_vec_false.begin(), pred);
+  const std::size_t std_diff_true  = std_result.first - std_vec_true.begin();
+  const std::size_t std_diff_false = std_result.second - std_vec_false.begin();
+  const std::size_t my_diff_true = my_result.first - KE::begin(view_dest_true);
+  const std::size_t my_diff_false =
+      my_result.second - KE::begin(view_dest_false);
+  EXPECT_TRUE(std_diff_true == my_diff_true);
+  EXPECT_TRUE(std_diff_false == my_diff_false);
+
+  auto view_dest_true_h = create_host_space_copy(view_dest_true);
+  for (std::size_t i = 0; i < std_diff_true; ++i) {
+    EXPECT_TRUE(std_vec_true[i] == view_dest_true_h(i));
+    // std::cout << "i= " << i << " "
+    // 	      << " std_true = " << std_vec_true[i] << " "
+    // 	      << " mine     = " << view_dest_true_h(i) << '\n';
+  }
+
+  auto view_dest_false_h = create_host_space_copy(view_dest_false);
+  for (std::size_t i = 0; i < std_diff_false; ++i) {
+    EXPECT_TRUE(std_vec_false[i] == view_dest_false_h(i));
+    // std::cout << "i= " << i << " "
+    // 	      << " std_false = " << std_vec_false[i] << " "
+    // 	      << " mine     = " << view_dest_false_h(i) << '\n';
+  }
+
+  if (name == "empty") {
+    EXPECT_TRUE(my_diff_true == 0);
+    EXPECT_TRUE(my_diff_false == 0);
+  }
+
+  else if (name == "one-element-a") {
+    EXPECT_TRUE(my_diff_true == 0);
+    EXPECT_TRUE(my_diff_false == 1);
+  }
+
+  else if (name == "one-element-b") {
+    EXPECT_TRUE(my_diff_true == 1);
+    EXPECT_TRUE(my_diff_false == 0);
+  }
+
+  else if (name == "two-elements-a") {
+    EXPECT_TRUE(my_diff_true == 1);
+    EXPECT_TRUE(my_diff_false == 1);
+  }
+
+  else if (name == "two-elements-b") {
+    EXPECT_TRUE(my_diff_true == 1);
+    EXPECT_TRUE(my_diff_false == 1);
+  }
+
+  else if (name == "small-b") {
+    EXPECT_TRUE(my_diff_true == 13);
+    EXPECT_TRUE(my_diff_false == 0);
+  }
+
+  else if (name == "small-c") {
+    EXPECT_TRUE(my_diff_true == 0);
+    EXPECT_TRUE(my_diff_false == 15);
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "partition_copy: " << name << ", " <<
+  // view_tag_to_string(Tag{})
+  //           << ", " << value_type_to_string(ValueType()) << std::endl;
+
+  auto view_from =
+      create_view<ValueType>(Tag{}, view_ext, "partition_copy_from");
+  IsEvenFunctor<ValueType> pred;
+
+  {
+    auto view_dest_true =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_true");
+    auto view_dest_false =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_false");
+    fill_view(view_from, name);
+    auto result = KE::partition_copy(
+        exespace(), KE::cbegin(view_from), KE::cend(view_from),
+        KE::begin(view_dest_true), KE::begin(view_dest_false), pred);
+    verify_data(name, result, view_from, view_dest_true, view_dest_false, pred);
+  }
+
+  {
+    auto view_dest_true =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_true");
+    auto view_dest_false =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_false");
+    fill_view(view_from, name);
+    auto result = KE::partition_copy(
+        "my_label", exespace(), KE::cbegin(view_from), KE::cend(view_from),
+        KE::begin(view_dest_true), KE::begin(view_dest_false), pred);
+    verify_data(name, result, view_from, view_dest_true, view_dest_false, pred);
+  }
+
+  {
+    auto view_dest_true =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_true");
+    auto view_dest_false =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_false");
+    fill_view(view_from, name);
+    auto result = KE::partition_copy(exespace(), view_from, view_dest_true,
+                                     view_dest_false, pred);
+    verify_data(name, result, view_from, view_dest_true, view_dest_false, pred);
+  }
+
+  {
+    auto view_dest_true =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_true");
+    auto view_dest_false =
+        create_view<ValueType>(Tag{}, view_ext, "partition_copy_dest_false");
+    fill_view(view_from, name);
+    auto result = KE::partition_copy("my_label", exespace(), view_from,
+                                     view_dest_true, view_dest_false, pred);
+    verify_data(name, result, view_from, view_dest_true, view_dest_false, pred);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},
+      {"one-element-b", 1},  {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"small-c", 15},
+      {"medium", 103}};  //      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_partitioning_ops, partition_copy) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace PartitionCopy
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..990d41ead6b5689aa90f29d95813c726f0799809
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp
@@ -0,0 +1,258 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_PartitioningOperations.hpp>
+
+namespace KE = Kokkos::Experimental;
+
+namespace Test {
+namespace stdalgos {
+
+struct std_algorithms_partitioning_test : public std_algorithms_test {
+  enum FixtureViews {
+    Mixed,
+    NegativeFirst,
+    AllNegative,
+    AllPositive,
+    NegativeLast,
+    SingleNegative,
+    Count
+  };
+
+  void fillFixtureViews(FixtureViews caseNumber) {
+    static_view_t tmpView("tmpView");
+    auto tmp_view_h = Kokkos::create_mirror_view(Kokkos::HostSpace(), tmpView);
+
+    switch (caseNumber) {
+      case FixtureViews::Mixed:
+        tmp_view_h(0) = -1;
+        tmp_view_h(1) = -2;
+        tmp_view_h(2) = 3;
+        tmp_view_h(3) = -4;
+        tmp_view_h(4) = 5;
+        tmp_view_h(5) = -6;
+        tmp_view_h(6) = 7;
+        tmp_view_h(7) = -8;
+        tmp_view_h(8) = 9;
+        tmp_view_h(9) = 10;
+        break;
+
+      case FixtureViews::NegativeFirst:
+        tmp_view_h(0) = -2;
+        tmp_view_h(1) = -4;
+        tmp_view_h(2) = -6;
+        tmp_view_h(3) = -80;
+        tmp_view_h(4) = 5;
+        tmp_view_h(5) = 7;
+        tmp_view_h(6) = 115;
+        tmp_view_h(7) = 3;
+        tmp_view_h(8) = 9;
+        tmp_view_h(9) = 11;
+        break;
+
+      case FixtureViews::AllNegative:
+        tmp_view_h(0) = -2;
+        tmp_view_h(1) = -4;
+        tmp_view_h(2) = -6;
+        tmp_view_h(3) = -8;
+        tmp_view_h(4) = -4;
+        tmp_view_h(5) = -12;
+        tmp_view_h(6) = -14;
+        tmp_view_h(7) = -2;
+        tmp_view_h(8) = -6;
+        tmp_view_h(9) = -8;
+        break;
+
+      case FixtureViews::AllPositive:
+        tmp_view_h(0) = 11;
+        tmp_view_h(1) = 3;
+        tmp_view_h(2) = 17;
+        tmp_view_h(3) = 9;
+        tmp_view_h(4) = 3;
+        tmp_view_h(5) = 11;
+        tmp_view_h(6) = 13;
+        tmp_view_h(7) = 1;
+        tmp_view_h(8) = 9;
+        tmp_view_h(9) = 43;
+        break;
+
+      case FixtureViews::NegativeLast:
+        tmp_view_h(0) = 1;
+        tmp_view_h(1) = 11;
+        tmp_view_h(2) = 1;
+        tmp_view_h(3) = 33;
+        tmp_view_h(4) = 3;
+        tmp_view_h(5) = 3;
+        tmp_view_h(6) = -3;
+        tmp_view_h(7) = -5;
+        tmp_view_h(8) = -5;
+        tmp_view_h(9) = -10;
+        break;
+
+      case FixtureViews::SingleNegative:
+        tmp_view_h(0) = -200;
+        tmp_view_h(1) = 1;
+        tmp_view_h(2) = 1;
+        tmp_view_h(3) = 3;
+        tmp_view_h(4) = 3;
+        tmp_view_h(5) = 211;
+        tmp_view_h(6) = 3;
+        tmp_view_h(7) = 5;
+        tmp_view_h(8) = 5;
+        tmp_view_h(9) = 11;
+        break;
+
+      default: break;
+    }
+
+    Kokkos::deep_copy(tmpView, tmp_view_h);
+    copyInputViewToFixtureViews(tmpView);
+  }
+
+  bool goldSolutionIsPartitioned(FixtureViews caseNumber) const {
+    switch (caseNumber) {
+      case Mixed: return false;
+      case NegativeFirst: return true;
+      case AllNegative: return true;
+      case AllPositive: return false;
+      case NegativeLast: return false;
+      case SingleNegative: return true;
+      default: return false;
+    }
+  }
+
+  int goldSolutionPartitionedPoint(FixtureViews caseNumber) const {
+    switch (caseNumber) {
+      case Mixed: return 2;
+      case NegativeFirst: return 4;
+      case AllNegative: return 10;
+      case AllPositive: return 0;
+      case NegativeLast: return 0;
+      case SingleNegative: return 1;
+      default: return -1;
+    }
+  }
+};
+
+TEST_F(std_algorithms_partitioning_test, is_partitioned_trivial) {
+  IsNegativeFunctor<value_type> p;
+  const auto result1 = KE::is_partitioned(exespace(), KE::cbegin(m_static_view),
+                                          KE::cbegin(m_static_view), p);
+  EXPECT_EQ(true, result1);
+
+  const auto result2 = KE::is_partitioned(
+      exespace(), KE::cbegin(m_dynamic_view), KE::cbegin(m_dynamic_view), p);
+  EXPECT_EQ(true, result2);
+
+  const auto result3 = KE::is_partitioned(
+      exespace(), KE::cbegin(m_strided_view), KE::cbegin(m_strided_view), p);
+  EXPECT_EQ(true, result3);
+}
+
+TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_iterators) {
+  const IsNegativeFunctor<value_type> p;
+
+  for (int id = 0; id < FixtureViews::Count; ++id) {
+    fillFixtureViews(static_cast<FixtureViews>(id));
+    const bool goldBool =
+        goldSolutionIsPartitioned(static_cast<FixtureViews>(id));
+    const auto result1 = KE::is_partitioned(
+        exespace(), KE::cbegin(m_static_view), KE::cend(m_static_view), p);
+    EXPECT_EQ(goldBool, result1);
+
+    const auto result2 = KE::is_partitioned(
+        exespace(), KE::cbegin(m_dynamic_view), KE::cend(m_dynamic_view), p);
+    EXPECT_EQ(goldBool, result2);
+
+    const auto result3 = KE::is_partitioned(
+        exespace(), KE::cbegin(m_strided_view), KE::cend(m_strided_view), p);
+    EXPECT_EQ(goldBool, result3);
+  }
+}
+
+TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_view) {
+  const IsNegativeFunctor<value_type> p;
+
+  for (int id = 0; id < FixtureViews::Count; ++id) {
+    fillFixtureViews(static_cast<FixtureViews>(id));
+    const bool goldBool =
+        goldSolutionIsPartitioned(static_cast<FixtureViews>(id));
+    const auto result1 = KE::is_partitioned(exespace(), m_static_view, p);
+    EXPECT_EQ(goldBool, result1);
+
+    const auto result2 = KE::is_partitioned(exespace(), m_dynamic_view, p);
+    EXPECT_EQ(goldBool, result2);
+
+    const auto result3 = KE::is_partitioned(exespace(), m_strided_view, p);
+    EXPECT_EQ(goldBool, result3);
+  }
+}
+
+TEST_F(std_algorithms_partitioning_test, partition_point) {
+  const IsNegativeFunctor<value_type> p;
+
+  for (int id = 0; id < FixtureViews::Count; ++id) {
+    fillFixtureViews(static_cast<FixtureViews>(id));
+    const auto goldIndex =
+        goldSolutionPartitionedPoint(static_cast<FixtureViews>(id));
+    auto first1        = KE::cbegin(m_static_view);
+    auto last1         = KE::cend(m_static_view);
+    const auto result1 = KE::partition_point(exespace(), first1, last1, p);
+    EXPECT_EQ(goldIndex, result1 - first1);
+
+    auto first2        = KE::cbegin(m_dynamic_view);
+    auto last2         = KE::cend(m_dynamic_view);
+    const auto result2 = KE::partition_point(exespace(), first2, last2, p);
+    EXPECT_EQ(goldIndex, result2 - first2);
+
+    auto first3        = KE::cbegin(m_strided_view);
+    auto last3         = KE::cend(m_strided_view);
+    const auto result3 = KE::partition_point(exespace(), first3, last3, p);
+    EXPECT_EQ(goldIndex, result3 - first3);
+  }
+}
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0cd931d87288848ef429805df1da23758c4e1085
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp
@@ -0,0 +1,234 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace Remove {
+
+namespace KE = Kokkos::Experimental;
+
+constexpr int match_value = 4;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(match_value);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(match_value);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(match_value);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(match_value);
+      } else {
+        v_h(i) = static_cast<value_type>(-12);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 8 == 0) {
+        v_h(i) = static_cast<value_type>(match_value);
+      } else {
+        v_h(i) = randObj();
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeData, class ViewTypeTest, class MyItResult>
+void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test,
+                 MyItResult my_result) {
+  // run std::remove
+  auto std_result =
+      std::remove(KE::begin(view_data_h), KE::end(view_data_h), match_value);
+
+  // check that returned iterators are correct
+  const std::size_t std_diff = std_result - KE::begin(view_data_h);
+  const std::size_t my_diff  = my_result - KE::begin(view_test);
+  EXPECT_TRUE(std_diff == my_diff);
+
+  // check the actual data after algo has been applied
+  auto view_test_h = create_host_space_copy(view_test);
+  for (std::size_t i = 0; i < my_diff; ++i) {
+    EXPECT_TRUE(view_test_h(i) == view_data_h[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "mine: " << view_test_h(i) << " "
+    // 	      << "std: " << view_data_h(i)
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "remove: " << name << ", " << view_tag_to_string(Tag{}) << ",
+  // "
+  //           << value_type_to_string(ValueType()) << std::endl;
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_from");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit    = KE::remove(exespace(), KE::begin(view), KE::end(view),
+                          (ValueType)match_value);
+    verify_data(data_h, view, rit);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_from");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit = KE::remove("label", exespace(), KE::begin(view), KE::end(view),
+                          (ValueType)match_value);
+    verify_data(data_h, view, rit);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_from");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit    = KE::remove(exespace(), view, (ValueType)match_value);
+    verify_data(data_h, view, rit);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_from");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit    = KE::remove("label", exespace(), view, (ValueType)match_value);
+    verify_data(data_h, view, rit);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 13031},     {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, remove) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace Remove
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0b2de8147c3751e34d1ba19734b2d53c28fd4225
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp
@@ -0,0 +1,265 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace RemoveCopy {
+
+namespace KE = Kokkos::Experimental;
+
+constexpr int match_value = 4;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100., 100.) { m_gen.seed(341043); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(match_value);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(match_value);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(match_value);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(match_value);
+      } else {
+        v_h(i) = static_cast<value_type>(-12);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 8 == 0) {
+        v_h(i) = static_cast<value_type>(match_value);
+      } else {
+        v_h(i) = randObj();
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewFromType, class ViewDestType, class MyItResult>
+void verify_data(ViewFromType view_from, ViewDestType view_dest,
+                 MyItResult my_result) {
+  // make a host copy of the view_from
+  auto view_from_h      = create_host_space_copy(view_from);
+  const std::size_t ext = view_from_h.extent(0);
+  using value_type      = typename ViewFromType::value_type;
+
+  // run std::remove_copy
+  std::vector<value_type> gold_dest_std(ext);
+  auto std_result =
+      std::remove_copy(KE::cbegin(view_from_h), KE::cend(view_from_h),
+                       gold_dest_std.begin(), (value_type)match_value);
+
+  // check that returned iterators are correct
+  const std::size_t std_diff = std_result - gold_dest_std.begin();
+  const std::size_t my_diff  = my_result - KE::begin(view_dest);
+  EXPECT_TRUE(std_diff == my_diff);
+
+  // check the actual data after algo has been applied
+  auto view_dest_h = create_host_space_copy(view_dest);
+  for (std::size_t i = 0; i < my_diff; ++i) {
+    EXPECT_TRUE(view_dest_h(i) == gold_dest_std[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "mine: " << view_dest_h(i) << " "
+    // 	      << "std: " << gold_dest_std[i]
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "remove_copy: " << name << ", " << view_tag_to_string(Tag{})
+  //           << ", " << value_type_to_string(ValueType()) << std::endl;
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_from");
+    fill_view(view_from, name);
+
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_dest");
+    auto rit =
+        KE::remove_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from),
+                        KE::begin(view_dest), (ValueType)match_value);
+    verify_data(view_from, view_dest, rit);
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_from");
+    fill_view(view_from, name);
+
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_dest");
+    auto rit = KE::remove_copy("label", exespace(), KE::cbegin(view_from),
+                               KE::cend(view_from), KE::begin(view_dest),
+                               (ValueType)match_value);
+    verify_data(view_from, view_dest, rit);
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_from");
+    fill_view(view_from, name);
+
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_dest");
+    auto rit = KE::remove_copy(exespace(), view_from, view_dest,
+                               (ValueType)match_value);
+    verify_data(view_from, view_dest, rit);
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_from");
+    fill_view(view_from, name);
+
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_view_dest");
+    auto rit = KE::remove_copy("label", exespace(), view_from, view_dest,
+                               (ValueType)match_value);
+    verify_data(view_from, view_dest, rit);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 13031},     {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, remove_copy) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace RemoveCopy
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0c20b6b0a727b8163320e0694b4241fe313b1fd4
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp
@@ -0,0 +1,247 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace RemoveCopyIf {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(22);
+      } else {
+        v_h(i) = static_cast<value_type>(-12);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeFrom, class ViewTypeDest, class MyItResult,
+          class PredicateType>
+void verify_data(ViewTypeFrom view_from, ViewTypeDest view_dest,
+                 MyItResult my_result, PredicateType pred) {
+  // make a host copy of the view_from
+  auto view_from_h      = create_host_space_copy(view_from);
+  const std::size_t ext = view_from_h.extent(0);
+  using value_type      = typename ViewTypeFrom::value_type;
+
+  // run std::remove_copy_if
+  std::vector<value_type> gold_dest_std(ext);
+  auto std_result =
+      std::remove_copy_if(KE::cbegin(view_from_h), KE::cend(view_from_h),
+                          gold_dest_std.begin(), pred);
+
+  // check that returned iterators are correct
+  const std::size_t std_diff = std_result - gold_dest_std.begin();
+  const std::size_t my_diff  = my_result - KE::begin(view_dest);
+  EXPECT_TRUE(std_diff == my_diff);
+
+  // check the actual data after algo has been applied
+  auto view_dest_h = create_host_space_copy(view_dest);
+  for (std::size_t i = 0; i < my_diff; ++i) {
+    EXPECT_TRUE(view_dest_h(i) == gold_dest_std[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "mine: " << view_dest_h(i) << " "
+    // 	      << "std: " << gold_dest_std[i]
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "remove_copy_if: " << name << ", " <<
+  // view_tag_to_string(Tag{})
+  //           << ", " << value_type_to_string(ValueType()) << std::endl;
+
+  using pred_type = IsEvenFunctor<ValueType>;
+  pred_type remove_if_even;
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_dest");
+    fill_view(view_from, name);
+    auto rit = KE::remove_copy_if(exespace(), KE::cbegin(view_from),
+                                  KE::cend(view_from), KE::begin(view_dest),
+                                  remove_if_even);
+    verify_data(view_from, view_dest, rit, remove_if_even);
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_dest");
+    fill_view(view_from, name);
+    auto rit = KE::remove_copy_if("label", exespace(), KE::cbegin(view_from),
+                                  KE::cend(view_from), KE::begin(view_dest),
+                                  remove_if_even);
+    verify_data(view_from, view_dest, rit, remove_if_even);
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_dest");
+    fill_view(view_from, name);
+    auto rit =
+        KE::remove_copy_if(exespace(), view_from, view_dest, remove_if_even);
+    verify_data(view_from, view_dest, rit, remove_if_even);
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "remove_copy_if_view_dest");
+    fill_view(view_from, name);
+    auto rit = KE::remove_copy_if("label", exespace(), view_from, view_dest,
+                                  remove_if_even);
+    verify_data(view_from, view_dest, rit, remove_if_even);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 23103},     {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, remove_copy_if) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace RemoveCopyIf
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..05c86690a28689619e4c83efd92afbe5400ad463
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp
@@ -0,0 +1,231 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace RemoveIf {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(22);
+      } else {
+        v_h(i) = static_cast<value_type>(-12);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeData, class ViewTypeTest, class MyItResult,
+          class PredicateType>
+void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test,
+                 MyItResult my_result, PredicateType pred) {
+  // run std::remove_if
+  auto std_result =
+      std::remove_if(KE::begin(view_data_h), KE::end(view_data_h), pred);
+
+  // check that returned iterators are correct
+  const std::size_t std_diff = std_result - KE::begin(view_data_h);
+  const std::size_t my_diff  = my_result - KE::begin(view_test);
+  EXPECT_TRUE(std_diff == my_diff);
+
+  // check the actual data after algo has been applied
+  auto view_test_h = create_host_space_copy(view_test);
+  for (std::size_t i = 0; i < my_diff; ++i) {
+    EXPECT_TRUE(view_test_h(i) == view_data_h[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "mine: " << view_test_h(i) << " "
+    // 	      << "std: " << view_data_h(i)
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "remove_if: " << name << ", " << view_tag_to_string(Tag{})
+  //           << ", " << value_type_to_string(ValueType()) << std::endl;
+
+  using pred_type = IsEvenFunctor<ValueType>;
+  pred_type remove_if_even;
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_if_view");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit    = KE::remove_if(exespace(), KE::begin(view), KE::end(view),
+                             remove_if_even);
+    verify_data(data_h, view, rit, remove_if_even);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_if_view");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit    = KE::remove_if("label", exespace(), KE::begin(view),
+                             KE::end(view), remove_if_even);
+    verify_data(data_h, view, rit, remove_if_even);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_if_view");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit    = KE::remove_if(exespace(), view, remove_if_even);
+    verify_data(data_h, view, rit, remove_if_even);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "remove_if_view");
+    fill_view(view, name);
+    // make host copy BEFORE running algo
+    auto data_h = create_host_space_copy(view);
+    auto rit    = KE::remove_if("label", exespace(), view, remove_if_even);
+    verify_data(data_h, view, rit, remove_if_even);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 23103},     {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, remove_if) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace RemoveIf
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..14ee73376957c3caa272bae1814a958a5a701bba
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp
@@ -0,0 +1,255 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace Replace {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = value_type{-5} + static_cast<value_type>(i + 1);
+    }
+    v_h(0) = static_cast<value_type>(2);
+    v_h(3) = static_cast<value_type>(2);
+    v_h(5) = static_cast<value_type>(2);
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i < 4) {
+        v_h(i) = static_cast<value_type>(-1);
+      } else {
+        v_h(i) = static_cast<value_type>(2);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(-1);
+      } else {
+        v_h(i) = static_cast<value_type>(2);
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType1, class ValueType>
+void verify_data(const std::string& name, ViewType1 test_view,
+                 ValueType new_value) {
+  //! always careful because views might not be deep copyable
+  auto view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto view_h  = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    EXPECT_TRUE(view_h(0) == ValueType{1});
+  }
+
+  else if (name == "one-element-b") {
+    EXPECT_TRUE(view_h(0) == new_value);
+  }
+
+  else if (name == "two-elements-a") {
+    EXPECT_TRUE(view_h(0) == ValueType{1});
+    EXPECT_TRUE(view_h(1) == new_value);
+  }
+
+  else if (name == "two-elements-b") {
+    EXPECT_TRUE(view_h(0) == new_value);
+    EXPECT_TRUE(view_h(1) == ValueType{-1});
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < view_h.extent(0); ++i) {
+      if (i == 0 || i == 3 || i == 5 || i == 6) {
+        EXPECT_TRUE(view_h(i) == new_value);
+      } else {
+        const auto gold = ValueType{-5} + static_cast<ValueType>(i + 1);
+        EXPECT_TRUE(view_h(i) == gold);
+      }
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < view_h.extent(0); ++i) {
+      if (i < 4) {
+        EXPECT_TRUE(view_h(i) == ValueType{-1});
+      } else {
+        EXPECT_TRUE(view_h(i) == new_value);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < view_h.extent(0); ++i) {
+      if (i % 2 == 0) {
+        EXPECT_TRUE(view_h(i) == ValueType{-1});
+      } else {
+        EXPECT_TRUE(view_h(i) == new_value);
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "replace: " << name << ", " << view_tag_to_string(Tag{}) << ",
+  // "
+  //           << value_type_to_string(ValueType()) << std::endl;
+
+  ValueType old_value{2};
+  ValueType new_value{43};
+  auto view = create_view<ValueType>(Tag{}, view_ext, "replace");
+
+  {
+    fill_view(view, name);
+    KE::replace(exespace(), KE::begin(view), KE::end(view), old_value,
+                new_value);
+    verify_data(name, view, new_value);
+  }
+
+  {
+    fill_view(view, name);
+    KE::replace("label", exespace(), KE::begin(view), KE::end(view), old_value,
+                new_value);
+    verify_data(name, view, new_value);
+  }
+
+  {
+    fill_view(view, name);
+    KE::replace(exespace(), view, old_value, new_value);
+    verify_data(name, view, new_value);
+  }
+
+  {
+    fill_view(view, name);
+    KE::replace("label", exespace(), view, old_value, new_value);
+    verify_data(name, view, new_value);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 1103},      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_replace_ops_test, replace) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace Replace
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1e7f48067f57697252959efb7d67a597c9f2e0d0
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp
@@ -0,0 +1,299 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace ReplaceCopy {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = value_type{-5} + static_cast<value_type>(i + 1);
+    }
+    v_h(0) = static_cast<value_type>(2);
+    v_h(3) = static_cast<value_type>(2);
+    v_h(5) = static_cast<value_type>(2);
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i < 4) {
+        v_h(i) = static_cast<value_type>(-1);
+      } else {
+        v_h(i) = static_cast<value_type>(2);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(-1);
+      } else {
+        v_h(i) = static_cast<value_type>(2);
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeFrom, class ViewTypeTest, class ValueType>
+void verify_data(const std::string& name, ViewTypeFrom view_from,
+                 ViewTypeTest view_test, ValueType new_value) {
+  //! always careful because views might not be deep copyable
+  auto view_test_dc = create_deep_copyable_compatible_clone(view_test);
+  auto view_test_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_test_dc);
+
+  auto view_from_dc = create_deep_copyable_compatible_clone(view_from);
+  auto view_from_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc);
+
+  // we check that view_from is unchanged from what it was after filling
+  // while view_test should be changed
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{1});
+    EXPECT_TRUE(view_test_h(0) == view_from_h(0));
+  }
+
+  else if (name == "one-element-b") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{2});
+    EXPECT_TRUE(view_test_h(0) == new_value);
+  }
+
+  else if (name == "two-elements-a") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{1});
+    EXPECT_TRUE(view_from_h(1) == ValueType{2});
+
+    EXPECT_TRUE(view_test_h(0) == view_from_h(0));
+    EXPECT_TRUE(view_test_h(1) == new_value);
+  }
+
+  else if (name == "two-elements-b") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{2});
+    EXPECT_TRUE(view_from_h(1) == ValueType{-1});
+
+    EXPECT_TRUE(view_test_h(0) == new_value);
+    EXPECT_TRUE(view_test_h(1) == view_from_h(1));
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < view_test_h.extent(0); ++i) {
+      if (i == 0 || i == 3 || i == 5 || i == 6) {
+        EXPECT_TRUE(view_from_h(i) == ValueType{2});
+        EXPECT_TRUE(view_test_h(i) == new_value);
+      } else {
+        const auto gold = ValueType{-5} + static_cast<ValueType>(i + 1);
+        EXPECT_TRUE(view_from_h(i) == gold);
+        EXPECT_TRUE(view_test_h(i) == gold);
+      }
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < view_test_h.extent(0); ++i) {
+      if (i < 4) {
+        EXPECT_TRUE(view_from_h(i) == ValueType{-1});
+        EXPECT_TRUE(view_test_h(i) == view_from_h(i));
+      } else {
+        EXPECT_TRUE(view_from_h(i) == ValueType{2});
+        EXPECT_TRUE(view_test_h(i) == new_value);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < view_test_h.extent(0); ++i) {
+      if (i % 2 == 0) {
+        EXPECT_TRUE(view_from_h(i) == ValueType{-1});
+        EXPECT_TRUE(view_test_h(i) == view_from_h(i));
+      } else {
+        EXPECT_TRUE(view_from_h(i) == ValueType{2});
+        EXPECT_TRUE(view_test_h(i) == new_value);
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "replace_copy: " << name << ", " << view_tag_to_string(Tag{})
+  //           << ", " << value_type_to_string(ValueType()) << std::endl;
+
+  ValueType old_value{2};
+  ValueType new_value{43};
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_dest");
+    fill_view(view_from, name);
+    auto rit =
+        KE::replace_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from),
+                         KE::begin(view_dest), old_value, new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_dest");
+    fill_view(view_from, name);
+    auto rit = KE::replace_copy("label", exespace(), KE::cbegin(view_from),
+                                KE::cend(view_from), KE::begin(view_dest),
+                                old_value, new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_dest");
+    fill_view(view_from, name);
+    auto rit = KE::replace_copy(exespace(), view_from, view_dest, old_value,
+                                new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    auto view_from =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_from");
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_dest");
+    fill_view(view_from, name);
+    auto rit = KE::replace_copy("label", exespace(), view_from, view_dest,
+                                old_value, new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 1103},      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_replace_ops_test, replace_copy) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace ReplaceCopy
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cb98aac08c2cb28351ea2dfd8dc56f9229d48f4d
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp
@@ -0,0 +1,300 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace ReplaceCopyIf {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = value_type{-5} + static_cast<value_type>(i + 1);
+    }
+    v_h(0) = static_cast<value_type>(2);
+    v_h(3) = static_cast<value_type>(2);
+    v_h(5) = static_cast<value_type>(2);
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i < 4) {
+        v_h(i) = static_cast<value_type>(-1);
+      } else {
+        v_h(i) = static_cast<value_type>(2);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      if (i % 2 == 0) {
+        v_h(i) = static_cast<value_type>(-1);
+      } else {
+        v_h(i) = static_cast<value_type>(2);
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeFrom, class ViewTypeTest, class ValueType>
+void verify_data(const std::string& name, ViewTypeFrom view_from,
+                 ViewTypeTest view_test, ValueType new_value) {
+  //! always careful because views might not be deep copyable
+  auto view_test_dc = create_deep_copyable_compatible_clone(view_test);
+  auto view_test_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_test_dc);
+
+  auto view_from_dc = create_deep_copyable_compatible_clone(view_from);
+  auto view_from_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc);
+
+  // we check that view_from is unchanged from what it was after filling
+  // while view_test should be changed
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{1});
+    EXPECT_TRUE(view_test_h(0) == view_from_h(0));
+  }
+
+  else if (name == "one-element-b") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{2});
+    EXPECT_TRUE(view_test_h(0) == new_value);
+  }
+
+  else if (name == "two-elements-a") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{1});
+    EXPECT_TRUE(view_from_h(1) == ValueType{2});
+
+    EXPECT_TRUE(view_test_h(0) == view_from_h(0));
+    EXPECT_TRUE(view_test_h(1) == new_value);
+  }
+
+  else if (name == "two-elements-b") {
+    EXPECT_TRUE(view_from_h(0) == ValueType{2});
+    EXPECT_TRUE(view_from_h(1) == ValueType{-1});
+
+    EXPECT_TRUE(view_test_h(0) == new_value);
+    EXPECT_TRUE(view_test_h(1) == view_from_h(1));
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < view_test_h.extent(0); ++i) {
+      if (i == 0 || i == 3 || i == 5 || i == 6) {
+        EXPECT_TRUE(view_from_h(i) == ValueType{2});
+        EXPECT_TRUE(view_test_h(i) == new_value);
+      } else {
+        const auto gold = ValueType{-5} + static_cast<ValueType>(i + 1);
+        EXPECT_TRUE(view_from_h(i) == gold);
+        EXPECT_TRUE(view_test_h(i) == gold);
+      }
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < view_test_h.extent(0); ++i) {
+      if (i < 4) {
+        EXPECT_TRUE(view_from_h(i) == ValueType{-1});
+        EXPECT_TRUE(view_test_h(i) == view_from_h(i));
+      } else {
+        EXPECT_TRUE(view_from_h(i) == ValueType{2});
+        EXPECT_TRUE(view_test_h(i) == new_value);
+      }
+    }
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < view_test_h.extent(0); ++i) {
+      if (i % 2 == 0) {
+        EXPECT_TRUE(view_from_h(i) == ValueType{-1});
+        EXPECT_TRUE(view_test_h(i) == view_from_h(i));
+      } else {
+        EXPECT_TRUE(view_from_h(i) == ValueType{2});
+        EXPECT_TRUE(view_test_h(i) == new_value);
+      }
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class ValueType>
+struct EqualsTwoFunctor {
+  KOKKOS_INLINE_FUNCTION
+  bool operator()(const ValueType val) const { return (val == ValueType(2)); }
+};
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "replace_copy_if: " << name << ", " <<
+  // view_tag_to_string(Tag{})
+  //           << ", " << value_type_to_string(ValueType()) << std::endl;
+
+  ValueType new_value{43};
+  auto view_from =
+      create_view<ValueType>(Tag{}, view_ext, "replace_copy_if_from");
+  using pred_type = EqualsTwoFunctor<ValueType>;
+
+  {
+    fill_view(view_from, name);
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_if_dest");
+    auto rit = KE::replace_copy_if(exespace(), KE::cbegin(view_from),
+                                   KE::cend(view_from), KE::begin(view_dest),
+                                   pred_type(), new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    fill_view(view_from, name);
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_if_dest");
+    auto rit = KE::replace_copy_if("label", exespace(), KE::cbegin(view_from),
+                                   KE::cend(view_from), KE::begin(view_dest),
+                                   pred_type(), new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    fill_view(view_from, name);
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_if_dest");
+    auto rit = KE::replace_copy_if(exespace(), view_from, view_dest,
+                                   pred_type(), new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    fill_view(view_from, name);
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "replace_copy_if_dest");
+    auto rit = KE::replace_copy_if("label", exespace(), view_from, view_dest,
+                                   pred_type(), new_value);
+    verify_data(name, view_from, view_dest, new_value);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 1103},      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_replace_ops_test, replace_copy_if) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace ReplaceCopyIf
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c4d04889f60c0ee69da63b9db5271346b2520d2
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp
@@ -0,0 +1,257 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace ReplaceIf {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-20., 20.) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-100, 100) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  UnifDist<value_type> randObj;
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = value_type{-5} + static_cast<value_type>(i + 1);
+    }
+  }
+
+  else if (name == "small-b") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+    v_h(5) = static_cast<value_type>(-2);
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+// my own because std::replace_if is ONLY found with std=c++20
+template <class ForwardIt, class UnaryPredicate, class T>
+void my_host_replace_if(ForwardIt first, ForwardIt last, UnaryPredicate p,
+                        const T& new_value) {
+  for (; first != last; ++first) {
+    if (p(*first)) {
+      *first = new_value;
+    }
+  }
+}
+
+template <class ViewType1, class ViewType2, class ValueType,
+          class PredicateType>
+void verify_data(ViewType1 data_view,  // contains data
+                 ViewType2 test_view,  // the view to test
+                 ValueType new_value, PredicateType pred) {
+  //! always careful because views might not be deep copyable
+
+  auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
+  auto data_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
+  my_host_replace_if(KE::begin(data_view_h), KE::end(data_view_h), pred,
+                     new_value);
+
+  auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto test_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
+
+  if (test_view_h.extent(0) > 0) {
+    for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
+      // std::cout << i << " " << std::setprecision(15)
+      // 		<< data_view_dc(i) << " "
+      // 		<< data_view_h(i) << " "
+      // 		<< test_view_h(i) << std::endl;
+      EXPECT_TRUE(data_view_h(i) == test_view_h(i));
+    }
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType, class PredicateType>
+void run_single_scenario(const InfoType& scenario_info, PredicateType pred) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "replace_if: " << name << ", " << view_tag_to_string(Tag{})
+  //           << ", " << value_type_to_string(ValueType()) << std::endl;
+
+  ValueType new_value{23};
+  auto view_with_data =
+      create_view<ValueType>(Tag{}, view_ext, "replace_if_v2");
+  auto view_to_test = create_view<ValueType>(Tag{}, view_ext, "replace_if_v1");
+  fill_view(view_with_data, name);
+
+  {
+    CopyFunctor<decltype(view_with_data), decltype(view_to_test)> F1(
+        view_with_data, view_to_test);
+    Kokkos::parallel_for("copy", view_to_test.extent(0), F1);
+
+    KE::replace_if(exespace(), KE::begin(view_to_test), KE::end(view_to_test),
+                   pred, new_value);
+    verify_data(view_with_data, view_to_test, new_value, pred);
+  }
+
+  {
+    CopyFunctor<decltype(view_with_data), decltype(view_to_test)> F1(
+        view_with_data, view_to_test);
+    Kokkos::parallel_for("copy", view_to_test.extent(0), F1);
+
+    KE::replace_if("label", exespace(), KE::begin(view_to_test),
+                   KE::end(view_to_test), pred, new_value);
+    verify_data(view_with_data, view_to_test, new_value, pred);
+  }
+
+  {
+    CopyFunctor<decltype(view_with_data), decltype(view_to_test)> F1(
+        view_with_data, view_to_test);
+    Kokkos::parallel_for("copy", view_to_test.extent(0), F1);
+
+    KE::replace_if(exespace(), view_to_test, pred, new_value);
+    verify_data(view_with_data, view_to_test, new_value, pred);
+  }
+
+  {
+    CopyFunctor<decltype(view_with_data), decltype(view_to_test)> F1(
+        view_with_data, view_to_test);
+    Kokkos::parallel_for("copy", view_to_test.extent(0), F1);
+
+    KE::replace_if("label", exespace(), view_to_test, pred, new_value);
+    verify_data(view_with_data, view_to_test, new_value, pred);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+      {"medium", 1103},      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    using pred_p_t = IsPositiveFunctor<ValueType>;
+    run_single_scenario<Tag, ValueType>(it, pred_p_t{});
+    using pred_n_t = IsNegativeFunctor<ValueType>;
+    run_single_scenario<Tag, ValueType>(it, pred_n_t{});
+  }
+}
+
+TEST(std_algorithms_replace_ops_test, replace_if) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace ReplaceIf
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..77c80ed02fb629cb8c01bd0763b82c370d8b99f0
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp
@@ -0,0 +1,180 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace Reverse {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a" || name == "small-b" || name == "medium" ||
+           name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(-11) + static_cast<value_type>(i);
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType1, class ViewType2>
+void verify_data(ViewType1 test_view, ViewType2 orig_view) {
+  auto tv_h = create_host_space_copy(test_view);
+  auto ov_h = create_host_space_copy(orig_view);
+
+  const std::size_t ext = test_view.extent(0);
+  for (std::size_t i = 0; i < ext; ++i) {
+    EXPECT_TRUE(tv_h(i) == ov_h(ext - i - 1));
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "reverse: " << name << ", " << view_tag_to_string(Tag{}) << ",
+  // "
+  //           << value_type_to_string(ValueType()) << std::endl;
+
+  auto test_view = create_view<ValueType>(Tag{}, view_ext, "reverse");
+  auto orig_view = create_view<ValueType>(Tag{}, view_ext, "reverse");
+
+  {
+    fill_view(test_view, name);
+    fill_view(orig_view, name);
+    KE::reverse(exespace(), KE::begin(test_view), KE::end(test_view));
+    verify_data(test_view, orig_view);
+  }
+
+  {
+    fill_view(test_view, name);
+    fill_view(orig_view, name);
+    KE::reverse("label", exespace(), KE::begin(test_view), KE::end(test_view));
+    verify_data(test_view, orig_view);
+  }
+
+  {
+    fill_view(test_view, name);
+    fill_view(orig_view, name);
+    KE::reverse(exespace(), test_view);
+    verify_data(test_view, orig_view);
+  }
+
+  {
+    fill_view(test_view, name);
+    fill_view(orig_view, name);
+    KE::reverse("label", exespace(), test_view);
+    verify_data(test_view, orig_view);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 1103},      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_modseq_test, reverse) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace Reverse
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..49d40115c9c040a6a58803f62c891c3b900bc5c8
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp
@@ -0,0 +1,275 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace Rotate {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-50, 50) { m_gen.seed(1034343); }
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-90., 100.) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(-2);
+    v_h(4)  = static_cast<value_type>(3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(-40);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(62);
+    v_h(10) = static_cast<value_type>(6);
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(-1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(-3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(24);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(-46);
+    v_h(10) = static_cast<value_type>(8);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType, class ResultIt, class ViewHostType>
+void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host,
+                 std::size_t rotation_point) {
+  // run std::rotate
+  auto n_it = KE::begin(data_view_host) + rotation_point;
+  auto std_rit =
+      std::rotate(KE::begin(data_view_host), n_it, KE::end(data_view_host));
+
+  // make sure results match
+  const auto my_diff  = result_it - KE::begin(view);
+  const auto std_diff = std_rit - KE::begin(data_view_host);
+  EXPECT_TRUE(my_diff == std_diff);
+
+  // check views match
+  auto view_h           = create_host_space_copy(view);
+  const std::size_t ext = view_h.extent(0);
+  for (std::size_t i = 0; i < ext; ++i) {
+    EXPECT_TRUE(view_h(i) == data_view_host[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "mine: " << view_h(i) << " "
+    // 	      << "std: " << data_view_host(i)
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name,
+                            std::size_t rotation_point) {
+  std::cout << "rotate: "
+            << " at " << rotation_point << ", " << name << ", "
+            << view_tag_to_string(Tag{}) << ", "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info,
+                         std::size_t rotation_point) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, rotation_point);
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "rotate_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE rotate or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto n_it   = KE::begin(view) + rotation_point;
+    auto rit    = KE::rotate(exespace(), KE::begin(view), n_it, KE::end(view));
+    verify_data(rit, view, view_h, rotation_point);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "rotate_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE rotate or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto n_it   = KE::begin(view) + rotation_point;
+    auto rit =
+        KE::rotate("label", exespace(), KE::begin(view), n_it, KE::end(view));
+    verify_data(rit, view, view_h, rotation_point);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "rotate_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE rotate or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::rotate(exespace(), view, rotation_point);
+    // verify_data(rit, view, view_h, rotation_point);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "rotate_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE rotate or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::rotate("label", exespace(), view, rotation_point);
+    verify_data(rit, view, view_h, rotation_point);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 11},
+      {"small-b", 13},       {"medium", 21103},     {"large", 101513}};
+
+  std::vector<std::size_t> rotation_points = {0,  1,   2,    3,     8,
+                                              56, 101, 1003, 101501};
+
+  for (const auto& it : scenarios) {
+    for (const auto& it2 : rotation_points) {
+      // for each view scenario, we rotate at multiple points
+      // but only if the view has an extent that is >= rotation point
+      const auto view_ext = it.second;
+      if (view_ext >= it2) {
+        run_single_scenario<Tag, ValueType>(it, it2);
+      }
+    }
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, rotate) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace Rotate
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..02867478da181a3bd2a2bf82f22e03a0621c89a5
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp
@@ -0,0 +1,275 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace RotateCopy {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-50, 50) { m_gen.seed(1034343); }
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-90., 100.) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(6);
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(8);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeFrom, class ViewTypeTest>
+void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test,
+                 std::size_t rotation_point) {
+  auto view_from_h      = create_host_space_copy(view_from);
+  auto view_test_h      = create_host_space_copy(view_test);
+  const std::size_t ext = view_test_h.extent(0);
+
+  using value_type = typename ViewTypeTest::value_type;
+  std::vector<value_type> std_gold_h(ext);
+  auto first_n = KE::cbegin(view_from_h) + rotation_point;
+  std::rotate_copy(KE::cbegin(view_from_h), first_n, KE::cend(view_from_h),
+                   std_gold_h.begin());
+
+  for (std::size_t i = 0; i < ext; ++i) {
+    EXPECT_TRUE(view_test_h(i) == std_gold_h[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "from: " << view_from_h(i) << " "
+    // 	      << "mine: " << view_test_h(i) << " "
+    // 	      << "std: " << std_gold_h[i]
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name,
+                            std::size_t rotation_point) {
+  std::cout << "rotate_copy: "
+            << " at " << rotation_point << ", " << name << ", "
+            << view_tag_to_string(Tag{}) << ", "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info,
+                         std::size_t rotation_point) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, rotation_point);
+
+  auto view_from = create_view<ValueType>(Tag{}, view_ext, "rotate_copy_from");
+  fill_view(view_from, name);
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "rotate_copy_dest");
+    auto n_it = KE::cbegin(view_from) + rotation_point;
+    auto rit  = KE::rotate_copy(exespace(), KE::cbegin(view_from), n_it,
+                               KE::cend(view_from), KE::begin(view_dest));
+    verify_data(view_from, view_dest, rotation_point);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "rotate_copy_dest");
+    auto n_it = KE::cbegin(view_from) + rotation_point;
+    auto rit = KE::rotate_copy("label", exespace(), KE::cbegin(view_from), n_it,
+                               KE::cend(view_from), KE::begin(view_dest));
+    verify_data(view_from, view_dest, rotation_point);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "rotate_copy_dest");
+    auto rit =
+        KE::rotate_copy(exespace(), view_from, rotation_point, view_dest);
+    verify_data(view_from, view_dest, rotation_point);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "rotate_copy_dest");
+    auto rit = KE::rotate_copy("label", exespace(), view_from, rotation_point,
+                               view_dest);
+    verify_data(view_from, view_dest, rotation_point);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext));
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 11},
+      {"small-b", 13},       {"medium", 21103},     {"large", 101513}};
+
+  std::vector<std::size_t> rotation_points = {0,  1,   2,    3,     8,
+                                              56, 101, 1003, 101501};
+
+  for (const auto& it : scenarios) {
+    for (const auto& it2 : rotation_points) {
+      // for each view scenario, we rotate at multiple points
+      // but only if the view has an extent that is >= rotation point
+      const auto view_ext = it.second;
+      if (view_ext >= it2) {
+        run_single_scenario<Tag, ValueType>(it, it2);
+      }
+    }
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, rotate_copy) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace RotateCopy
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsScalarRedVsView.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsScalarRedVsView.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c054dfcc1013c15c856d56f14bf0866f22749252
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsScalarRedVsView.cpp
@@ -0,0 +1,235 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+#include <TestStdAlgorithmsHelperFunctors.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp>
+
+namespace KE = Kokkos::Experimental;
+
+namespace Test {
+namespace stdalgos {
+
+template <class ViewType>
+void fill_view(ViewType dest_view) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+  using aux_view_t = Kokkos::View<value_type*, exe_space>;
+
+  const std::size_t ext = dest_view.extent(0);
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  for (std::size_t i = 0; i < ext; ++i) {
+    v_h(i) = (value_type)i;
+  }
+  v_h(ext / 2) = (value_type)-101;
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType, class IndexType, class ReducerType>
+struct MyFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  ViewType m_view;
+  ReducerType m_reducer;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    m_reducer.join(red_value, red_value_type{m_view(i), i});
+  }
+
+  KOKKOS_FUNCTION
+  MyFunctor(ViewType view, ReducerType reducer)
+      : m_view(view), m_reducer(std::move(reducer)) {}
+};
+
+TEST(scalar_vs_view_red, use_scalar) {
+  using exe_space   = Kokkos::DefaultExecutionSpace;
+  using index_type  = int;
+  using scalar_type = int;
+  using view_type   = Kokkos::View<scalar_type*, exe_space>;
+
+  const auto ext = 10001;
+  view_type view("myview", ext);
+  fill_view(view);
+
+  using reducer_type    = ::Kokkos::MinLoc<scalar_type, index_type>;
+  using red_result_type = typename reducer_type::value_type;
+  using func_type       = MyFunctor<view_type, index_type, reducer_type>;
+  red_result_type result;
+  reducer_type reducer(result);
+  Kokkos::parallel_reduce("MinLocReduce",
+                          Kokkos::RangePolicy<exe_space>(exe_space(), 0, ext),
+                          func_type(view, reducer), reducer);
+  std::cout << " use_scalar = " << result.val << '\n';
+}
+
+template <class IteratorType, class ReducerType>
+struct StdMyMinFunctor {
+  using index_type     = typename IteratorType::difference_type;
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType m_first;
+  ReducerType m_reducer;
+
+  KOKKOS_FUNCTION
+  void operator()(const index_type i, red_value_type& red_value) const {
+    m_reducer.join(red_value, red_value_type{m_first[i], i});
+  }
+
+  KOKKOS_FUNCTION
+  StdMyMinFunctor(IteratorType first, ReducerType reducer)
+      : m_first(std::move(first)), m_reducer(std::move(reducer)) {}
+};
+
+template <class ViewType, class ReducerType>
+struct StdMyMinFunctor2 {
+  using red_value_type = typename ReducerType::value_type;
+
+  ViewType m_view;
+  ReducerType m_reducer;
+
+  KOKKOS_FUNCTION
+  void operator()(const std::size_t i, red_value_type& red_value) const {
+    m_reducer.join(red_value, red_value_type{m_view(i), i});
+  }
+
+  KOKKOS_FUNCTION
+  StdMyMinFunctor2(ViewType viewIn, ReducerType reducer)
+      : m_view(viewIn), m_reducer(std::move(reducer)) {}
+};
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType my_min_1(const ExecutionSpace& ex, IteratorType first,
+                      IteratorType last) {
+  using index_type = typename IteratorType::difference_type;
+  using value_type = typename IteratorType::value_type;
+  using reducer_type =
+      Kokkos::MinFirstLoc<value_type, index_type, ExecutionSpace>;
+  using result_view_type = typename reducer_type::result_view_type;
+  using func_t           = StdMyMinFunctor<IteratorType, reducer_type>;
+
+  result_view_type result("min_or_max_elem_impl_result");
+  reducer_type reducer(result);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(
+      "label", Kokkos::RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_t(first, reducer), reducer);
+  const auto result_h =
+      ::Kokkos::create_mirror_view_and_copy(::Kokkos::HostSpace(), result);
+  return first + result_h().loc;
+}
+
+template <class ExecutionSpace, class IteratorType>
+IteratorType my_min_2(const ExecutionSpace& ex, IteratorType first,
+                      IteratorType last) {
+  using index_type   = typename IteratorType::difference_type;
+  using value_type   = typename IteratorType::value_type;
+  using reducer_type = Kokkos::MinFirstLoc<value_type, index_type>;
+  using result_type  = typename reducer_type::value_type;
+  using func_t       = StdMyMinFunctor<IteratorType, reducer_type>;
+
+  result_type result;
+  reducer_type reducer(result);
+  const auto num_elements = Kokkos::Experimental::distance(first, last);
+  ::Kokkos::parallel_reduce(
+      "label", Kokkos::RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_t(first, reducer), reducer);
+  return first + result.loc;
+}
+
+template <class ExecutionSpace, class ViewType>
+std::size_t my_min_3(const ExecutionSpace& ex, ViewType view) {
+  using index_type   = std::size_t;
+  using value_type   = typename ViewType::value_type;
+  using reducer_type = Kokkos::MinFirstLoc<value_type, index_type>;
+  using result_type  = typename reducer_type::value_type;
+  using func_t       = StdMyMinFunctor2<ViewType, reducer_type>;
+
+  result_type result;
+  reducer_type reducer(result);
+  const auto num_elements = view.extent(0);
+  ::Kokkos::parallel_reduce(
+      "label", Kokkos::RangePolicy<ExecutionSpace>(ex, 0, num_elements),
+      func_t(view, reducer), reducer);
+  return result.loc;
+}
+
+TEST(scalar_vs_view_red, my_min_it_use_result_view) {
+  using exe_space = Kokkos::DefaultExecutionSpace;
+  using view_type = Kokkos::View<int*, exe_space>;
+  view_type view("myview", 10001);
+  fill_view(view);
+
+  auto rit = my_min_1(exe_space(), KE::cbegin(view), KE::cend(view));
+  std::cout << " my_min_el = " << KE::distance(KE::cbegin(view), rit) << '\n';
+}
+
+TEST(scalar_vs_view_red, my_min_no_it_use_result_scalar) {
+  using exe_space = Kokkos::DefaultExecutionSpace;
+  using view_type = Kokkos::View<int*, exe_space>;
+  view_type view("myview", 10001);
+  fill_view(view);
+
+  auto ind = my_min_3(exe_space(), view);
+  std::cout << " my_min_el = " << ind << '\n';
+}
+
+TEST(scalar_vs_view_red, my_min_it_use_result_scalar) {
+  using exe_space = Kokkos::DefaultExecutionSpace;
+  using view_type = Kokkos::View<int*, exe_space>;
+  view_type view("myview", 10001);
+  fill_view(view);
+
+  auto rit = my_min_2(exe_space(), KE::cbegin(view), KE::cend(view));
+  std::cout << " my_min_el = " << KE::distance(KE::cbegin(view), rit) << '\n';
+}
+
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..20e93e9648696b1117ef19730f2486b45b124fe0
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp
@@ -0,0 +1,364 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace Search {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(0, 20) { m_gen.seed(1034343); }
+  UnifDist(int a, int b) : m_dist(a, b) { m_gen.seed(234343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "three-elements-a") {
+    v_h(0) = static_cast<value_type>(-1);
+    v_h(1) = static_cast<value_type>(2);
+    v_h(2) = static_cast<value_type>(2);
+  }
+
+  else if (name == "three-elements-b") {
+    v_h(0) = static_cast<value_type>(3);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(3);
+  }
+
+  else if (name == "four-elements-a") {
+    v_h(0) = static_cast<value_type>(-1);
+    v_h(1) = static_cast<value_type>(2);
+    v_h(2) = static_cast<value_type>(2);
+    v_h(3) = static_cast<value_type>(4);
+  }
+
+  else if (name == "four-elements-b") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(1);
+    v_h(3) = static_cast<value_type>(1);
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(4);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(-1);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(1);
+    v_h(7)  = static_cast<value_type>(2);
+    v_h(8)  = static_cast<value_type>(2);
+    v_h(9)  = static_cast<value_type>(4);
+    v_h(10) = static_cast<value_type>(1);
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(2);
+    v_h(2)  = static_cast<value_type>(3);
+    v_h(3)  = static_cast<value_type>(1);
+    v_h(4)  = static_cast<value_type>(-1);
+    v_h(5)  = static_cast<value_type>(-2);
+    v_h(6)  = static_cast<value_type>(0);
+    v_h(7)  = static_cast<value_type>(1);
+    v_h(8)  = static_cast<value_type>(2);
+    v_h(9)  = static_cast<value_type>(2);
+    v_h(10) = static_cast<value_type>(5);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+  }
+
+  else {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewType>
+auto create_seq_to_search(ViewType data_view, std::size_t seq_extent) {
+  // for the search, we need to specify a sequence that we search for
+  // within the original view/range.
+  // to do this, rather than doing something purely random,
+  // we use the view with the data, and select a subsequence.
+
+  auto data_view_h            = create_host_space_copy(data_view);
+  const auto data_view_extent = data_view.extent(0);
+
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+  using seq_view_t = Kokkos::View<value_type*, exe_space>;
+  seq_view_t seq_view("seq_view", seq_extent);
+  auto seq_view_h = create_mirror_view(Kokkos::HostSpace(), seq_view);
+
+  // when the target sequence is of same size as view, just fill
+  // sequeunce with all values of the view
+  if (seq_extent == data_view_extent) {
+    for (std::size_t i = 0; i < seq_extent; ++i) {
+      seq_view_h(i) = data_view_h(i);
+    }
+  } else {
+    // if target sequence to fill is smaller, then we need to pick
+    // a starting point to copy data from to make the the sequence.
+    // we pick randomly between 0 and data_view_extent - seq_extent.
+    // and fill the sequeunce data with the values copied from data view.
+
+    using dist_type = std::uniform_int_distribution<int>;
+    std::random_device r;
+    // from this:
+    // https://stackoverflow.com/questions/34490599/c11-how-to-set-seed-using-random
+    std::seed_seq seed{r(), r(), r(), r(), r(), r()};
+    std::mt19937 gen(seed);
+    dist_type dist(0, data_view_extent - seq_extent);
+    const auto start = dist(gen);
+    // std::cout << "start= " << start << "\n";
+    for (std::size_t i = 0; i < seq_extent; ++i) {
+      seq_view_h(i) = data_view_h(start + i);
+      // std::cout << "i= " << i << " " << seq_view_h(i) << "\n";
+    }
+  }
+
+  Kokkos::deep_copy(seq_view, seq_view_h);
+  return seq_view;
+}
+
+// search is only avai from c++17, so I have to put it here
+template <class ForwardIt1, class ForwardIt2, class BinaryPredicate>
+ForwardIt1 my_std_search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first,
+                         ForwardIt2 s_last, BinaryPredicate p) {
+  for (;; ++first) {
+    ForwardIt1 it = first;
+    for (ForwardIt2 s_it = s_first;; ++it, ++s_it) {
+      if (s_it == s_last) {
+        return first;
+      }
+      if (it == last) {
+        return last;
+      }
+      if (!p(*it, *s_it)) {
+        break;
+      }
+    }
+  }
+}
+
+// search is only avai from c++17, so I have to put it here
+template <class ForwardIt1, class ForwardIt2>
+ForwardIt1 my_std_search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first,
+                         ForwardIt2 s_last) {
+  using value_type1 = typename ForwardIt1::value_type;
+  using value_type2 = typename ForwardIt2::value_type;
+
+  using pred_t = IsEqualFunctor<value_type1, value_type2>;
+  return my_std_search(first, last, s_first, s_last, pred_t());
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name, std::size_t seq_ext) {
+  std::cout << "search: default predicate: " << name << ", "
+            << "search_seq_ext = " << seq_ext << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class Predicate>
+void print_scenario_details(const std::string& name, std::size_t seq_ext,
+                            Predicate pred) {
+  (void)pred;
+  std::cout << "search: custom  predicate: " << name << ", "
+            << "search_seq_ext = " << seq_ext << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext,
+                         Args... args) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, seq_ext, args...);
+
+  auto view = create_view<ValueType>(Tag{}, view_ext, "search_test_view");
+  fill_view(view, name);
+  auto s_view = create_seq_to_search(view, seq_ext);
+
+  // run std
+  auto view_h   = create_host_space_copy(view);
+  auto s_view_h = create_host_space_copy(s_view);
+  auto stdrit =
+      my_std_search(KE::cbegin(view_h), KE::cend(view_h), KE::cbegin(s_view_h),
+                    KE::cend(s_view_h), args...);
+
+  {
+    auto myrit        = KE::search(exespace(), KE::cbegin(view), KE::cend(view),
+                            KE::cbegin(s_view), KE::cend(s_view), args...);
+    const auto mydiff = myrit - KE::cbegin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit =
+        KE::search("label", exespace(), KE::cbegin(view), KE::cend(view),
+                   KE::cbegin(s_view), KE::cend(s_view), args...);
+    const auto mydiff  = myrit - KE::cbegin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit         = KE::search(exespace(), view, s_view, args...);
+    const auto mydiff  = myrit - KE::begin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit         = KE::search("label", exespace(), view, s_view, args...);
+    const auto mydiff  = myrit - KE::begin(view);
+    const auto stddiff = stdrit - KE::cbegin(view_h);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {{"empty", 0},
+                                                        {"one-element-a", 1},
+                                                        {"one-element-b", 1},
+                                                        {"two-elements-a", 2},
+                                                        {"two-elements-b", 2},
+                                                        {"three-elements-a", 3},
+                                                        {"three-elements-b", 3},
+                                                        {"four-elements-a", 4},
+                                                        {"four-elements-b", 4},
+                                                        {"small-a", 11},
+                                                        {"small-b", 13},
+                                                        {"medium-a", 11103},
+                                                        {"medium-b", 21103},
+                                                        {"large-a", 101513},
+                                                        {"large-b", 100111}};
+
+  const std::vector<std::size_t> seq_extents = {
+      0, 1, 2, 3, 4, 5, 8, 11, 15, 31, 113, 523, 1035, 11103};
+
+  // for each scenario we want to run "search"
+  // for a set of sequences of various extents
+  for (const auto& it : scenarios) {
+    for (const auto& it2 : seq_extents) {
+      // only run if view is larger or equal than sequence to search for
+      if (it.second >= it2) {
+        run_single_scenario<Tag, ValueType>(it, it2);
+
+        using func_t = IsEqualFunctor<ValueType>;
+        run_single_scenario<Tag, ValueType>(it, it2, func_t());
+      }
+    }
+  }
+}
+
+TEST(std_algorithms_non_mod_seq_ops, search) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace Search
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4d54166353b5cb388e6485e8cf2990d873c52d58
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp
@@ -0,0 +1,336 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace Search_n {
+
+namespace KE = Kokkos::Experimental;
+
+// search_n is only available from c++20, so I have to put it here
+template <class ForwardIt, class Size, class T, class BinaryPredicate>
+ForwardIt my_std_search_n(ForwardIt first, ForwardIt last, Size count,
+                          const T& value, BinaryPredicate p) {
+  if (count <= 0) {
+    return first;
+  }
+  for (; first != last; ++first) {
+    if (!p(*first, value)) {
+      continue;
+    }
+
+    ForwardIt candidate = first;
+    Size cur_count      = 0;
+
+    while (true) {
+      ++cur_count;
+      if (cur_count >= count) {
+        // success
+        return candidate;
+      }
+      ++first;
+      if (first == last) {
+        // exhausted the list
+        return last;
+      }
+      if (!p(*first, value)) {
+        // too few in a row
+        break;
+      }
+    }
+  }
+
+  return last;
+}
+
+template <class ForwardIt, class Size, class T>
+ForwardIt my_std_search_n(ForwardIt first, ForwardIt last, Size count,
+                          const T& value) {
+  using iter_value_type = typename ForwardIt::value_type;
+  using p_type          = IsEqualFunctor<iter_value_type, T>;
+  return my_std_search_n(first, last, count, value, p_type());
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(0, 20) { m_gen.seed(1034343); }
+  UnifDist(int a, int b) : m_dist(a, b) { m_gen.seed(234343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType, class ValueType>
+void fill_view(ViewType dest_view, ValueType value, std::size_t count,
+               const std::string& where_to_place_count_values) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  // fill with something
+  for (std::size_t i = 0; i < ext; ++i) {
+    v_h(i) = 15;
+  }
+
+  if (where_to_place_count_values == "none") {
+    // do nothing
+  }
+
+  else if (where_to_place_count_values == "left") {
+    for (std::size_t i = 0; i < count; ++i) {
+      v_h(i) = value;
+    }
+  }
+
+  else if (where_to_place_count_values == "left_and_1567") {
+    for (std::size_t i = 0; i < count; ++i) {
+      v_h(i) = value;
+    }
+
+    for (std::size_t i = 0; i < count; ++i) {
+      v_h(1567 + i) = value;
+    }
+  }
+
+  else if (where_to_place_count_values == "random") {
+    // find a random location to start filling view
+    using dist_type = std::uniform_int_distribution<int>;
+    std::random_device r;
+    // from this:
+    // https://stackoverflow.com/questions/34490599/c11-how-to-set-seed-using-random
+    std::seed_seq seed{r(), r(), r(), r(), r(), r()};
+    std::mt19937 gen(seed);
+    dist_type dist(0, ext - count);
+    const auto start_at = dist(gen);
+    // std::cout << "start_at " << start_at << std::endl;
+
+    for (std::size_t i = 0; i < count; ++i) {
+      v_h(start_at + i) = value;
+    }
+  }
+
+  else if (where_to_place_count_values == "11133_and_right") {
+    for (std::size_t i = 0; i < count; ++i) {
+      v_h(11133 + i) = value;
+    }
+
+    for (std::size_t i = 0; i < count; ++i) {
+      v_h(ext - count + i) = value;
+    }
+  }
+
+  else if (where_to_place_count_values == "right") {
+    for (std::size_t i = 0; i < count; ++i) {
+      v_h(ext - count + i) = value;
+    }
+  }
+
+  else {
+    throw std::runtime_error("Kokkos: test: search_n: this should not happen");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name, std::size_t count,
+                            const std::string& where_to_place_count_values) {
+  std::cout << "search_n: default predicate: " << name << ", "
+            << "count = " << count << ", " << where_to_place_count_values
+            << ", " << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class Predicate>
+void print_scenario_details(const std::string& name, std::size_t count,
+                            const std::string& where_to_place_count_values,
+                            Predicate pred) {
+  (void)pred;
+  std::cout << "search_n: custom predicate: " << name << ", "
+            << "count = " << count << ", " << where_to_place_count_values
+            << ", " << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info, std::size_t count,
+                         ValueType value, Args... args) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  const auto count_place     = std::get<2>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, count, count_place, args...);
+
+  auto view = create_view<ValueType>(Tag{}, view_ext, "search_n_test_view");
+  fill_view(view, value, count, count_place);
+
+  // run std
+  auto view_h = create_host_space_copy(view);
+  auto stdrit = my_std_search_n(KE::cbegin(view_h), KE::cend(view_h), count,
+                                value, args...);
+  const auto stddiff = stdrit - KE::cbegin(view_h);
+
+  {
+    auto myrit = KE::search_n(exespace(), KE::cbegin(view), KE::cend(view),
+                              count, value, args...);
+    const auto mydiff = myrit - KE::cbegin(view);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit        = KE::search_n("label", exespace(), KE::cbegin(view),
+                              KE::cend(view), count, value, args...);
+    const auto mydiff = myrit - KE::cbegin(view);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit = KE::search_n("label", exespace(), view, count, value, args...);
+    const auto mydiff = myrit - KE::begin(view);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  {
+    auto myrit        = KE::search_n(exespace(), view, count, value, args...);
+    const auto mydiff = myrit - KE::begin(view);
+    EXPECT_TRUE(mydiff == stddiff);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  using scenario_t = std::tuple<std::string, std::size_t, std::string>;
+  std::vector<scenario_t> scenarios(31);
+  scenarios[0] = scenario_t("empty", 0u, "none");
+  scenarios[1] = scenario_t("one-element-a", 1u, "none");
+  scenarios[2] = scenario_t("one-element-b", 1u, "left");
+
+  scenarios[3] = scenario_t("two-elements-a", 2u, "none");
+  scenarios[4] = scenario_t("two-elements-b", 2u, "left");
+  scenarios[5] = scenario_t("two-elements-c", 2u, "right");
+
+  scenarios[6] = scenario_t("three-elements-a", 3u, "none");
+  scenarios[7] = scenario_t("three-elements-b", 3u, "left");
+  scenarios[8] = scenario_t("three-elements-c", 3u, "random");
+  scenarios[9] = scenario_t("three-elements-d", 3u, "right");
+
+  scenarios[10] = scenario_t("four-elements-a", 4u, "none");
+  scenarios[11] = scenario_t("four-elements-b", 4u, "left");
+  scenarios[12] = scenario_t("four-elements-c", 4u, "random");
+  scenarios[13] = scenario_t("four-elements-d", 4u, "right");
+
+  scenarios[14] = scenario_t("small-a", 13u, "none");
+  scenarios[15] = scenario_t("small-b", 13u, "left");
+  scenarios[16] = scenario_t("small-c", 13u, "random");
+  scenarios[17] = scenario_t("small-d", 13u, "right");
+  scenarios[18] = scenario_t("small-e", 131u, "none");
+  scenarios[19] = scenario_t("small-f", 131u, "left");
+  scenarios[20] = scenario_t("small-g", 131u, "random");
+  scenarios[21] = scenario_t("small-h", 131u, "right");
+
+  scenarios[22] = scenario_t("medium-a", 21103u, "none");
+  scenarios[22] = scenario_t("medium-b", 21103u, "left");
+  scenarios[23] = scenario_t("medium-c", 21103u, "random");
+  scenarios[24] = scenario_t("medium-d", 21103u, "right");
+  scenarios[25] = scenario_t("medium-e", 21103u, "left_and_1567");
+  scenarios[26] = scenario_t("medium-f", 21103u, "11133_and_right");
+
+  scenarios[27] = scenario_t("large-a", 101333u, "none");
+  scenarios[28] = scenario_t("large-b", 101333u, "left");
+  scenarios[29] = scenario_t("large-c", 101333u, "random");
+  scenarios[30] = scenario_t("large-d", 101333u, "right");
+
+  const std::vector<std::size_t> counts = {1,  2,  3,  4,   5,  8,
+                                           11, 13, 31, 131, 523};
+
+  const ValueType target_value = 3;
+
+  // for each view scenario, run "search_n" for multiple counts
+  for (const auto& it : scenarios) {
+    const std::size_t view_ext = std::get<1>(it);
+
+    if (view_ext == 0) {
+      run_single_scenario<Tag, ValueType>(it, 0, target_value);
+    } else {
+      for (const auto& it2 : counts) {
+        // only run if view is larger or equal than count
+        if (view_ext >= it2) {
+          run_single_scenario<Tag, ValueType>(it, it2, target_value);
+
+          using func_t = IsEqualFunctor<ValueType>;
+          run_single_scenario<Tag, ValueType>(it, it2, target_value, func_t());
+        }
+      }
+    }
+  }
+}
+
+TEST(std_algorithms_non_mod_seq_ops, search_n) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace Search_n
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2af0b438ae7832cb227b0a246c30e39a776118ff
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp
@@ -0,0 +1,243 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace ShiftLeft {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-50, 50) { m_gen.seed(1034343); }
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-90., 100.) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ForwardIterator>
+ForwardIterator my_std_shift_left(
+    ForwardIterator first, ForwardIterator last,
+    typename std::iterator_traits<ForwardIterator>::difference_type n) {
+  // copied from
+  // https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/shift_left.h
+
+  if (n == 0) {
+    return last;
+  }
+
+  ForwardIterator m = first;
+  for (; n > 0; --n) {
+    if (m == last) {
+      return first;
+    }
+    ++m;
+  }
+  return std::move(m, last, first);
+}
+
+template <class ViewType, class ResultIt, class ViewHostType>
+void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host,
+                 std::size_t shift_value) {
+  auto std_rit = my_std_shift_left(KE::begin(data_view_host),
+                                   KE::end(data_view_host), shift_value);
+
+  // make sure results match
+  const auto my_diff  = result_it - KE::begin(view);
+  const auto std_diff = std_rit - KE::begin(data_view_host);
+  EXPECT_TRUE(my_diff == std_diff);
+
+  // check views match
+  auto view_h = create_host_space_copy(view);
+  for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) {
+    EXPECT_TRUE(view_h(i) == data_view_host[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "mine: " << view_h(i) << " "
+    // 	      << "std: " << data_view_host(i)
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name, std::size_t shift_value) {
+  std::cout << "shift_left: "
+            << " by " << shift_value << ", " << name << ", "
+            << view_tag_to_string(Tag{}) << ", "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info,
+                         std::size_t shift_value) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, shift_value);
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "shift_left_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_left or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit =
+        KE::shift_left(exespace(), KE::begin(view), KE::end(view), shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "shift_left_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_left or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::shift_left("label", exespace(), KE::begin(view),
+                              KE::end(view), shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "shift_left_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_left or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::shift_left(exespace(), view, shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  {
+    auto view = create_view<ValueType>(Tag{}, view_ext, "shift_left_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_left or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::shift_left("label", exespace(), view, shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {{"empty", 0},
+                                                        {"one-element-a", 1},
+                                                        {"one-element-b", 1},
+                                                        {"two-elements-a", 2},
+                                                        {"two-elements-b", 2},
+                                                        {"three-elements-a", 3},
+                                                        {"three-elements-b", 3},
+                                                        {"small-a", 11},
+                                                        {"small-b", 13},
+                                                        {"medium", 21103},
+                                                        {"large", 101513}};
+
+  // a shift value MUST be non-negative but it does not matter
+  // if it is larger than the view, the algorithm is supposed
+  // to handle that case too
+  std::vector<std::size_t> shifts = {0, 1, 2, 3, 8, 56, 101, 1003, 101501};
+
+  for (const auto& it : scenarios) {
+    for (const auto& it2 : shifts) {
+      run_single_scenario<Tag, ValueType>(it, it2);
+    }
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, shift_left) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace ShiftLeft
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae85e5c6d45c9444ae7d1d18fb5a364ba198eec7
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp
@@ -0,0 +1,247 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace ShiftRight {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-50, 50) { m_gen.seed(1034343); }
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(-90., 100.) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ForwardIterator>
+ForwardIterator my_std_shift_right(
+    ForwardIterator first, ForwardIterator last,
+    typename std::iterator_traits<ForwardIterator>::difference_type n) {
+  // copied from
+  // https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/shift_right.h
+
+  if (n == 0) {
+    return first;
+  }
+
+  decltype(n) d = last - first;
+  if (n >= d) {
+    return last;
+  }
+  ForwardIterator m = first + (d - n);
+  return std::move_backward(first, m, last);
+}
+
+template <class ViewType, class ResultIt, class ViewHostType>
+void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host,
+                 std::size_t shift_value) {
+  auto std_rit = my_std_shift_right(KE::begin(data_view_host),
+                                    KE::end(data_view_host), shift_value);
+
+  // make sure results match
+  const auto my_diff  = KE::end(view) - result_it;
+  const auto std_diff = KE::end(data_view_host) - std_rit;
+  EXPECT_TRUE(my_diff == std_diff);
+
+  // check views match
+  auto view_h = create_host_space_copy(view);
+  auto it1    = KE::cbegin(view_h);
+  auto it2    = KE::cbegin(data_view_host);
+  for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) {
+    EXPECT_TRUE(it1[i] == it2[i]);
+    // std::cout << "i= " << i << " "
+    // 	      << "mine: " << it1[i] << " "
+    // 	      << "std:  " << it2[i]
+    // 	      << '\n';
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name, std::size_t shift_value) {
+  std::cout << "shift_right: "
+            << " by " << shift_value << ", " << name << ", "
+            << view_tag_to_string(Tag{}) << ", "
+            << value_type_to_string(ValueType()) << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info,
+                         std::size_t shift_value) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, shift_value);
+
+  {
+    auto view =
+        create_view<ValueType>(Tag{}, view_ext, "shift_right_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_right or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::shift_right(exespace(), KE::begin(view), KE::end(view),
+                               shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  {
+    auto view =
+        create_view<ValueType>(Tag{}, view_ext, "shift_right_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_right or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::shift_right("label", exespace(), KE::begin(view),
+                               KE::end(view), shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  {
+    auto view =
+        create_view<ValueType>(Tag{}, view_ext, "shift_right_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_right or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::shift_right(exespace(), view, shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  {
+    auto view =
+        create_view<ValueType>(Tag{}, view_ext, "shift_right_data_view");
+    fill_view(view, name);
+    // create host copy BEFORE shift_right or view will be modified
+    auto view_h = create_host_space_copy(view);
+    auto rit    = KE::shift_right("label", exespace(), view, shift_value);
+    verify_data(rit, view, view_h, shift_value);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {{"empty", 0},
+                                                        {"one-element-a", 1},
+                                                        {"one-element-b", 1},
+                                                        {"two-elements-a", 2},
+                                                        {"two-elements-b", 2},
+                                                        {"three-elements-a", 3},
+                                                        {"three-elements-b", 3},
+                                                        {"small-a", 11},
+                                                        {"small-b", 13},
+                                                        {"medium", 21103},
+                                                        {"large", 101513}};
+
+  // a shift value MUST be non-negative but it does not matter
+  // if it is larger than the view, the algorithm is supposed
+  // to handle that case too
+  std::vector<std::size_t> shifts = {0, 1, 2, 3, 8, 56, 101, 1003, 101501};
+
+  for (const auto& it : scenarios) {
+    for (const auto& it2 : shifts) {
+      run_single_scenario<Tag, ValueType>(it, it2);
+    }
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, shift_right) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace ShiftRight
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e7785734433249a81543ae9cc5a1d168d694a8e4
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp
@@ -0,0 +1,320 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_Numeric.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace TransformEScan {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(0.05, 1.2) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(1, 3) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_zero(ViewType view) {
+  Kokkos::parallel_for(view.extent(0), FillZeroFunctor<ViewType>(view));
+}
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  UnifDist<value_type> randObj;
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    assert(v_h.extent(0) == 1);
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    assert(v_h.extent(0) == 2);
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    assert(v_h.extent(0) == 2);
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    assert(v_h.extent(0) == 9);
+    v_h(0) = static_cast<value_type>(3);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(4);
+    v_h(3) = static_cast<value_type>(1);
+    v_h(4) = static_cast<value_type>(5);
+    v_h(5) = static_cast<value_type>(9);
+    v_h(6) = static_cast<value_type>(2);
+    v_h(7) = static_cast<value_type>(6);
+    v_h(8) = static_cast<value_type>(2);
+  }
+
+  else if (name == "small-b") {
+    assert(v_h.extent(0) >= 6);
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+    v_h(5) = static_cast<value_type>(-2);
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+// I had to write my own because std::transform_exclusive_scan is ONLY found
+// with std=c++17
+template <class it1, class it2, class ValType, class BopType, class UopType>
+void my_host_transform_exclusive_scan(it1 first, it1 last, it2 dest,
+                                      ValType init, BopType bop, UopType uop) {
+  const auto num_elements = last - first;
+  if (num_elements > 0) {
+    while (first < last - 1) {
+      *(dest++) = init;
+      init      = bop(uop(*(first++)), init);
+    }
+    *dest = init;
+  }
+}
+
+template <class ViewType1, class ViewType2, class ValueType, class BinaryOp,
+          class UnaryOp>
+void verify_data(ViewType1 data_view,  // contains data
+                 ViewType2 test_view,  // the view to test
+                 ValueType init_value, BinaryOp bop, UnaryOp uop) {
+  //! always careful because views might not be deep copyable
+
+  auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
+  auto data_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
+
+  using gold_view_value_type = typename ViewType2::value_type;
+  Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
+      "goldh", data_view.extent(0));
+  my_host_transform_exclusive_scan(KE::cbegin(data_view_h),
+                                   KE::cend(data_view_h), KE::begin(gold_h),
+                                   init_value, bop, uop);
+
+  auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto test_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
+  if (test_view_h.extent(0) > 0) {
+    for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
+      // std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
+      //           << gold_h(i) << " " << test_view_h(i) << " "
+      //           << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+
+      if (std::is_same<gold_view_value_type, int>::value) {
+        EXPECT_TRUE(gold_h(i) == test_view_h(i));
+      } else {
+        const auto error = std::abs(gold_h(i) - test_view_h(i));
+        if (error > 1e-10) {
+          std::cout << i << " " << std::setprecision(15) << data_view_h(i)
+                    << " " << gold_h(i) << " " << test_view_h(i) << " "
+                    << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+        }
+        EXPECT_TRUE(error < 1e-10);
+      }
+    }
+    // std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
+    // std::endl;
+  }
+}
+
+template <class ValueType>
+struct TimesTwoUnaryFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a) const { return (a * ValueType(2)); }
+};
+
+template <class ValueType>
+struct SumBinaryFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a + b);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType& a,
+                       const volatile ValueType& b) const {
+    return (a + b);
+  }
+};
+
+std::string value_type_to_string(int) { return "int"; }
+
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType, class InfoType, class BinaryOp,
+          class UnaryOp>
+void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
+                         BinaryOp bop, UnaryOp uop) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "transform_exclusive_scan custom op: " << name << ", "
+  //           << view_tag_to_string(Tag{}) << ", "
+  //           << value_type_to_string(ValueType()) << ", "
+  //           << "init = " << init_value << std::endl;
+
+  auto view_dest =
+      create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
+  auto view_from =
+      create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
+  fill_view(view_from, name);
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::transform_exclusive_scan(
+        exespace(), KE::cbegin(view_from), KE::cend(view_from),
+        KE::begin(view_dest), init_value, bop, uop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop, uop);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::transform_exclusive_scan(
+        "label", exespace(), KE::cbegin(view_from), KE::cend(view_from),
+        KE::begin(view_dest), init_value, bop, uop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop, uop);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::transform_exclusive_scan(exespace(), view_from, view_dest,
+                                          init_value, bop, uop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop, uop);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::transform_exclusive_scan("label", exespace(), view_from,
+                                          view_dest, init_value, bop, uop);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, init_value, bop, uop);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+      {"medium", 1103},      {"large", 10513}};
+
+  for (const auto& it : scenarios) {
+    using uop_t = TimesTwoUnaryFunctor<ValueType>;
+    using bop_t = SumBinaryFunctor<ValueType>;
+    run_single_scenario<Tag, ValueType>(it, ValueType{0}, bop_t(), uop_t());
+    run_single_scenario<Tag, ValueType>(it, ValueType{1}, bop_t(), uop_t());
+    run_single_scenario<Tag, ValueType>(it, ValueType{-2}, bop_t(), uop_t());
+    run_single_scenario<Tag, ValueType>(it, ValueType{3}, bop_t(), uop_t());
+  }
+}
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan) {
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+#endif
+
+}  // namespace TransformEScan
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a30d6737931e8797bd0b2d34f6faea35afe75fc9
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp
@@ -0,0 +1,347 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_Numeric.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace TransformIncScan {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<double> {
+  using dist_type = std::uniform_real_distribution<double>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(0.05, 1.2) { m_gen.seed(1034343); }
+
+  double operator()() { return m_dist(m_gen); }
+};
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(1, 3) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_zero(ViewType view) {
+  Kokkos::parallel_for(view.extent(0), FillZeroFunctor<ViewType>(view));
+}
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type = typename ViewType::value_type;
+  using exe_space  = typename ViewType::execution_space;
+
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  UnifDist<value_type> randObj;
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element") {
+    assert(v_h.extent(0) == 1);
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "two-elements-a") {
+    assert(v_h.extent(0) == 2);
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    assert(v_h.extent(0) == 2);
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    assert(v_h.extent(0) == 9);
+    v_h(0) = static_cast<value_type>(3);
+    v_h(1) = static_cast<value_type>(1);
+    v_h(2) = static_cast<value_type>(4);
+    v_h(3) = static_cast<value_type>(1);
+    v_h(4) = static_cast<value_type>(5);
+    v_h(5) = static_cast<value_type>(9);
+    v_h(6) = static_cast<value_type>(2);
+    v_h(7) = static_cast<value_type>(6);
+    v_h(8) = static_cast<value_type>(2);
+  }
+
+  else if (name == "small-b") {
+    assert(v_h.extent(0) >= 6);
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+    v_h(5) = static_cast<value_type>(-2);
+  }
+
+  else if (name == "medium" || name == "large") {
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+// I had to write my own because std::transform_inclusive_scan is ONLY found
+// with std=c++17
+template <class it1, class it2, class BopType, class UopType>
+void my_host_transform_inclusive_scan(it1 first, it1 last, it2 dest,
+                                      BopType bop, UopType uop) {
+  if (first != last) {
+    auto init = uop(*first);
+    *dest     = init;
+    while (++first < last) {
+      init      = bop(uop(*first), init);
+      *(++dest) = init;
+    }
+  }
+}
+
+template <class it1, class it2, class ValType, class BopType, class UopType>
+void my_host_transform_inclusive_scan(it1 first, it1 last, it2 dest,
+                                      BopType bop, UopType uop, ValType init) {
+  if (first != last) {
+    init  = bop(uop(*first), init);
+    *dest = init;
+    while (++first < last) {
+      init      = bop(uop(*first), init);
+      *(++dest) = init;
+    }
+  }
+}
+
+template <class ViewType1, class ViewType2, class... Args>
+void verify_data(ViewType1 data_view,  // contains data
+                 ViewType2 test_view,  // the view to test
+                 Args... args /* by value on purpose*/) {
+  //! always careful because views might not be deep copyable
+
+  auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
+  auto data_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
+
+  using gold_view_value_type = typename ViewType2::value_type;
+  Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
+      "goldh", data_view.extent(0));
+  my_host_transform_inclusive_scan(KE::cbegin(data_view_h),
+                                   KE::cend(data_view_h), KE::begin(gold_h),
+                                   args...);
+
+  auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
+  auto test_view_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
+  if (test_view_h.extent(0) > 0) {
+    for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
+      // std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
+      //           << gold_h(i) << " " << test_view_h(i) << " "
+      //           << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+
+      if (std::is_same<gold_view_value_type, int>::value) {
+        EXPECT_TRUE(gold_h(i) == test_view_h(i));
+      } else {
+        const auto error = std::abs(gold_h(i) - test_view_h(i));
+        if (error > 1e-10) {
+          std::cout << i << " " << std::setprecision(15) << data_view_h(i)
+                    << " " << gold_h(i) << " " << test_view_h(i) << " "
+                    << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
+        }
+        EXPECT_TRUE(error < 1e-10);
+      }
+    }
+    // std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
+    // std::endl;
+  }
+}
+
+template <class ValueType>
+struct TimesTwoUnaryFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a) const { return (a * ValueType(2)); }
+};
+
+template <class ValueType>
+struct SumBinaryFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& a, const ValueType& b) const {
+    return (a + b);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const volatile ValueType& a,
+                       const volatile ValueType& b) const {
+    return (a + b);
+  }
+};
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class BopT, class UopT>
+void print_scenario_details(const std::string& name, BopT bop, UopT uop) {
+  (void)bop;
+  (void)uop;
+  std::cout << "transform_inclusive_scan: " << name << ", "
+            << view_tag_to_string(Tag{}) << std::endl;
+}
+
+template <class Tag, class BopT, class UopT, class ValueType>
+void print_scenario_details(const std::string& name, BopT bop, UopT uop,
+                            ValueType init_value) {
+  (void)bop;
+  (void)uop;
+  std::cout << "transform_inclusive_scan: " << name << ", "
+            << view_tag_to_string(Tag{}) << ", "
+            << "init = " << init_value << std::endl;
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info,
+                         Args... args /* by value on purpose*/) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag>(name, args...);
+
+  auto view_dest =
+      create_view<ValueType>(Tag{}, view_ext, "transform_inclusive_scan");
+  auto view_from =
+      create_view<ValueType>(Tag{}, view_ext, "transform_inclusive_scan");
+  fill_view(view_from, name);
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_from),
+                                          KE::cend(view_from),
+                                          KE::begin(view_dest), args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, args...);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::transform_inclusive_scan(
+        "label", exespace(), KE::cbegin(view_from), KE::cend(view_from),
+        KE::begin(view_dest), args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, args...);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r =
+        KE::transform_inclusive_scan(exespace(), view_from, view_dest, args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, args...);
+  }
+
+  {
+    fill_zero(view_dest);
+    auto r = KE::transform_inclusive_scan("label", exespace(), view_from,
+                                          view_dest, args...);
+    EXPECT_TRUE(r == KE::end(view_dest));
+    verify_data(view_from, view_dest, args...);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element", 1}, {"two-elements-a", 2},
+      {"two-elements-b", 2}, {"small-a", 9},     {"small-b", 13},
+      {"medium", 1103},      {"large", 10513}};
+
+  for (const auto& it : scenarios) {
+    using uop_t = TimesTwoUnaryFunctor<ValueType>;
+    using bop_t = SumBinaryFunctor<ValueType>;
+    run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t());
+    run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{0});
+    run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{1});
+    run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{2});
+    run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-1});
+    run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-2});
+  }
+}
+
+#if not defined KOKKOS_ENABLE_OPENMPTARGET
+TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) {
+  run_all_scenarios<DynamicTag, double>();
+  // run_all_scenarios<StridedThreeTag, double>();
+  // run_all_scenarios<DynamicTag, int>();
+  // run_all_scenarios<StridedThreeTag, int>();
+}
+#endif
+
+}  // namespace TransformIncScan
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7adfc10c3e43830c5a4b1858630975bbca1b153b
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp
@@ -0,0 +1,174 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace TransformUnaryOp {
+
+namespace KE = Kokkos::Experimental;
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class ViewType>
+void fill_view(ViewType dest_view) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  for (std::size_t i = 0; i < ext; ++i) {
+    v_h(i) = static_cast<value_type>(i);
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ViewTypeFrom, class ViewTypeTest>
+void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test) {
+  using value_type = typename ViewTypeFrom::value_type;
+
+  //! always careful because views might not be deep copyable
+  auto view_test_dc = create_deep_copyable_compatible_clone(view_test);
+  auto view_test_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_test_dc);
+
+  auto view_from_dc = create_deep_copyable_compatible_clone(view_from);
+  auto view_from_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc);
+
+  for (std::size_t i = 0; i < view_test_h.extent(0); ++i) {
+    EXPECT_TRUE(view_test_h(i) == view_from_h(i) + value_type(1));
+  }
+}
+
+template <class ValueType>
+struct TransformFunctor {
+  KOKKOS_INLINE_FUNCTION
+  ValueType operator()(const ValueType& val) const {
+    return val + ValueType(1);
+  }
+};
+
+template <class Tag, class ValueType, class InfoType>
+void run_single_scenario(const InfoType& scenario_info) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // std::cout << "transform_unary_op: " << name << ", "
+  //           << view_tag_to_string(Tag{}) << ", "
+  //           << value_type_to_string(ValueType()) << std::endl;
+
+  auto view_from =
+      create_view<ValueType>(Tag{}, view_ext, "transform_uop_from");
+  fill_view(view_from);
+  TransformFunctor<ValueType> unOp;
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "transform_uop_dest");
+    auto r1 = KE::transform(exespace(), KE::begin(view_from),
+                            KE::end(view_from), KE::begin(view_dest), unOp);
+    verify_data(view_from, view_dest);
+    EXPECT_EQ(r1, KE::end(view_dest));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "transform_uop_dest");
+    auto r1 = KE::transform("label", exespace(), KE::begin(view_from),
+                            KE::end(view_from), KE::begin(view_dest), unOp);
+    verify_data(view_from, view_dest);
+    EXPECT_EQ(r1, KE::end(view_dest));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "transform_uop_dest");
+    auto r1 = KE::transform(exespace(), view_from, view_dest, unOp);
+    verify_data(view_from, view_dest);
+    EXPECT_EQ(r1, KE::end(view_dest));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "transform_uop_dest");
+    auto r1 = KE::transform("label", exespace(), view_from, view_dest, unOp);
+    verify_data(view_from, view_dest);
+    EXPECT_EQ(r1, KE::end(view_dest));
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9},
+      {"small-b", 13},       {"medium", 1103},      {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+  }
+}
+
+TEST(std_algorithms_transform_ops_test, transform_unary_op) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+  run_all_scenarios<DynamicTag, double>();
+  run_all_scenarios<StridedThreeTag, double>();
+}
+
+}  // namespace TransformUnaryOp
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a13ba8240c73e822197b1871d5a4d80cf2068370
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp
@@ -0,0 +1,312 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+#include <algorithm>
+
+namespace Test {
+namespace stdalgos {
+namespace Unique {
+
+namespace KE = Kokkos::Experimental;
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(2, 7) { m_gen.seed(1034343); }
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+void fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(3);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(2);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(6);
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(8);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+  }
+
+  else if (name == "medium-a") {
+    // beginning just contains increasing values
+    for (std::size_t i = 0; i < 1000; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+
+    // then use random
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 1000; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else if (name == "medium-b") {
+    for (std::size_t i = 0; i < 1000; ++i) {
+      v_h(i) = static_cast<value_type>(22);
+    }
+    for (std::size_t i = 1000; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(44);
+    }
+  }
+
+  else if (name == "large-a") {
+    // put equal elements at the end
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = static_cast<value_type>(i);
+    }
+    v_h(ext - 3) = static_cast<value_type>(44);
+    v_h(ext - 2) = static_cast<value_type>(44);
+    v_h(ext - 1) = static_cast<value_type>(44);
+  }
+
+  else if (name == "large-b") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+}
+
+template <class ResultIt, class ViewType1, class ViewType2, class... Args>
+void verify_data(const std::string& name, ResultIt my_result_it,
+                 ViewType1 view_test, ViewType2 data_v_h, Args... args) {
+  // run std unique on host data
+  auto std_r = std::unique(KE::begin(data_v_h), KE::end(data_v_h), args...);
+
+  //
+  // check the returned iterator is correct
+  //
+  const auto std_diff = (std::size_t)(std_r - KE::begin(data_v_h));
+  const auto my_diff  = (std::size_t)(my_result_it - KE::begin(view_test));
+  EXPECT_TRUE(my_diff == std_diff);
+
+  //
+  // check the data in the view
+  //
+  // Note that we need to stop after std_diff because
+  // after that values are unspecified, see std::unique
+  auto view_test_h = create_host_space_copy(view_test);
+  for (std::size_t i = 0; i < std_diff; ++i) {
+    // std::cout << "i = " << i
+    // 		<< " my  = " << view_test_h(i) << " "
+    // 		<< " std = " << data_v_h(i)
+    // 		<< '\n';
+    EXPECT_TRUE(view_test_h(i) == data_v_h(i));
+  }
+
+  if (name == "medium-b") {
+    using value_type = typename ViewType1::value_type;
+    EXPECT_TRUE(my_diff == (std::size_t)2);
+    EXPECT_TRUE(view_test_h(0) == (value_type)22);
+    EXPECT_TRUE(view_test_h(1) == (value_type)44);
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name) {
+  std::cout << "unique: default predicate: " << name << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << '\n';
+}
+
+template <class Tag, class ValueType, class Predicate>
+void print_scenario_details(const std::string& name, Predicate pred) {
+  (void)pred;
+  std::cout << "unique: custom  predicate: " << name << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << '\n';
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info, Args... args) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, args...);
+
+  auto test_view = create_view<ValueType>(Tag{}, view_ext, "unique_test_view");
+
+  {
+    fill_view(test_view, name);
+    // make host copy BEFORE running unique on it since unique modifies it
+    auto data_h = create_host_space_copy(test_view);
+
+    // run unique
+    auto rit = KE::unique(exespace(), KE::begin(test_view), KE::end(test_view),
+                          args...);
+    // verify
+    verify_data(name, rit, test_view, data_h, args...);
+  }
+
+  {
+    fill_view(test_view, name);
+    // make host copy BEFORE running unique on it since unique modifies it
+    auto data_h = create_host_space_copy(test_view);
+
+    // run unique
+    auto rit = KE::unique("label", exespace(), KE::begin(test_view),
+                          KE::end(test_view), args...);
+    // verify
+    verify_data(name, rit, test_view, data_h, args...);
+  }
+
+  {
+    fill_view(test_view, name);
+    // make host copy BEFORE running unique on it since unique modifies it
+    auto data_h = create_host_space_copy(test_view);
+
+    // run unique
+    auto rit = KE::unique(exespace(), test_view, args...);
+    // verify
+    verify_data(name, rit, test_view, data_h, args...);
+  }
+
+  {
+    fill_view(test_view, name);
+    // make host copy BEFORE running unique on it since unique modifies it
+    auto data_h = create_host_space_copy(test_view);
+
+    // run unique
+    auto rit = KE::unique("label", exespace(), test_view, args...);
+    // verify
+    verify_data(name, rit, test_view, data_h, args...);
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 11},
+      {"small-b", 13},       {"medium-a", 11103},   {"medium-b", 21103},
+      {"large-a", 101513},   {"large-b", 100111}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+
+    using func_t = IsEqualFunctor<ValueType>;
+    run_single_scenario<Tag, ValueType>(it, func_t());
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, unique) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace Unique
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..64f205b28e7c6db929054b63049c1f6588879032
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp
@@ -0,0 +1,361 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestStdAlgorithmsCommon.hpp>
+#include <std_algorithms/Kokkos_BeginEnd.hpp>
+#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
+#include <utility>
+
+namespace Test {
+namespace stdalgos {
+namespace UniqueCopy {
+
+namespace KE = Kokkos::Experimental;
+
+// impl is here for std because it is only avail from c++>=17
+template <class InputIterator, class OutputIterator, class BinaryPredicate>
+auto my_unique_copy(InputIterator first, InputIterator last,
+                    OutputIterator result, BinaryPredicate pred) {
+  if (first != last) {
+    typename OutputIterator::value_type t(*first);
+    *result = t;
+    ++result;
+    while (++first != last) {
+      if (!pred(t, *first)) {
+        t       = *first;
+        *result = t;
+        ++result;
+      }
+    }
+  }
+  return result;
+}
+
+template <class InputIterator, class OutputIterator>
+auto my_unique_copy(InputIterator first, InputIterator last,
+                    OutputIterator result) {
+  using value_type = typename OutputIterator::value_type;
+  using func_t     = IsEqualFunctor<value_type>;
+  return my_unique_copy(first, last, result, func_t());
+}
+
+template <class ValueType>
+struct UnifDist;
+
+template <>
+struct UnifDist<int> {
+  using dist_type = std::uniform_int_distribution<int>;
+  std::mt19937 m_gen;
+  dist_type m_dist;
+
+  UnifDist() : m_dist(2, 7) { m_gen.seed(1034343); }
+
+  int operator()() { return m_dist(m_gen); }
+};
+
+template <class ViewType>
+std::size_t fill_view(ViewType dest_view, const std::string& name) {
+  using value_type      = typename ViewType::value_type;
+  using exe_space       = typename ViewType::execution_space;
+  const std::size_t ext = dest_view.extent(0);
+  using aux_view_t      = Kokkos::View<value_type*, exe_space>;
+  aux_view_t aux_view("aux_view", ext);
+  auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view);
+
+  std::size_t count = 0;
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    v_h(0) = static_cast<value_type>(1);
+    count  = 1;
+  }
+
+  else if (name == "one-element-b") {
+    v_h(0) = static_cast<value_type>(2);
+    count  = 1;
+  }
+
+  else if (name == "two-elements-a") {
+    v_h(0) = static_cast<value_type>(1);
+    v_h(1) = static_cast<value_type>(2);
+    count  = 2;
+  }
+
+  else if (name == "two-elements-b") {
+    v_h(0) = static_cast<value_type>(2);
+    v_h(1) = static_cast<value_type>(-1);
+    count  = 2;
+  }
+
+  else if (name == "small-a") {
+    v_h(0)  = static_cast<value_type>(0);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(6);
+    count   = 7;
+  }
+
+  else if (name == "small-b") {
+    v_h(0)  = static_cast<value_type>(1);
+    v_h(1)  = static_cast<value_type>(1);
+    v_h(2)  = static_cast<value_type>(1);
+    v_h(3)  = static_cast<value_type>(2);
+    v_h(4)  = static_cast<value_type>(3);
+    v_h(5)  = static_cast<value_type>(4);
+    v_h(6)  = static_cast<value_type>(4);
+    v_h(7)  = static_cast<value_type>(4);
+    v_h(8)  = static_cast<value_type>(5);
+    v_h(9)  = static_cast<value_type>(6);
+    v_h(10) = static_cast<value_type>(8);
+    v_h(11) = static_cast<value_type>(9);
+    v_h(12) = static_cast<value_type>(8);
+    count   = 9;
+  }
+
+  else if (name == "medium" || name == "large") {
+    UnifDist<value_type> randObj;
+    for (std::size_t i = 0; i < ext; ++i) {
+      v_h(i) = randObj();
+    }
+    std::vector<value_type> tmp(v_h.extent(0));
+    std::fill(tmp.begin(), tmp.end(), static_cast<value_type>(0));
+    using func_t = IsEqualFunctor<value_type>;
+    auto std_r =
+        my_unique_copy(KE::cbegin(v_h), KE::cend(v_h), tmp.begin(), func_t());
+    count = (std::size_t)(std_r - tmp.begin());
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+
+  Kokkos::deep_copy(aux_view, v_h);
+  CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view);
+  Kokkos::parallel_for("copy", dest_view.extent(0), F1);
+  return count;
+}
+
+template <class ViewTypeFrom, class ViewTypeTest, class... Args>
+void verify_data(const std::string& name, ViewTypeFrom view_from,
+                 ViewTypeTest view_test, Args... args) {
+  using value_type = typename ViewTypeTest::value_type;
+
+  //! always careful because views might not be deep copyable
+  auto view_test_dc = create_deep_copyable_compatible_clone(view_test);
+  auto view_test_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_test_dc);
+
+  auto view_from_dc = create_deep_copyable_compatible_clone(view_from);
+  auto view_from_h =
+      create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc);
+
+  if (name == "empty") {
+    // no op
+  }
+
+  else if (name == "one-element-a") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(1));
+  }
+
+  else if (name == "one-element-b") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2));
+  }
+
+  else if (name == "two-elements-a") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(1));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(2));
+  }
+
+  else if (name == "two-elements-b") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(-1));
+  }
+
+  else if (name == "small-a") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(1));
+    EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(2));
+    EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(3));
+    EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(4));
+    EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(5));
+    EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(6));
+    EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(9) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(10) == static_cast<value_type>(0));
+  }
+
+  else if (name == "small-b") {
+    EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(1));
+    EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(2));
+    EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(3));
+    EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(4));
+    EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(5));
+    EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(6));
+    EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(8));
+    EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(9));
+    EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(8));
+    EXPECT_TRUE(view_test_h(9) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(10) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(11) == static_cast<value_type>(0));
+    EXPECT_TRUE(view_test_h(12) == static_cast<value_type>(0));
+  }
+
+  else if (name == "medium" || name == "large") {
+    std::vector<value_type> tmp(view_test_h.extent(0));
+    std::fill(tmp.begin(), tmp.end(), static_cast<value_type>(0));
+
+    auto std_r = my_unique_copy(KE::cbegin(view_from_h), KE::cend(view_from_h),
+                                tmp.begin(), args...);
+    (void)std_r;
+
+    for (std::size_t i = 0; i < view_from_h.extent(0); ++i) {
+      EXPECT_TRUE(view_test_h(i) == tmp[i]);
+    }
+  }
+
+  else {
+    throw std::runtime_error("invalid choice");
+  }
+}
+
+std::string value_type_to_string(int) { return "int"; }
+std::string value_type_to_string(double) { return "double"; }
+
+template <class Tag, class ValueType>
+void print_scenario_details(const std::string& name) {
+  std::cout << "unique_copy: default predicate: " << name << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << '\n';
+}
+
+template <class Tag, class ValueType, class Predicate>
+void print_scenario_details(const std::string& name, Predicate pred) {
+  (void)pred;
+  std::cout << "unique_copy: custom  predicate: " << name << ", "
+            << view_tag_to_string(Tag{}) << " "
+            << value_type_to_string(ValueType()) << '\n';
+}
+
+template <class Tag, class ValueType, class InfoType, class... Args>
+void run_single_scenario(const InfoType& scenario_info, Args... args) {
+  const auto name            = std::get<0>(scenario_info);
+  const std::size_t view_ext = std::get<1>(scenario_info);
+  // print_scenario_details<Tag, ValueType>(name, args...);
+
+  auto view_from = create_view<ValueType>(Tag{}, view_ext, "unique_copy_from");
+  auto n         = fill_view(view_from, name);
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "unique_copy_dest");
+    auto rit =
+        KE::unique_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from),
+                        KE::begin(view_dest), args...);
+    verify_data(name, view_from, view_dest, args...);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "unique_copy_dest");
+    auto rit =
+        KE::unique_copy("label", exespace(), KE::cbegin(view_from),
+                        KE::cend(view_from), KE::begin(view_dest), args...);
+    verify_data(name, view_from, view_dest, args...);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "unique_copy_dest");
+    auto rit = KE::unique_copy(exespace(), view_from, view_dest, args...);
+    verify_data(name, view_from, view_dest, args...);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  {
+    auto view_dest =
+        create_view<ValueType>(Tag{}, view_ext, "unique_copy_dest");
+    auto rit =
+        KE::unique_copy("label", exespace(), view_from, view_dest, args...);
+    verify_data(name, view_from, view_dest, args...);
+    EXPECT_TRUE(rit == (KE::begin(view_dest) + n));
+  }
+
+  Kokkos::fence();
+}
+
+template <class Tag, class ValueType>
+void run_all_scenarios() {
+  const std::map<std::string, std::size_t> scenarios = {
+      {"empty", 0},          {"one-element-a", 1},  {"one-element-b", 1},
+      {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 11},
+      {"small-b", 13},       {"medium", 21103},     {"large", 101513}};
+
+  for (const auto& it : scenarios) {
+    run_single_scenario<Tag, ValueType>(it);
+
+    using func_t = IsEqualFunctor<ValueType>;
+    run_single_scenario<Tag, ValueType>(it, func_t());
+  }
+}
+
+TEST(std_algorithms_mod_seq_ops, unique_copy) {
+  run_all_scenarios<DynamicTag, int>();
+  run_all_scenarios<StridedThreeTag, int>();
+}
+
+}  // namespace UniqueCopy
+}  // namespace stdalgos
+}  // namespace Test
diff --git a/packages/kokkos/algorithms/unit_tests/TestStdReducers.cpp b/packages/kokkos/algorithms/unit_tests/TestStdReducers.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ed2ecd839b4d2fa9088f079b98e767976bd8781
--- /dev/null
+++ b/packages/kokkos/algorithms/unit_tests/TestStdReducers.cpp
@@ -0,0 +1,281 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+#include <gtest/gtest.h>
+
+// purpose of this test is to check that the reducers used
+// to implement some std algorithms work independently of the order
+
+namespace Test {
+
+enum class StdReducersTestEnumOrder { LeftToRight, RightToLeft, Random };
+
+std::string order_to_string(StdReducersTestEnumOrder value) {
+  switch (value) {
+    case StdReducersTestEnumOrder::LeftToRight: return "LeftToRight";
+    case StdReducersTestEnumOrder::RightToLeft: return "RightToLeft";
+    case StdReducersTestEnumOrder::Random: return "Random";
+  }
+  return {};
+}
+
+auto create_host_view_with_reduction_order_indices(
+    std::size_t extent, StdReducersTestEnumOrder enum_value) {
+  using view_t = Kokkos::View<int*, Kokkos::HostSpace>;
+  view_t result("v", extent);
+
+  if (enum_value == StdReducersTestEnumOrder::LeftToRight) {
+    result(0) = 0;
+    result(1) = 1;
+    result(2) = 2;
+    result(3) = 3;
+    result(4) = 4;
+    result(5) = 5;
+    result(6) = 6;
+    result(7) = 7;
+    result(8) = 8;
+    result(9) = 9;
+  } else if (enum_value == StdReducersTestEnumOrder::RightToLeft) {
+    result(0) = 9;
+    result(1) = 8;
+    result(2) = 7;
+    result(3) = 6;
+    result(4) = 5;
+    result(5) = 4;
+    result(6) = 3;
+    result(7) = 2;
+    result(8) = 1;
+    result(9) = 0;
+  } else if (enum_value == StdReducersTestEnumOrder::Random) {
+    result(0) = 0;
+    result(1) = 8;
+    result(2) = 3;
+    result(3) = 2;
+    result(4) = 9;
+    result(5) = 4;
+    result(6) = 6;
+    result(7) = 1;
+    result(8) = 7;
+    result(9) = 5;
+  } else {
+    throw std::runtime_error("test: Invalid enum");
+  }
+
+  return result;
+}
+
+template <int flag, class ExeSpace, class IndexType, class ViewType>
+auto run_min_or_max_test(ViewType view, StdReducersTestEnumOrder enValue) {
+  static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
+                "test is only enabled for HostSpace");
+
+  std::cout << "checking reduction with order: " << order_to_string(enValue)
+            << "\n";
+
+  using view_value_type = typename ViewType::value_type;
+  using reducer_type    = typename std::conditional<
+      (flag == 0), Kokkos::MaxFirstLoc<view_value_type, IndexType, ExeSpace>,
+      Kokkos::MinFirstLoc<view_value_type, IndexType, ExeSpace> >::type;
+  using reduction_value_type = typename reducer_type::value_type;
+
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  EXPECT_TRUE(reducer.references_scalar());
+  reducer.init(red_result);
+
+  auto red_order =
+      create_host_view_with_reduction_order_indices(view.extent(0), enValue);
+  for (std::size_t i = 0; i < view.extent(0); ++i) {
+    const auto index = red_order(i);
+    reducer.join(red_result, reduction_value_type{view(index), index});
+  }
+
+  using return_type = Kokkos::pair<view_value_type, IndexType>;
+  return return_type{red_result.val, red_result.loc};
+}
+
+TEST(std_algorithms_reducers, max_first_loc) {
+  using hostspace = Kokkos::HostSpace;
+
+  using view_t                 = Kokkos::View<double*, hostspace>;
+  constexpr std::size_t extent = 10;
+  view_t view_h("v", extent);
+  view_h(0) = 0.;
+  view_h(1) = 0.;
+  view_h(2) = 0.;
+  view_h(3) = 2.;
+  view_h(4) = 2.;
+  view_h(5) = 1.;
+  view_h(6) = 1.;
+  view_h(7) = 1.;
+  view_h(8) = 1.;
+  view_h(9) = 0.;
+
+  using index_type                 = int;
+  using view_value_type            = typename view_t::value_type;
+  const view_value_type gold_value = 2.;
+  const index_type gold_location   = 3;
+
+  const auto pair1 = run_min_or_max_test<0, hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::LeftToRight);
+  ASSERT_EQ(pair1.first, gold_value);
+  ASSERT_EQ(pair1.second, gold_location);
+
+  const auto pair2 = run_min_or_max_test<0, hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::RightToLeft);
+  ASSERT_EQ(pair2.first, gold_value);
+  ASSERT_EQ(pair2.second, gold_location);
+
+  const auto pair3 = run_min_or_max_test<0, hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::Random);
+  ASSERT_EQ(pair3.first, gold_value);
+  ASSERT_EQ(pair3.second, gold_location);
+}
+
+TEST(std_algorithms_reducers, min_first_loc) {
+  using hostspace = Kokkos::HostSpace;
+
+  using view_t                 = Kokkos::View<double*, hostspace>;
+  constexpr std::size_t extent = 10;
+  view_t view_h("v", extent);
+  view_h(0) = 0.;
+  view_h(1) = 0.;
+  view_h(2) = 0.;
+  view_h(3) = 2.;
+  view_h(4) = 2.;
+  view_h(5) = -1.;
+  view_h(6) = -1.;
+  view_h(7) = 1.;
+  view_h(8) = 1.;
+  view_h(9) = 0.;
+
+  using index_type                 = int;
+  using view_value_type            = typename view_t::value_type;
+  const view_value_type gold_value = -1.;
+  const index_type gold_location   = 5;
+
+  const auto pair1 = run_min_or_max_test<1, hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::LeftToRight);
+  ASSERT_EQ(pair1.first, gold_value);
+  ASSERT_EQ(pair1.second, gold_location);
+
+  const auto pair2 = run_min_or_max_test<1, hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::RightToLeft);
+  ASSERT_EQ(pair2.first, gold_value);
+  ASSERT_EQ(pair2.second, gold_location);
+
+  const auto pair3 = run_min_or_max_test<1, hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::Random);
+  ASSERT_EQ(pair3.first, gold_value);
+  ASSERT_EQ(pair3.second, gold_location);
+}
+
+template <class ExeSpace, class IndexType, class ViewType, class ValuesPair,
+          class IndexPair>
+void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
+                      const ValuesPair gold_values, const IndexPair gold_locs) {
+  static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
+                "test is only enabled for HostSpace");
+
+  std::cout << "checking reduction with order: " << order_to_string(enValue)
+            << "\n";
+
+  using view_value_type = typename ViewType::value_type;
+  using reducer_type =
+      Kokkos::MinMaxFirstLastLoc<view_value_type, IndexType, ExeSpace>;
+  using reduction_value_type = typename reducer_type::value_type;
+
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+  EXPECT_TRUE(reducer.references_scalar());
+  reducer.init(red_result);
+
+  auto red_order =
+      create_host_view_with_reduction_order_indices(view.extent(0), enValue);
+  for (std::size_t i = 0; i < view.extent(0); ++i) {
+    const auto index = red_order(i);
+    reducer.join(red_result,
+                 reduction_value_type{view(index), view(index), index, index});
+  }
+
+  ASSERT_EQ(red_result.min_val, gold_values.first);
+  ASSERT_EQ(red_result.max_val, gold_values.second);
+  ASSERT_EQ(red_result.min_loc, gold_locs.first);
+  ASSERT_EQ(red_result.max_loc, gold_locs.second);
+}
+
+TEST(std_algorithms_reducers, min_max_first_last_loc) {
+  using hostspace = Kokkos::HostSpace;
+
+  using view_t                 = Kokkos::View<double*, hostspace>;
+  constexpr std::size_t extent = 10;
+  view_t view_h("v", extent);
+  view_h(0) = 0.;
+  view_h(1) = 0.;
+  view_h(2) = 0.;
+  view_h(3) = 2.;
+  view_h(4) = 2.;
+  view_h(5) = -1.;
+  view_h(6) = 1.;
+  view_h(7) = -1.;
+  view_h(8) = 2.;
+  view_h(9) = 0.;
+
+  using index_type      = int;
+  using view_value_type = typename view_t::value_type;
+  Kokkos::pair<view_value_type, view_value_type> gold_values = {-1., 2.};
+  Kokkos::pair<index_type, index_type> gold_indices          = {5, 8};
+
+  run_min_max_test<hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::LeftToRight, gold_values, gold_indices);
+
+  run_min_max_test<hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::RightToLeft, gold_values, gold_indices);
+
+  run_min_max_test<hostspace, index_type>(
+      view_h, StdReducersTestEnumOrder::Random, gold_values, gold_indices);
+}
+
+}  // namespace Test
diff --git a/packages/kokkos/benchmarks/atomic/main.cpp b/packages/kokkos/benchmarks/atomic/main.cpp
index cc0d3e41e85aaa7483d11dbf639cc5a9d5809a47..6e32c6fe641a1c6f2c78c3bc6a5755149aa3ade2 100644
--- a/packages/kokkos/benchmarks/atomic/main.cpp
+++ b/packages/kokkos/benchmarks/atomic/main.cpp
@@ -4,7 +4,7 @@
 
 template <class Scalar>
 double test_atomic(int L, int N, int M, int K, int R,
-                   Kokkos::View<const int*> offsets) {
+                   Kokkos::View<const int**> offsets) {
   Kokkos::View<Scalar*> output("Output", N);
   Kokkos::Timer timer;
 
@@ -26,7 +26,7 @@ double test_atomic(int L, int N, int M, int K, int R,
 
 template <class Scalar>
 double test_no_atomic(int L, int N, int M, int K, int R,
-                      Kokkos::View<const int*> offsets) {
+                      Kokkos::View<const int**> offsets) {
   Kokkos::View<Scalar*> output("Output", N);
   Kokkos::Timer timer;
   for (int r = 0; r < R; r++)
@@ -77,7 +77,7 @@ int main(int argc, char* argv[]) {
     int R    = std::stoi(argv[6]);
     int type = std::stoi(argv[7]);
 
-    Kokkos::View<int*> offsets("Offsets", L, M);
+    Kokkos::View<int**> offsets("Offsets", L, M);
     Kokkos::Random_XorShift64_Pool<> pool(12371);
     Kokkos::fill_random(offsets, pool, D);
     double time = 0;
diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_double.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_double.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fb852377741a70c92e3f24baaf698431fa536fb7
--- /dev/null
+++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_double.cpp
@@ -0,0 +1,48 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <bench.hpp>
+
+template void run_stride_unroll<double>(int N, int K, int R, int D, int U,
+                                        int F, int T, int S);
diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_float.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_float.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1df7a78c2ab1b7fd2fc381bffe7d2c5f16f06c31
--- /dev/null
+++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_float.cpp
@@ -0,0 +1,48 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <bench.hpp>
+
+template void run_stride_unroll<float>(int N, int K, int R, int D, int U, int F,
+                                       int T, int S);
diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..35fe4db376984871dca341ec585378b65339d500
--- /dev/null
+++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp
@@ -0,0 +1,48 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <bench.hpp>
+
+template void run_stride_unroll<int32_t>(int N, int K, int R, int D, int U,
+                                         int F, int T, int S);
diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ce15027b7ddf99bca3a3c290a4c9aca64295417
--- /dev/null
+++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp
@@ -0,0 +1,48 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <bench.hpp>
+
+template void run_stride_unroll<int64_t>(int N, int K, int R, int D, int U,
+                                         int F, int T, int S);
diff --git a/packages/kokkos/benchmarks/bytes_and_flops/main.cpp b/packages/kokkos/benchmarks/bytes_and_flops/main.cpp
index 75f30a340938378fe85f72b4dd294235594cf21d..f05c7d9f514dd73d7e0326d1a25dac7fe78aac5a 100644
--- a/packages/kokkos/benchmarks/bytes_and_flops/main.cpp
+++ b/packages/kokkos/benchmarks/bytes_and_flops/main.cpp
@@ -47,12 +47,21 @@
 #include <bench.hpp>
 #include <cstdlib>
 
+extern template void run_stride_unroll<float>(int, int, int, int, int, int, int,
+                                              int);
+extern template void run_stride_unroll<double>(int, int, int, int, int, int,
+                                               int, int);
+extern template void run_stride_unroll<int32_t>(int, int, int, int, int, int,
+                                                int, int);
+extern template void run_stride_unroll<int64_t>(int, int, int, int, int, int,
+                                                int, int);
+
 int main(int argc, char* argv[]) {
   Kokkos::initialize();
 
   if (argc < 10) {
     printf("Arguments: N K R D U F T S\n");
-    printf("  P:   Precision (1==float, 2==double)\n");
+    printf("  P:   Precision (1==float, 2==double, 3==int32_t, 4==int64_t)\n");
     printf("  N,K: dimensions of the 2D array to allocate\n");
     printf("  R:   how often to loop through the K dimension with each team\n");
     printf("  D:   distance between loaded elements (stride)\n");
@@ -91,8 +100,8 @@ int main(int argc, char* argv[]) {
     printf("D must be one of 1,2,4,8,16,32\n");
     return 0;
   }
-  if ((P != 1) && (P != 2)) {
-    printf("P must be one of 1,2\n");
+  if ((P < 1) && (P > 2)) {
+    printf("P must be one of 1,2,3,4\n");
     return 0;
   }
 
@@ -102,6 +111,12 @@ int main(int argc, char* argv[]) {
   if (P == 2) {
     run_stride_unroll<double>(N, K, R, D, U, F, T, S);
   }
+  if (P == 3) {
+    run_stride_unroll<int32_t>(N, K, R, D, U, F, T, S);
+  }
+  if (P == 4) {
+    run_stride_unroll<int64_t>(N, K, R, D, U, F, T, S);
+  }
 
   Kokkos::finalize();
 }
diff --git a/packages/kokkos/bin/hpcbind b/packages/kokkos/bin/hpcbind
index 43f8a745da27c080ce54ac4cfd9b9358f618554f..9da3d994698ebe6554cc21222e153098a8d39d83 100755
--- a/packages/kokkos/bin/hpcbind
+++ b/packages/kokkos/bin/hpcbind
@@ -135,7 +135,7 @@ function show_help {
   echo "                        P.hpcbind.N, P.stdout.N and P.stderr.N where P is "
   echo "                        the prefix and N is the rank (no spaces)"
   echo "  --output-mode=<Op>    How console output should be handled."
-  echo "                        Options are all, rank0, and none.  Default: rank0" 
+  echo "                        Options are all, rank0, and none.  Default: rank0"
   echo "  --lstopo              Show bindings in lstopo"
   echo "  --save-topology=<Xml>  Save the topology to the given xml file"
   echo "  --load-topology=<Xml>  Load a previously saved topology from an xml file"
@@ -636,13 +636,13 @@ elif [[ ${HPCBIND_HAS_COMMAND} -eq 1 ]]; then
     if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
       hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- "$@" > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
     else
-      eval "$@" > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
+      "$@" > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
     fi
   else
     if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
       hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- "$@" > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
     else
-      eval "$@" > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
+      "$@" > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
     fi
   fi
 fi
diff --git a/packages/kokkos/bin/kokkos_launch_compiler b/packages/kokkos/bin/kokkos_launch_compiler
index d929d24f1dca42fc277940ffb27f54d374e89cd1..37c17956a1b46830172dc56c7dc68bf6dd50014a 100755
--- a/packages/kokkos/bin/kokkos_launch_compiler
+++ b/packages/kokkos/bin/kokkos_launch_compiler
@@ -83,9 +83,9 @@ shift
 # results in this command being executed:
 #       ${KOKKOS_COMPILER} -c file.cpp -o file.o
 if [[ "${KOKKOS_DEPENDENCE}" -eq "0" || "${CXX_COMPILER}" != "${1}" ]]; then
-    debug-message $@
+    debug-message "$@"
     # the command does not depend on Kokkos so just execute the command w/o re-directing to ${KOKKOS_COMPILER}
-    eval $@
+    exec "$@"
 else
     # the executable is the C++ compiler, so we need to re-direct to ${KOKKOS_COMPILER}
     if [ ! -f "${KOKKOS_COMPILER}" ]; then
@@ -115,7 +115,7 @@ else
     # discard the compiler from the command
     shift
 
-    debug-message ${KOKKOS_COMPILER} $@
+    debug-message ${KOKKOS_COMPILER} "$@"
     # execute ${KOKKOS_COMPILER} (again, usually nvcc_wrapper)
-    ${KOKKOS_COMPILER} $@
+    ${KOKKOS_COMPILER} "$@"
 fi
diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper
index ba2c55508aca0c139853ce7107c53f8a405ec9c3..8c168412e792459bed03478e950e88ca1aa10b47 100755
--- a/packages/kokkos/bin/nvcc_wrapper
+++ b/packages/kokkos/bin/nvcc_wrapper
@@ -99,7 +99,7 @@ first_xcompiler_arg=1
 if [[ -z ${NVCC_WRAPPER_TMPDIR+x} ]]; then
   temp_dir=${TMPDIR:-/tmp}
 else
-  temp_dir=${NVCC_WRAPPER_TMPDIR+x}
+  temp_dir=${NVCC_WRAPPER_TMPDIR}
 fi
 
 # optimization flag added as a command-line argument
@@ -149,11 +149,16 @@ do
   *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
     cpp_files="$cpp_files $1"
     ;;
-   # Ensure we only have one optimization flag because NVCC doesn't allow muliple
+   # Ensure we only have one optimization flag because NVCC doesn't allow multiple
   -O*)
     if [ -n "$optimization_flag" ]; then
-       echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the last is used because nvcc can only accept a single optimization setting."
-       shared_args=${shared_args/ $optimization_flag/}
+        if [ "$1" = "$optimization_flag" ]; then
+            # Silently consume duplicates of the same argument
+            shift
+            continue
+        fi
+        echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the last is used because nvcc can only accept a single optimization setting."
+        shared_args=${shared_args/ $optimization_flag/}
     fi
     if [ "$1" = "-O" ]; then
       optimization_flag="-O2"
@@ -222,7 +227,7 @@ do
     fi
     ;;
   #Handle known nvcc args
-  --dryrun|--verbose|--keep|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
+  --dryrun|--verbose|--keep|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
     cuda_args="$cuda_args $1"
     ;;
   #Handle more known nvcc args
@@ -230,10 +235,10 @@ do
     cuda_args="$cuda_args $1"
     ;;
   #Handle known nvcc args that have an argument
-  -maxrregcount=*|--maxrregcount=*|-time=*)
+  -maxrregcount=*|--maxrregcount=*|-time=*|-Xptxas=*)
     cuda_args="$cuda_args $1"
     ;;
-  -maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include|-time)
+  -maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include|-time|-Xptxas)
     cuda_args="$cuda_args $1 $2"
     shift
     ;;
diff --git a/packages/kokkos/cmake/KokkosConfig.cmake.in b/packages/kokkos/cmake/KokkosConfig.cmake.in
index 44a8fcd9c319326399ab19146f8cf213dbb51f64..e26c75b31224889d6d5806f8624d97933b6acf8e 100644
--- a/packages/kokkos/cmake/KokkosConfig.cmake.in
+++ b/packages/kokkos/cmake/KokkosConfig.cmake.in
@@ -60,3 +60,5 @@ ELSEIF(@Kokkos_ENABLE_CUDA@ AND NOT "separable_compilation" IN_LIST Kokkos_FIND_
     # be mindful of the environment, pollution is bad
     UNSET(IS_NVCC)
 ENDIF()
+
+set(Kokkos_COMPILE_LANGUAGE @KOKKOS_COMPILE_LANGUAGE@)
diff --git a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in
index ab93e65afe97ab9be9295312e6cd879a1aff6b27..6788e77ade83368cfbb55a831420ea714361b5b2 100644
--- a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in
+++ b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in
@@ -13,6 +13,10 @@ ENDFOREACH()
 IF(NOT Kokkos_FIND_QUIETLY)
   MESSAGE(STATUS "Enabled Kokkos devices: ${Kokkos_DEVICES}")
 ENDIF()
+IF(Kokkos_ENABLE_THREADS AND NOT DEPRECATED_CODE_3 IN_LIST Kokkos_OPTIONS)  # for backward compatibility
+  SET(Kokkos_ENABLE_PTHREAD ON)
+  LIST(APPEND Kokkos_DEVICES PTHREAD)
+ENDIF()
 
 IF (Kokkos_ENABLE_CUDA)
   # If we are building CUDA, we have tricked CMake because we declare a CXX project
diff --git a/packages/kokkos/cmake/KokkosCore_config.h.in b/packages/kokkos/cmake/KokkosCore_config.h.in
index 07baa0a5f09a708d344a9ef72510baa6f4b8e15b..f3fd9f6d7aa127693a934e5230d5a346b7b8ef1a 100644
--- a/packages/kokkos/cmake/KokkosCore_config.h.in
+++ b/packages/kokkos/cmake/KokkosCore_config.h.in
@@ -43,6 +43,7 @@
 #cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
 #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
 #cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
+#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
 #cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH
 #cmakedefine KOKKOS_ENABLE_DEBUG
 #cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
@@ -112,5 +113,8 @@
 #cmakedefine KOKKOS_ARCH_AMD_ZEN
 #cmakedefine KOKKOS_ARCH_AMD_ZEN2
 #cmakedefine KOKKOS_ARCH_AMD_ZEN3
-
-#cmakedefine KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF
+#cmakedefine KOKKOS_ARCH_VEGA
+#cmakedefine KOKKOS_ARCH_VEGA900
+#cmakedefine KOKKOS_ARCH_VEGA906
+#cmakedefine KOKKOS_ARCH_VEGA908
+#cmakedefine KOKKOS_ARCH_VEGA90A
diff --git a/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake b/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake
index 0c825c59e04248f2cd76d5faf9c6aa16a663bbb1..792c92c07e9d2e121ac1504a658057fcd685d109 100644
--- a/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake
+++ b/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake
@@ -29,7 +29,7 @@ ELSE()
 ENDIF()
 
 include(FindPackageHandleStandardArgs)
-IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL PGI)
+IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
   SET(KOKKOS_CUDA_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
 ELSE()
   SET(KOKKOS_CUDA_ERROR DEFAULT_MSG)
diff --git a/packages/kokkos/cmake/Modules/FindTPLHPX.cmake b/packages/kokkos/cmake/Modules/FindTPLHPX.cmake
index c8b3bc4c9b84505eceff8ba3453501f9bb5d1e01..5636a9bb66b114dab18415da08065777251e9ee0 100644
--- a/packages/kokkos/cmake/Modules/FindTPLHPX.cmake
+++ b/packages/kokkos/cmake/Modules/FindTPLHPX.cmake
@@ -1,5 +1,5 @@
 
-FIND_PACKAGE(HPX REQUIRED)
+FIND_PACKAGE(HPX REQUIRED 1.7.0)
 #as of right now, HPX doesn't export correctly
 #so let's convert it to an interface target
 KOKKOS_CREATE_IMPORTED_TPL(HPX INTERFACE
diff --git a/packages/kokkos/cmake/Modules/FindTPLPTHREAD.cmake b/packages/kokkos/cmake/Modules/FindTPLPTHREAD.cmake
deleted file mode 100644
index a743fca0e45290cf7ad80e3b022e7f66a34947fa..0000000000000000000000000000000000000000
--- a/packages/kokkos/cmake/Modules/FindTPLPTHREAD.cmake
+++ /dev/null
@@ -1,20 +0,0 @@
-
-TRY_COMPILE(KOKKOS_HAS_PTHREAD_ARG
-  ${KOKKOS_TOP_BUILD_DIR}/tpl_tests
-  ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/pthread.cpp
-  LINK_LIBRARIES -pthread
-  COMPILE_DEFINITIONS -pthread
-)
-# The test no longer requires C++11
-# if we did needed C++ standard support, then we should add option
-# ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION}
-
-INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLPTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG)
-#Only create the TPL if we succeed
-IF (KOKKOS_HAS_PTHREAD_ARG)
-  KOKKOS_CREATE_IMPORTED_TPL(PTHREAD
-    INTERFACE   #this is not a real library with a real location
-    COMPILE_OPTIONS -pthread
-    LINK_OPTIONS    -pthread)
-ENDIF()
diff --git a/packages/kokkos/cmake/Modules/FindTPLTHREADS.cmake b/packages/kokkos/cmake/Modules/FindTPLTHREADS.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..2d140c85c43caed2103e6da64769d282e33f20a7
--- /dev/null
+++ b/packages/kokkos/cmake/Modules/FindTPLTHREADS.cmake
@@ -0,0 +1,15 @@
+INCLUDE(FindPackageHandleStandardArgs)
+INCLUDE("${CMAKE_SOURCE_DIR}/cmake/tpls/FindTPLPthread.cmake")
+
+IF (TARGET Threads::Threads)
+  SET(FOUND_THREADS TRUE)
+ELSE()
+  SET(FOUND_THREADS FALSE)
+ENDIF()
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLTHREADS DEFAULT_MSG FOUND_THREADS)
+#Only create the TPL if we succeed
+IF (FOUND_THREADS)
+  KOKKOS_CREATE_IMPORTED_TPL(THREADS INTERFACE LINK_OPTIONS
+          ${TPL_Pthread_LIBRARIES})
+ENDIF()
diff --git a/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc b/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc
index a26ac5af4bf2dee2c26f1ee20c6c500fe465bf9f..f56cef16510df4e1b49f13765b4306d2c2fe9420 100644
--- a/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc
+++ b/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc
@@ -43,6 +43,7 @@
 */
 
 #include <iostream>
+#include <cuda_runtime_api.h>
 
 int main() {
   cudaDeviceProp device_properties;
diff --git a/packages/kokkos/cmake/kokkos_arch.cmake b/packages/kokkos/cmake/kokkos_arch.cmake
index c4637339f31fa47e80d9b53a1435b3ddc7641573..a8b5b68478ecc1e8c32410340b809bd55e0a06f7 100644
--- a/packages/kokkos/cmake/kokkos_arch.cmake
+++ b/packages/kokkos/cmake/kokkos_arch.cmake
@@ -67,7 +67,7 @@ KOKKOS_ARCH_OPTION(ZEN3            HOST "AMD Zen3 architecture")
 KOKKOS_ARCH_OPTION(VEGA900         GPU  "AMD GPU MI25 GFX900")
 KOKKOS_ARCH_OPTION(VEGA906         GPU  "AMD GPU MI50/MI60 GFX906")
 KOKKOS_ARCH_OPTION(VEGA908         GPU  "AMD GPU MI100 GFX908")
-KOKKOS_ARCH_OPTION(VEGA90A         GPU  "" )
+KOKKOS_ARCH_OPTION(VEGA90A         GPU  "AMD GPU MI200 GFX90A")
 KOKKOS_ARCH_OPTION(INTEL_GEN       GPU  "Intel GPUs Gen9+")
 KOKKOS_ARCH_OPTION(INTEL_DG1       GPU  "Intel Iris XeMAX GPU")
 KOKKOS_ARCH_OPTION(INTEL_GEN9      GPU  "Intel GPU Gen9")
@@ -92,8 +92,15 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
     LIST(REMOVE_ITEM COMMON_WARNINGS "-Wsign-compare")
   ENDIF()
 
+  IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
+    LIST(APPEND COMMON_WARNINGS "-Wimplicit-fallthrough")
+  ENDIF()
+
   SET(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers"
     ${COMMON_WARNINGS})
+  IF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7)
+    LIST(APPEND GNU_WARNINGS "-Wimplicit-fallthrough")
+  ENDIF()
 
   COMPILER_SPECIFIC_FLAGS(
     COMPILER_ID CMAKE_CXX_COMPILER_ID
@@ -211,7 +218,7 @@ IF (KOKKOS_ARCH_A64FX)
     NVHPC   NO-VALUE-SPECIFIED
     DEFAULT -march=armv8.2-a+sve
     Clang   -march=armv8.2-a+sve -msve-vector-bits=512
-    GCC     -march=armv8.2-a+sve -msve-vector-bits=512
+    GNU     -march=armv8.2-a+sve -msve-vector-bits=512
   )
 ENDIF()
 
@@ -388,7 +395,7 @@ ENDIF()
 IF (KOKKOS_ENABLE_HIP)
   IF (KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
     COMPILER_SPECIFIC_FLAGS(
-      DEFAULT -fgpu-rdc
+      DEFAULT -fgpu-rdc -DDESUL_HIP_RDC
     )
   ELSE()
     COMPILER_SPECIFIC_FLAGS(
@@ -399,7 +406,7 @@ ENDIF()
 
 IF (KOKKOS_ENABLE_SYCL)
   COMPILER_SPECIFIC_FLAGS(
-    DEFAULT -fsycl -fno-sycl-id-queries-fit-in-int
+    DEFAULT -fsycl -fno-sycl-id-queries-fit-in-int -fsycl-dead-args-optimization
   )
   COMPILER_SPECIFIC_OPTIONS(
     DEFAULT -fsycl-unnamed-lambda
@@ -419,9 +426,14 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
       UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE)
     ELSE()
       SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE)
-      GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
-      IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
-        GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
+      IF(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
+        string(REPLACE "sm_" "" CMAKE_ARCH ${FLAG})
+        SET(CMAKE_CUDA_ARCHITECTURES ${CMAKE_ARCH} PARENT_SCOPE)
+      ELSE()
+        GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
+        IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
+          GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
+        ENDIF()
       ENDIF()
     ENDIF()
   ENDIF()
@@ -473,8 +485,8 @@ ENDFUNCTION()
 #to the corresponding flag name if ON
 CHECK_AMDGPU_ARCH(VEGA900 gfx900) # Radeon Instinct MI25
 CHECK_AMDGPU_ARCH(VEGA906 gfx906) # Radeon Instinct MI50 and MI60
-CHECK_AMDGPU_ARCH(VEGA908 gfx908)
-CHECK_AMDGPU_ARCH(VEGA90A gfx90a)
+CHECK_AMDGPU_ARCH(VEGA908 gfx908) # Radeon Instinct MI100
+CHECK_AMDGPU_ARCH(VEGA90A gfx90a) # Radeon Instinct MI200
 
 IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
   IF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
@@ -545,36 +557,34 @@ IF (KOKKOS_ENABLE_SYCL)
   IF(CUDA_ARCH_ALREADY_SPECIFIED)
     IF(KOKKOS_ENABLE_UNSUPPORTED_ARCHS)
       COMPILER_SPECIFIC_FLAGS(
-        DEFAULT -fsycl-targets=nvptx64-nvidia-cuda-sycldevice
+        DEFAULT -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend "${CUDA_ARCH_FLAG}=${KOKKOS_CUDA_ARCH_FLAG}"
       )
-      # FIXME_SYCL The CUDA backend doesn't support printf yet.
-      GLOBAL_SET(KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF ON)
     ELSE()
       MESSAGE(SEND_ERROR "Setting a CUDA architecture for SYCL is only allowed with Kokkos_ENABLE_UNSUPPORTED_ARCHS=ON!")
     ENDIF()
   ELSEIF(KOKKOS_ARCH_INTEL_GEN)
     COMPILER_SPECIFIC_FLAGS(
-      DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9-"
+      DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9-"
     )
   ELSEIF(KOKKOS_ARCH_INTEL_GEN9)
     COMPILER_SPECIFIC_FLAGS(
-      DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9"
+      DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9"
     )
   ELSEIF(KOKKOS_ARCH_INTEL_GEN11)
     COMPILER_SPECIFIC_FLAGS(
-      DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen11"
+      DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen11"
     )
   ELSEIF(KOKKOS_ARCH_INTEL_GEN12LP)
     COMPILER_SPECIFIC_FLAGS(
-      DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen12lp"
+      DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen12lp"
     )
   ELSEIF(KOKKOS_ARCH_INTEL_DG1)
     COMPILER_SPECIFIC_FLAGS(
-      DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device dg1"
+      DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device dg1"
     )
   ELSEIF(KOKKOS_ARCH_INTEL_XEHP)
     COMPILER_SPECIFIC_FLAGS(
-      DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device xehp"
+      DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device xehp"
     )
   ENDIF()
 ENDIF()
@@ -644,30 +654,33 @@ IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED)
   ENDIF()
 ENDIF()
 
-IF (KOKKOS_ENABLE_CUDA)
- #Regardless of version, make sure we define the general architecture name
- IF (KOKKOS_ARCH_KEPLER30 OR KOKKOS_ARCH_KEPLER32 OR KOKKOS_ARCH_KEPLER35 OR KOKKOS_ARCH_KEPLER37)
-   SET(KOKKOS_ARCH_KEPLER ON)
- ENDIF()
+#Regardless of version, make sure we define the general architecture name
+IF (KOKKOS_ARCH_KEPLER30 OR KOKKOS_ARCH_KEPLER32 OR KOKKOS_ARCH_KEPLER35 OR KOKKOS_ARCH_KEPLER37)
+  SET(KOKKOS_ARCH_KEPLER ON)
+ENDIF()
 
- #Regardless of version, make sure we define the general architecture name
- IF (KOKKOS_ARCH_MAXWELL50 OR KOKKOS_ARCH_MAXWELL52 OR KOKKOS_ARCH_MAXWELL53)
-   SET(KOKKOS_ARCH_MAXWELL ON)
- ENDIF()
+#Regardless of version, make sure we define the general architecture name
+IF (KOKKOS_ARCH_MAXWELL50 OR KOKKOS_ARCH_MAXWELL52 OR KOKKOS_ARCH_MAXWELL53)
+  SET(KOKKOS_ARCH_MAXWELL ON)
+ENDIF()
 
- #Regardless of version, make sure we define the general architecture name
- IF (KOKKOS_ARCH_PASCAL60 OR KOKKOS_ARCH_PASCAL61)
-   SET(KOKKOS_ARCH_PASCAL ON)
- ENDIF()
+#Regardless of version, make sure we define the general architecture name
+IF (KOKKOS_ARCH_PASCAL60 OR KOKKOS_ARCH_PASCAL61)
+  SET(KOKKOS_ARCH_PASCAL ON)
+ENDIF()
 
-  #Regardless of version, make sure we define the general architecture name
-  IF (KOKKOS_ARCH_VOLTA70 OR KOKKOS_ARCH_VOLTA72)
-    SET(KOKKOS_ARCH_VOLTA ON)
-  ENDIF()
+#Regardless of version, make sure we define the general architecture name
+IF (KOKKOS_ARCH_VOLTA70 OR KOKKOS_ARCH_VOLTA72)
+  SET(KOKKOS_ARCH_VOLTA ON)
+ENDIF()
 
-  IF (KOKKOS_ARCH_AMPERE80 OR KOKKOS_ARCH_AMPERE86)
-    SET(KOKKOS_ARCH_AMPERE ON)
-  ENDIF()
+IF (KOKKOS_ARCH_AMPERE80 OR KOKKOS_ARCH_AMPERE86)
+  SET(KOKKOS_ARCH_AMPERE ON)
+ENDIF()
+
+#Regardless of version, make sure we define the general architecture name
+IF (KOKKOS_ARCH_VEGA900 OR KOKKOS_ARCH_VEGA906 OR KOKKOS_ARCH_VEGA908 OR KOKKOS_ARCH_VEGA90A)
+  SET(KOKKOS_ARCH_VEGA ON)
 ENDIF()
 
 #CMake verbose is kind of pointless
@@ -701,9 +714,6 @@ IF(NOT _DEVICE_PARALLEL)
   SET(_DEFAULT_DEVICE_MEMSPACE "NoTypeDefined")
 ENDIF()
 MESSAGE(STATUS "    Device Parallel: ${_DEVICE_PARALLEL}")
-IF(KOKKOS_ENABLE_PTHREAD)
-  SET(KOKKOS_ENABLE_THREADS ON)
-ENDIF()
 
 FOREACH (_BACKEND OpenMP Threads HPX)
   STRING(TOUPPER ${_BACKEND} UC_BACKEND)
diff --git a/packages/kokkos/cmake/kokkos_compiler_id.cmake b/packages/kokkos/cmake/kokkos_compiler_id.cmake
index 5afed4fb0e7ba0cd2bca8250b6f58e4434f483ec..f0c906e6566a0c947a9fc3c933fccafc5392e787 100644
--- a/packages/kokkos/cmake/kokkos_compiler_id.cmake
+++ b/packages/kokkos/cmake/kokkos_compiler_id.cmake
@@ -20,31 +20,36 @@ MACRO(kokkos_internal_have_compiler_nvcc)
 ENDMACRO()
 
 IF(Kokkos_ENABLE_CUDA)
-  # find kokkos_launch_compiler
-  FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER
-      NAMES           kokkos_launch_compiler
-      HINTS           ${PROJECT_SOURCE_DIR}
-      PATHS           ${PROJECT_SOURCE_DIR}
-      PATH_SUFFIXES   bin)
-
-  FIND_PROGRAM(Kokkos_NVCC_WRAPPER
-      NAMES           nvcc_wrapper
-      HINTS           ${PROJECT_SOURCE_DIR}
-      PATHS           ${PROJECT_SOURCE_DIR}
-      PATH_SUFFIXES   bin)
-
-  # check if compiler was set to nvcc_wrapper
-  kokkos_internal_have_compiler_nvcc(${CMAKE_CXX_COMPILER})
-  # if launcher was found and nvcc_wrapper was not specified as
-  # compiler, set to use launcher. Will ensure CMAKE_CXX_COMPILER
-  # is replaced by nvcc_wrapper
-  IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
-    # the first argument to launcher is always the C++ compiler defined by cmake
-    # if the second argument matches the C++ compiler, it forwards the rest of the
-    # args to nvcc_wrapper
-    kokkos_internal_have_compiler_nvcc(
-      ${Kokkos_COMPILE_LAUNCHER} ${Kokkos_NVCC_WRAPPER} ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER} -DKOKKOS_DEPENDENCE)
-    SET(INTERNAL_USE_COMPILER_LAUNCHER true)
+  # kokkos_enable_options is not yet called so use lower case here
+  IF(Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
+    kokkos_internal_have_compiler_nvcc(${CMAKE_CUDA_COMPILER})
+  ELSE()
+    # find kokkos_launch_compiler
+    FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER
+        NAMES           kokkos_launch_compiler
+        HINTS           ${PROJECT_SOURCE_DIR}
+        PATHS           ${PROJECT_SOURCE_DIR}
+        PATH_SUFFIXES   bin)
+
+    FIND_PROGRAM(Kokkos_NVCC_WRAPPER
+        NAMES           nvcc_wrapper
+        HINTS           ${PROJECT_SOURCE_DIR}
+        PATHS           ${PROJECT_SOURCE_DIR}
+        PATH_SUFFIXES   bin)
+
+    # check if compiler was set to nvcc_wrapper
+    kokkos_internal_have_compiler_nvcc(${CMAKE_CXX_COMPILER})
+    # if launcher was found and nvcc_wrapper was not specified as
+    # compiler, set to use launcher. Will ensure CMAKE_CXX_COMPILER
+    # is replaced by nvcc_wrapper
+    IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
+      # the first argument to launcher is always the C++ compiler defined by cmake
+      # if the second argument matches the C++ compiler, it forwards the rest of the
+      # args to nvcc_wrapper
+      kokkos_internal_have_compiler_nvcc(
+        ${Kokkos_COMPILE_LAUNCHER} ${Kokkos_NVCC_WRAPPER} ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER} -DKOKKOS_DEPENDENCE)
+      SET(INTERNAL_USE_COMPILER_LAUNCHER true)
+    ENDIF()
   ENDIF()
 ENDIF()
 
@@ -102,6 +107,11 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
   IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang
     SET(KOKKOS_CLANG_IS_INTEL TRUE)
     SET(KOKKOS_CXX_COMPILER_ID IntelLLVM CACHE STRING INTERNAL FORCE)
+    EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
+                  OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+    STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+"
+           KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
   ENDIF()
 ENDIF()
 
@@ -133,12 +143,14 @@ ENDIF()
 
 # Enforce the minimum compilers supported by Kokkos.
 SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos.  Required compiler versions:")
-SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    Clang      4.0.0 or higher")
-SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    GCC        5.3.0 or higher")
-SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    Intel     17.0.0 or higher")
-SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    NVCC      9.2.88 or higher")
-SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    HIPCC      4.2.0 or higher")
-SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    PGI         17.4 or higher\n")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    Clang        4.0.0 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    GCC          5.3.0 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    Intel       17.0.0 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    IntelLLVM 2022.0.0 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    NVCC        9.2.88 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    HIPCC        4.5.0 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n    PGI           17.4 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\nCompiler: ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION}\n")
 
 IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
   IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.0.0)
@@ -152,13 +164,18 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
   IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 17.0.0)
     MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
   ENDIF()
+ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND Kokkos_ENABLE_SYCL)
+  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2022.0.0)
+    MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+  ENDIF()
 ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
   IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 9.2.88)
     MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
   ENDIF()
   SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE)
 ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
-  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.2.0)
+  # Note that ROCm 4.5 reports as version 4.4
+  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.4.0)
     MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
   ENDIF()
 ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
diff --git a/packages/kokkos/cmake/kokkos_enable_devices.cmake b/packages/kokkos/cmake/kokkos_enable_devices.cmake
index 7fd0794036454da9d8a246fd4a3a19fe2e5cf0ef..dc3ee8c84255613c1ee6c5e6f847883bb7a15cf0 100644
--- a/packages/kokkos/cmake/kokkos_enable_devices.cmake
+++ b/packages/kokkos/cmake/kokkos_enable_devices.cmake
@@ -19,10 +19,13 @@ KOKKOS_CFG_DEPENDS(DEVICES NONE)
 KOKKOS_DEPRECATED_LIST(DEVICES ENABLE)
 
 
-KOKKOS_DEVICE_OPTION(PTHREAD       OFF HOST "Whether to build Pthread backend")
-IF (KOKKOS_ENABLE_PTHREAD)
-  #patch the naming here
+KOKKOS_DEVICE_OPTION(THREADS OFF HOST "Whether to build C++ threads backend")
+IF(Kokkos_ENABLE_PTHREAD)  # for backward compatibility
+  SET(Kokkos_ENABLE_THREADS ON CACHE BOOL "Whether to build C++ threads backend" FORCE)
   SET(KOKKOS_ENABLE_THREADS ON)
+  LIST(APPEND KOKKOS_ENABLED_DEVICES THREADS)
+  SET(KOKKOS_HAS_HOST ON)
+  MESSAGE(DEPRECATION "The Kokkos_ENABLE_PTHREAD option is deprecated. Use Kokkos_ENABLE_THREADS instead!")
 ENDIF()
 
 # detect clang++ / cl / clang-cl clashes
@@ -119,9 +122,6 @@ KOKKOS_DEVICE_OPTION(CUDA ${CUDA_DEFAULT} DEVICE "Whether to build CUDA backend"
 
 IF (KOKKOS_ENABLE_CUDA)
   GLOBAL_SET(KOKKOS_DONT_ALLOW_EXTENSIONS "CUDA enabled")
-  IF(WIN32 AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
-    GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS -x cu)
-  ENDIF()
 ## Cuda has extra setup requirements, turn on Kokkos_Setup_Cuda.hpp in macros
   LIST(APPEND DEVICE_SETUP_LIST Cuda)
 ENDIF()
diff --git a/packages/kokkos/cmake/kokkos_enable_options.cmake b/packages/kokkos/cmake/kokkos_enable_options.cmake
index 4cb8bd20f5ecb3e519ef64d9e1c31c0a5cb7e431..a581003b9e9e251ce04a5d541e3ca3382c0cf2e3 100644
--- a/packages/kokkos/cmake/kokkos_enable_options.cmake
+++ b/packages/kokkos/cmake/kokkos_enable_options.cmake
@@ -56,6 +56,8 @@ KOKKOS_ENABLE_OPTION(PROFILING_LOAD_PRINT OFF "Whether to print information abou
 KOKKOS_ENABLE_OPTION(TUNING               OFF "Whether to create bindings for tuning tools")
 KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops")
 KOKKOS_ENABLE_OPTION(LAUNCH_COMPILER      ON  "Whether to potentially use the launch compiler")
+KOKKOS_ENABLE_OPTION(COMPILE_AS_CMAKE_LANGUAGE OFF "Whether to use native cmake language support")
+KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple kernels are instantiated at compile time - improve performance but increase compile time")
 
 # This option will go away eventually, but allows fallback to old implementation when needed.
 KOKKOS_ENABLE_OPTION(IMPL_DESUL_ATOMICS   ON  "Whether to use desul based atomics - option only during beta")
diff --git a/packages/kokkos/cmake/kokkos_tpls.cmake b/packages/kokkos/cmake/kokkos_tpls.cmake
index 51bad521c4878c00b6b8c7587d7233c26a1d4ba9..54c6b520b4a3b1ed5648df76465d465e0d1eeaab 100644
--- a/packages/kokkos/cmake/kokkos_tpls.cmake
+++ b/packages/kokkos/cmake/kokkos_tpls.cmake
@@ -60,12 +60,7 @@ SET(HPX_DEFAULT OFF)
 ENDIF()
 KOKKOS_TPL_OPTION(HPX ${HPX_DEFAULT})
 
-IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_PTHREAD)
-SET(PTHREAD_DEFAULT ON)
-ELSE()
-SET(PTHREAD_DEFAULT OFF)
-ENDIF()
-KOKKOS_TPL_OPTION(PTHREAD ${PTHREAD_DEFAULT} TRIBITS Pthread)
+KOKKOS_TPL_OPTION(THREADS ${Kokkos_ENABLE_THREADS} TRIBITS Pthread)
 
 IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_quadmath)
   SET(LIBQUADMATH_DEFAULT ON)
@@ -76,13 +71,17 @@ KOKKOS_TPL_OPTION(LIBQUADMATH ${LIBQUADMATH_DEFAULT} TRIBITS quadmath)
 
 #Make sure we use our local FindKokkosCuda.cmake
 KOKKOS_IMPORT_TPL(HPX INTERFACE)
-KOKKOS_IMPORT_TPL(CUDA INTERFACE)
+IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
+  KOKKOS_IMPORT_TPL(CUDA INTERFACE)
+ENDIF()
 KOKKOS_IMPORT_TPL(HWLOC)
 KOKKOS_IMPORT_TPL(LIBNUMA)
 KOKKOS_IMPORT_TPL(LIBRT)
 KOKKOS_IMPORT_TPL(LIBDL)
 KOKKOS_IMPORT_TPL(MEMKIND)
-KOKKOS_IMPORT_TPL(PTHREAD INTERFACE)
+IF (NOT WIN32)
+  KOKKOS_IMPORT_TPL(THREADS INTERFACE)
+ENDIF()
 KOKKOS_IMPORT_TPL(ROCM INTERFACE)
 KOKKOS_IMPORT_TPL(LIBQUADMATH)
 
diff --git a/packages/kokkos/cmake/kokkos_tribits.cmake b/packages/kokkos/cmake/kokkos_tribits.cmake
index afa036066afeef954c5fed457782546565b7cfa5..1ec45d19bc73965cc0558976a9677d71bfd65534 100644
--- a/packages/kokkos/cmake/kokkos_tribits.cmake
+++ b/packages/kokkos/cmake/kokkos_tribits.cmake
@@ -6,6 +6,12 @@ INCLUDE(GNUInstallDirs)
 
 MESSAGE(STATUS "The project name is: ${PROJECT_NAME}")
 
+IF(GTest_FOUND)
+  SET(KOKKOS_GTEST_LIB GTest::gtest)
+ELSE()  # fallback to internal gtest
+  SET(KOKKOS_GTEST_LIB kokkos_gtest)
+ENDIF()
+
 FUNCTION(VERIFY_EMPTY CONTEXT)
   if(${ARGN})
     MESSAGE(FATAL_ERROR "Kokkos does not support all of Tribits. Unhandled arguments in ${CONTEXT}:\n${ARGN}")
@@ -128,6 +134,8 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE ROOT_NAME)
       "SOURCES;TESTONLYLIBS"
       ${ARGN})
 
+    SET_SOURCE_FILES_PROPERTIES(${PARSE_SOURCES} PROPERTIES LANGUAGE ${KOKKOS_COMPILE_LANGUAGE})
+
     SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME})
     ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES})
     IF (PARSE_TESTONLYLIBS)
@@ -155,7 +163,7 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
         TRIBITS_ADD_EXECUTABLE_AND_TEST(
             ${ROOT_NAME}
             SOURCES ${PARSE_SOURCES}
-            TESTONLYLIBS kokkos_gtest
+            TESTONLYLIBS ${KOKKOS_GTEST_LIB}
             NUM_MPI_PROCS 1
             COMM serial mpi
             ARGS ${PARSE_ARGS}
@@ -247,7 +255,7 @@ MACRO(KOKKOS_ADD_TEST_EXECUTABLE ROOT_NAME)
   KOKKOS_ADD_EXECUTABLE(${ROOT_NAME}
     SOURCES ${PARSE_SOURCES}
     ${PARSE_UNPARSED_ARGUMENTS}
-    TESTONLYLIBS kokkos_gtest
+    TESTONLYLIBS ${KOKKOS_GTEST_LIB}
   )
   SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME})
 ENDMACRO()
@@ -353,7 +361,7 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
     ""
     ${ARGN})
 
-  IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18")
+  IF((NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18"))
     #I can use link options
     #check for CXX linkage using the simple 3.18 way
     TARGET_LINK_OPTIONS(
@@ -370,12 +378,12 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
 
   TARGET_COMPILE_OPTIONS(
     ${LIBRARY_NAME} PUBLIC
-    $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_OPTIONS}>
+    $<$<COMPILE_LANGUAGE:${KOKKOS_COMPILE_LANGUAGE}>:${KOKKOS_COMPILE_OPTIONS}>
   )
 
   TARGET_COMPILE_DEFINITIONS(
     ${LIBRARY_NAME} PUBLIC
-    $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_DEFINITIONS}>
+    $<$<COMPILE_LANGUAGE:${KOKKOS_COMPILE_LANGUAGE}>:${KOKKOS_COMPILE_DEFINITIONS}>
   )
 
   TARGET_LINK_LIBRARIES(
@@ -385,7 +393,7 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
   IF (KOKKOS_ENABLE_CUDA)
     TARGET_COMPILE_OPTIONS(
       ${LIBRARY_NAME}
-      PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CUDA_OPTIONS}>
+      PUBLIC $<$<COMPILE_LANGUAGE:${KOKKOS_COMPILE_LANGUAGE}>:${KOKKOS_CUDA_OPTIONS}>
     )
     SET(NODEDUP_CUDAFE_OPTIONS)
     FOREACH(OPT ${KOKKOS_CUDAFE_OPTIONS})
@@ -393,14 +401,14 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
     ENDFOREACH()
     TARGET_COMPILE_OPTIONS(
       ${LIBRARY_NAME}
-      PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${NODEDUP_CUDAFE_OPTIONS}>
+      PUBLIC $<$<COMPILE_LANGUAGE:${KOKKOS_COMPILE_LANGUAGE}>:${NODEDUP_CUDAFE_OPTIONS}>
     )
   ENDIF()
 
   IF (KOKKOS_ENABLE_HIP)
     TARGET_COMPILE_OPTIONS(
       ${LIBRARY_NAME}
-      PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_AMDGPU_OPTIONS}>
+      PUBLIC $<$<COMPILE_LANGUAGE:${KOKKOS_COMPILE_LANGUAGE}>:${KOKKOS_AMDGPU_OPTIONS}>
     )
   ENDIF()
 
@@ -418,7 +426,7 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
     ENDFOREACH()
     TARGET_COMPILE_OPTIONS(
       ${LIBRARY_NAME}
-      PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${NODEDUP_XCOMPILER_OPTIONS}>
+      PUBLIC $<$<COMPILE_LANGUAGE:${KOKKOS_COMPILE_LANGUAGE}>:${NODEDUP_XCOMPILER_OPTIONS}>
     )
   ENDIF()
 
@@ -447,6 +455,9 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME)
   IF(PARSE_SOURCES)
     LIST(REMOVE_DUPLICATES PARSE_SOURCES)
   ENDIF()
+  FOREACH(source ${PARSE_SOURCES})
+    set_source_files_properties(${source} PROPERTIES LANGUAGE ${KOKKOS_COMPILE_LANGUAGE})
+  ENDFOREACH()
 
   IF(PARSE_STATIC)
     SET(LINK_TYPE STATIC)
diff --git a/packages/kokkos/containers/performance_tests/CMakeLists.txt b/packages/kokkos/containers/performance_tests/CMakeLists.txt
index 43c66c24fd8b83f579ed1961fc996c9b82e1d073..4f1eeacdad6183bc0f2b27614ddf79ff202426e3 100644
--- a/packages/kokkos/containers/performance_tests/CMakeLists.txt
+++ b/packages/kokkos/containers/performance_tests/CMakeLists.txt
@@ -4,16 +4,11 @@ KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_
 KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
 
 foreach(Tag Threads;OpenMP;Cuda;HPX;HIP)
-  # Because there is always an exception to the rule
-  if(Tag STREQUAL "Threads")
-    set(DEVICE "PTHREAD")
-  else()
-    string(TOUPPER ${Tag} DEVICE)
-  endif()
+  string(TOUPPER ${Tag} DEVICE)
   string(TOLOWER ${Tag} dir)
 
   if(Kokkos_ENABLE_${DEVICE})
-	    message(STATUS "Sources Test${Tag}.cpp")
+    message(STATUS "Sources Test${Tag}.cpp")
 
     set(SOURCES
         TestMain.cpp
diff --git a/packages/kokkos/containers/performance_tests/Makefile b/packages/kokkos/containers/performance_tests/Makefile
index cbb8490798fd69586355cd3a0f449a8585d55565..123c3153a09ad829a043ff99c7d3d7da0d1eb404 100644
--- a/packages/kokkos/containers/performance_tests/Makefile
+++ b/packages/kokkos/containers/performance_tests/Makefile
@@ -37,7 +37,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
 	TEST_TARGETS += test-hip
 endif
 
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
 	OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
 	TARGETS += KokkosContainers_PerformanceTest_Threads
 	TEST_TARGETS += test-threads
diff --git a/packages/kokkos/containers/src/CMakeLists.txt b/packages/kokkos/containers/src/CMakeLists.txt
index 98655896d4f351418fc60e5330cd194fa2358d0e..cdbc6527fda54fc2a53dc2832afd67c3af347713 100644
--- a/packages/kokkos/containers/src/CMakeLists.txt
+++ b/packages/kokkos/containers/src/CMakeLists.txt
@@ -10,8 +10,8 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 SET(KOKKOS_CONTAINERS_SRCS)
 APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.cpp)
 SET(KOKKOS_CONTAINER_HEADERS)
-APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.hpp)
-APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
+APPEND_GLOB(KOKKOS_CONTAINERS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.hpp)
+APPEND_GLOB(KOKKOS_CONTAINERS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
 
 
 INSTALL (
@@ -23,7 +23,7 @@ INSTALL (
 KOKKOS_ADD_LIBRARY(
   kokkoscontainers
   SOURCES ${KOKKOS_CONTAINERS_SRCS}
-  HEADERS ${KOKKOS_CONTAINER_HEADERS}
+  HEADERS ${KOKKOS_CONTAINERS_HEADERS}
 )
 
 KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscontainers
diff --git a/packages/kokkos/containers/src/Kokkos_Bitset.hpp b/packages/kokkos/containers/src/Kokkos_Bitset.hpp
index c5b66f05a3ce0b7778fdcbc8e7a3e766301273d0..ea73c4f536916c7aa5928253a7fbd1bfdf04b458 100644
--- a/packages/kokkos/containers/src/Kokkos_Bitset.hpp
+++ b/packages/kokkos/containers/src/Kokkos_Bitset.hpp
@@ -73,7 +73,7 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);
 template <typename Device>
 class Bitset {
  public:
-  using execution_space = Device;
+  using execution_space = typename Device::execution_space;
   using size_type       = unsigned int;
 
   static constexpr unsigned BIT_SCAN_REVERSE   = 1u;
@@ -142,11 +142,12 @@ class Bitset {
 
     if (m_last_block_mask) {
       // clear the unused bits in the last block
-      using raw_deep_copy =
-          Kokkos::Impl::DeepCopy<typename execution_space::memory_space,
-                                 Kokkos::HostSpace>;
-      raw_deep_copy(m_blocks.data() + (m_blocks.extent(0) - 1u),
-                    &m_last_block_mask, sizeof(unsigned));
+      Kokkos::Impl::DeepCopy<typename Device::memory_space, Kokkos::HostSpace>(
+          m_blocks.data() + (m_blocks.extent(0) - 1u), &m_last_block_mask,
+          sizeof(unsigned));
+      Kokkos::fence(
+          "Bitset::set: fence after clearing unused bits copying from "
+          "HostSpace");
     }
   }
 
@@ -189,8 +190,12 @@ class Bitset {
   KOKKOS_FORCEINLINE_FUNCTION
   bool test(unsigned i) const {
     if (i < m_size) {
+#ifdef KOKKOS_ENABLE_SYCL
+      const unsigned block = Kokkos::atomic_load(&m_blocks[i >> block_shift]);
+#else
       const unsigned block = volatile_load(&m_blocks[i >> block_shift]);
-      const unsigned mask  = 1u << static_cast<int>(i & block_mask);
+#endif
+      const unsigned mask = 1u << static_cast<int>(i & block_mask);
       return block & mask;
     }
     return false;
@@ -213,7 +218,11 @@ class Bitset {
     const unsigned block_idx =
         (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
     const unsigned offset = hint & block_mask;
-    unsigned block        = volatile_load(&m_blocks[block_idx]);
+#ifdef KOKKOS_ENABLE_SYCL
+    unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
+#else
+    unsigned block = volatile_load(&m_blocks[block_idx]);
+#endif
     block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
                 ? block
                 : block & m_last_block_mask;
@@ -231,7 +240,11 @@ class Bitset {
       unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const {
     const unsigned block_idx = hint >> block_shift;
     const unsigned offset    = hint & block_mask;
-    unsigned block           = volatile_load(&m_blocks[block_idx]);
+#ifdef KOKKOS_ENABLE_SYCL
+    unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
+#else
+    unsigned block = volatile_load(&m_blocks[block_idx]);
+#endif
     block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
                 ? ~block
                 : ~block & m_last_block_mask;
@@ -268,7 +281,7 @@ class Bitset {
     block = Impl::rotate_right(block, offset);
     return (((!(scan_direction & BIT_SCAN_REVERSE)
                   ? Impl::bit_scan_forward(block)
-                  : ::Kokkos::log2(block)) +
+                  : Impl::int_log2(block)) +
              offset) &
             block_mask) +
            block_start;
@@ -288,7 +301,7 @@ class Bitset {
  private:
   unsigned m_size;
   unsigned m_last_block_mask;
-  View<unsigned*, execution_space, MemoryTraits<RandomAccess> > m_blocks;
+  View<unsigned*, Device, MemoryTraits<RandomAccess> > m_blocks;
 
  private:
   template <typename DDevice>
@@ -313,7 +326,7 @@ class Bitset {
 template <typename Device>
 class ConstBitset {
  public:
-  using execution_space = Device;
+  using execution_space = typename Device::execution_space;
   using size_type       = unsigned int;
 
  private:
@@ -369,7 +382,7 @@ class ConstBitset {
 
  private:
   unsigned m_size;
-  View<const unsigned*, execution_space, MemoryTraits<RandomAccess> > m_blocks;
+  View<const unsigned*, Device, MemoryTraits<RandomAccess> > m_blocks;
 
  private:
   template <typename DDevice>
@@ -394,11 +407,12 @@ void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) {
         "Error: Cannot deep_copy bitsets of different sizes!");
   }
 
-  using raw_deep_copy =
-      Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
-                             typename SrcDevice::memory_space>;
-  raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
-                sizeof(unsigned) * src.m_blocks.extent(0));
+  Kokkos::fence("Bitset::deep_copy: fence before copy operation");
+  Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
+                         typename SrcDevice::memory_space>(
+      dst.m_blocks.data(), src.m_blocks.data(),
+      sizeof(unsigned) * src.m_blocks.extent(0));
+  Kokkos::fence("Bitset::deep_copy: fence after copy operation");
 }
 
 template <typename DstDevice, typename SrcDevice>
@@ -408,11 +422,12 @@ void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
         "Error: Cannot deep_copy bitsets of different sizes!");
   }
 
-  using raw_deep_copy =
-      Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
-                             typename SrcDevice::memory_space>;
-  raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
-                sizeof(unsigned) * src.m_blocks.extent(0));
+  Kokkos::fence("Bitset::deep_copy: fence before copy operation");
+  Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
+                         typename SrcDevice::memory_space>(
+      dst.m_blocks.data(), src.m_blocks.data(),
+      sizeof(unsigned) * src.m_blocks.extent(0));
+  Kokkos::fence("Bitset::deep_copy: fence after copy operation");
 }
 
 template <typename DstDevice, typename SrcDevice>
@@ -422,11 +437,12 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
         "Error: Cannot deep_copy bitsets of different sizes!");
   }
 
-  using raw_deep_copy =
-      Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
-                             typename SrcDevice::memory_space>;
-  raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
-                sizeof(unsigned) * src.m_blocks.extent(0));
+  Kokkos::fence("Bitset::deep_copy: fence before copy operation");
+  Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
+                         typename SrcDevice::memory_space>(
+      dst.m_blocks.data(), src.m_blocks.data(),
+      sizeof(unsigned) * src.m_blocks.extent(0));
+  Kokkos::fence("Bitset::deep_copy: fence after copy operation");
 }
 
 }  // namespace Kokkos
diff --git a/packages/kokkos/containers/src/Kokkos_DualView.hpp b/packages/kokkos/containers/src/Kokkos_DualView.hpp
index f55d0f2b7f3f10b43ea4ee076dc4dea191010449..8c80ec55b5b3177b44c3e61befe1c7bb5f3b8a78 100644
--- a/packages/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_DualView.hpp
@@ -260,9 +260,13 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
            const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
            const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
       : modified_flags(t_modified_flags("DualView::modified_flags")),
-        d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7),
-        h_view(create_mirror_view(d_view))  // without UVM, host View mirrors
-  {}
+        d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7) {
+    // without UVM, host View mirrors
+    if (Kokkos::Impl::has_type<Impl::WithoutInitializing_t, P...>::value)
+      h_view = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, d_view);
+    else
+      h_view = Kokkos::create_mirror_view(d_view);
+  }
 
   //! Copy constructor (shallow copy)
   template <class SS, class LS, class DS, class MS>
@@ -895,23 +899,22 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
   /// This discards any existing contents of the objects, and resets
   /// their modified flags.  It does <i>not</i> copy the old contents
   /// of either View into the new View objects.
-  void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-               const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-               const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-               const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-               const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-               const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-               const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-               const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+  template <class... I>
+  void impl_realloc(const size_t n0, const size_t n1, const size_t n2,
+                    const size_t n3, const size_t n4, const size_t n5,
+                    const size_t n6, const size_t n7, const I&... arg_prop) {
     const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
     const bool sizeMismatch =
         Impl::size_mismatch(h_view, h_view.rank_dynamic, new_extents);
 
     if (sizeMismatch) {
-      ::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7);
-      h_view = create_mirror_view(d_view);
-    } else
+      ::Kokkos::realloc(arg_prop..., d_view, n0, n1, n2, n3, n4, n5, n6, n7);
+      h_view = create_mirror_view(arg_prop..., typename t_host::memory_space(),
+                                  d_view);
+    } else if (!Kokkos::Impl::has_type<Kokkos::Impl::WithoutInitializing_t,
+                                       I...>::value) {
       ::Kokkos::deep_copy(d_view, typename t_dev::value_type{});
+    }
 
     /* Reset dirty flags */
     if (modified_flags.data() == nullptr) {
@@ -920,18 +923,38 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
       modified_flags(1) = modified_flags(0) = 0;
   }
 
+  void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    impl_realloc(n0, n1, n2, n3, n4, n5, n6, n7);
+  }
+
+  template <typename I>
+  std::enable_if_t<Impl::is_view_ctor_property<I>::value> realloc(
+      const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    impl_realloc(n0, n1, n2, n3, n4, n5, n6, n7, arg_prop);
+  }
+
   /// \brief Resize both views, copying old contents into new if necessary.
   ///
   /// This method only copies the old contents into the new View
   /// objects for the device which was last marked as modified.
-  void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-              const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-              const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-              const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-              const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-              const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-              const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-              const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+  template <class... I>
+  void impl_resize(const size_t n0, const size_t n1, const size_t n2,
+                   const size_t n3, const size_t n4, const size_t n5,
+                   const size_t n6, const size_t n7, const I&... arg_prop) {
     const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
     const bool sizeMismatch =
         Impl::size_mismatch(h_view, h_view.rank_dynamic, new_extents);
@@ -942,8 +965,9 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
     if (modified_flags(1) >= modified_flags(0)) {
       /* Resize on Device */
       if (sizeMismatch) {
-        ::Kokkos::resize(d_view, n0, n1, n2, n3, n4, n5, n6, n7);
-        h_view = create_mirror_view(d_view);
+        ::Kokkos::resize(arg_prop..., d_view, n0, n1, n2, n3, n4, n5, n6, n7);
+        h_view = create_mirror_view(arg_prop...,
+                                    typename t_host::memory_space(), d_view);
 
         /* Mark Device copy as modified */
         modified_flags(1) = modified_flags(1) + 1;
@@ -951,8 +975,9 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
     } else {
       /* Realloc on Device */
       if (sizeMismatch) {
-        ::Kokkos::resize(h_view, n0, n1, n2, n3, n4, n5, n6, n7);
-        d_view = create_mirror_view(typename t_dev::execution_space(), h_view);
+        ::Kokkos::resize(arg_prop..., h_view, n0, n1, n2, n3, n4, n5, n6, n7);
+        d_view = create_mirror_view(arg_prop..., typename t_dev::memory_space(),
+                                    h_view);
 
         /* Mark Host copy as modified */
         modified_flags(0) = modified_flags(0) + 1;
@@ -960,6 +985,30 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
     }
   }
 
+  void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    impl_resize(n0, n1, n2, n3, n4, n5, n6, n7);
+  }
+
+  template <class I>
+  std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize(
+      const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    impl_resize(n0, n1, n2, n3, n4, n5, n6, n7, arg_prop);
+  }
+
   //@}
   //! \name Methods for getting capacity, stride, or dimension(s).
   //@{
@@ -1081,12 +1130,29 @@ void resize(DualView<Properties...>& dv, Args&&... args) noexcept(
   dv.resize(std::forward<Args>(args)...);
 }
 
+template <class I, class... Properties, class... Args>
+std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize(
+    const I& arg_prop, DualView<Properties...>& dv,
+    Args&&... args) noexcept(noexcept(dv.resize(arg_prop,
+                                                std::forward<Args>(args)...))) {
+  dv.resize(arg_prop, std::forward<Args>(args)...);
+}
+
 template <class... Properties, class... Args>
 void realloc(DualView<Properties...>& dv, Args&&... args) noexcept(
     noexcept(dv.realloc(std::forward<Args>(args)...))) {
   dv.realloc(std::forward<Args>(args)...);
 }
 
+template <class I, class... Properties, class... Args>
+std::enable_if_t<Impl::is_view_ctor_property<I>::value> realloc(
+    const I& arg_prop, DualView<Properties...>& dv,
+    Args&&... args) noexcept(noexcept(dv.realloc(arg_prop,
+                                                 std::forward<Args>(
+                                                     args)...))) {
+  dv.realloc(arg_prop, std::forward<Args>(args)...);
+}
+
 }  // end namespace Kokkos
 
 #endif
diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
index b673c53a4ef8e8a760c613332418ae5d600a6812..176129d25434c940721861bb558c003ed233beae 100644
--- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
@@ -284,18 +284,17 @@ KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds(
   }
 
   if (!dyn_rank_view_verify_operator_bounds<0>(rank, map, args...)) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    enum { LEN = 1024 };
-    char buffer[LEN];
-    const std::string label = tracker.template get_label<MemorySpace>();
-    int n = snprintf(buffer, LEN, "DynRankView bounds error of view %s (",
-                     label.c_str());
-    dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...);
-    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
-#else
-    (void)tracker;
-    Kokkos::abort("DynRankView bounds error");
-#endif
+    KOKKOS_IF_ON_HOST(
+        (enum {LEN = 1024}; char buffer[LEN];
+         const std::string label = tracker.template get_label<MemorySpace>();
+         int n = snprintf(buffer, LEN, "DynRankView bounds error of view %s (",
+                          label.c_str());
+         dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map,
+                                                args...);
+         Kokkos::Impl::throw_runtime_exception(std::string(buffer));))
+
+    KOKKOS_IF_ON_DEVICE(
+        ((void)tracker; Kokkos::abort("DynRankView bounds error");))
   }
 }
 
@@ -576,18 +575,22 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
 #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK)
 
 // rank of the calling operator - included as first argument in ARG
-#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)                          \
-  Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
-                             typename traits::memory_space>::check();  \
-  Kokkos::Impl::dyn_rank_view_verify_operator_bounds<                  \
-      typename traits::memory_space>                                   \
+#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)                             \
+  Kokkos::Impl::runtime_check_memory_access_violation<                    \
+      typename traits::memory_space>(                                     \
+      "Kokkos::DynRankView ERROR: attempt to access inaccessible memory " \
+      "space");                                                           \
+  Kokkos::Impl::dyn_rank_view_verify_operator_bounds<                     \
+      typename traits::memory_space>                                      \
       ARG;
 
 #else
 
-#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)                          \
-  Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
-                             typename traits::memory_space>::check();
+#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)                             \
+  Kokkos::Impl::runtime_check_memory_access_violation<                    \
+      typename traits::memory_space>(                                     \
+      "Kokkos::DynRankView ERROR: attempt to access inaccessible memory " \
+      "space");
 
 #endif
 
@@ -1682,10 +1685,46 @@ struct DynRankViewRemap {
 
 namespace Kokkos {
 
+namespace Impl {
+
+/* \brief Returns a View of the requested rank, aliasing the
+   underlying memory, to facilitate implementation of deep_copy() and
+   other routines that are defined on View */
+template <unsigned N, typename T, typename... Args>
+auto as_view_of_rank_n(DynRankView<T, Args...> v) {
+  if (v.rank() != N) {
+    Kokkos::Impl::throw_runtime_exception(
+        "Converting DynRankView of rank " + std::to_string(v.rank()) +
+        " to a View of mis-matched rank " + std::to_string(N));
+  }
+
+  return View<typename RankDataType<T, N>::type, Args...>(v.data(), v.layout());
+}
+
+template <typename Function, typename... Args>
+void apply_to_view_of_static_rank(Function&& f, DynRankView<Args...> a) {
+  switch (rank(a)) {
+    case 0: f(as_view_of_rank_n<0>(a)); break;
+    case 1: f(as_view_of_rank_n<1>(a)); break;
+    case 2: f(as_view_of_rank_n<2>(a)); break;
+    case 3: f(as_view_of_rank_n<3>(a)); break;
+    case 4: f(as_view_of_rank_n<4>(a)); break;
+    case 5: f(as_view_of_rank_n<5>(a)); break;
+    case 6: f(as_view_of_rank_n<6>(a)); break;
+    case 7: f(as_view_of_rank_n<7>(a)); break;
+    default:
+      Kokkos::Impl::throw_runtime_exception(
+          "Trying to apply a function to a view of unexpected rank " +
+          std::to_string(rank(a)));
+  }
+}
+
+}  // namespace Impl
+
 /** \brief  Deep copy a value from Host memory into a view.  */
-template <class DT, class... DP>
+template <class ExecSpace, class DT, class... DP>
 inline void deep_copy(
-    const DynRankView<DT, DP...>& dst,
+    const ExecSpace& e, const DynRankView<DT, DP...>& dst,
     typename ViewTraits<DT, DP...>::const_value_type& value,
     typename std::enable_if<std::is_same<
         typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
@@ -1695,42 +1734,50 @@ inline void deep_copy(
                    typename ViewTraits<DT, DP...>::value_type>::value,
       "deep_copy requires non-const type");
 
-  Kokkos::fence(
-      "Kokkos::deep_copy(DynRankView, value_type): fence before filling view");
-  Kokkos::Impl::DynRankViewFill<DynRankView<DT, DP...> >(dst, value);
-  Kokkos::fence(
-      "Kokkos::deep_copy(DynRankView, value_type): fence after filling view");
+  Impl::apply_to_view_of_static_rank(
+      [=](auto view) { deep_copy(e, view, value); }, dst);
+}
+
+template <class DT, class... DP>
+inline void deep_copy(
+    const DynRankView<DT, DP...>& dst,
+    typename ViewTraits<DT, DP...>::const_value_type& value,
+    typename std::enable_if<std::is_same<
+        typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
+        nullptr) {
+  Impl::apply_to_view_of_static_rank([=](auto view) { deep_copy(view, value); },
+                                     dst);
 }
 
 /** \brief  Deep copy into a value in Host memory from a view.  */
+template <class ExecSpace, class ST, class... SP>
+inline void deep_copy(
+    const ExecSpace& e,
+    typename ViewTraits<ST, SP...>::non_const_value_type& dst,
+    const DynRankView<ST, SP...>& src,
+    typename std::enable_if<std::is_same<
+        typename ViewTraits<ST, SP...>::specialize, void>::value>::type* = 0) {
+  deep_copy(e, dst, Impl::as_view_of_rank_n<0>(src));
+}
+
 template <class ST, class... SP>
 inline void deep_copy(
     typename ViewTraits<ST, SP...>::non_const_value_type& dst,
     const DynRankView<ST, SP...>& src,
     typename std::enable_if<std::is_same<
         typename ViewTraits<ST, SP...>::specialize, void>::value>::type* = 0) {
-  if (src.rank() != 0) {
-    Kokkos::abort("");
-  }
-
-  using src_traits       = ViewTraits<ST, SP...>;
-  using src_memory_space = typename src_traits::memory_space;
-  Kokkos::fence(
-      "Kokkos::deep_copy(value_type, DynRankView): fence before copying "
-      "value");
-  Kokkos::Impl::DeepCopy<HostSpace, src_memory_space>(&dst, src.data(),
-                                                      sizeof(ST));
-  Kokkos::fence(
-      "Kokkos::deep_copy(value_type, DynRankView): fence after copying value");
+  deep_copy(dst, Impl::as_view_of_rank_n<0>(src));
 }
 
 //----------------------------------------------------------------------------
 /** \brief  A deep copy between views of the default specialization, compatible
  * type, same rank, same contiguous layout.
+ *
+ * A rank mismatch will error out in the attempt to convert to a View
  */
-template <class DstType, class SrcType>
+template <class ExecSpace, class DstType, class SrcType>
 inline void deep_copy(
-    const DstType& dst, const SrcType& src,
+    const ExecSpace& exec_space, const DstType& dst, const SrcType& src,
     typename std::enable_if<
         (std::is_same<typename DstType::traits::specialize, void>::value &&
          std::is_same<typename SrcType::traits::specialize, void>::value &&
@@ -1741,134 +1788,96 @@ inline void deep_copy(
                    typename DstType::traits::non_const_value_type>::value,
       "deep_copy requires non-const destination type");
 
-  using dst_type = DstType;
-  using src_type = SrcType;
-
-  using dst_execution_space = typename dst_type::execution_space;
-  using src_execution_space = typename src_type::execution_space;
-  using dst_memory_space    = typename dst_type::memory_space;
-  using src_memory_space    = typename src_type::memory_space;
-
-  enum {
-    DstExecCanAccessSrc =
-        Kokkos::SpaceAccessibility<dst_execution_space,
-                                   src_memory_space>::accessible
-  };
+  switch (rank(dst)) {
+    case 0:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<0>(dst),
+                Impl::as_view_of_rank_n<0>(src));
+      break;
+    case 1:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<1>(dst),
+                Impl::as_view_of_rank_n<1>(src));
+      break;
+    case 2:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<2>(dst),
+                Impl::as_view_of_rank_n<2>(src));
+      break;
+    case 3:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<3>(dst),
+                Impl::as_view_of_rank_n<3>(src));
+      break;
+    case 4:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<4>(dst),
+                Impl::as_view_of_rank_n<4>(src));
+      break;
+    case 5:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<5>(dst),
+                Impl::as_view_of_rank_n<5>(src));
+      break;
+    case 6:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<6>(dst),
+                Impl::as_view_of_rank_n<6>(src));
+      break;
+    case 7:
+      deep_copy(exec_space, Impl::as_view_of_rank_n<7>(dst),
+                Impl::as_view_of_rank_n<7>(src));
+      break;
+    default:
+      Kokkos::Impl::throw_runtime_exception(
+          "Calling DynRankView deep_copy with a view of unexpected rank " +
+          std::to_string(rank(dst)));
+  }
+}
 
-  enum {
-    SrcExecCanAccessDst =
-        Kokkos::SpaceAccessibility<src_execution_space,
-                                   dst_memory_space>::accessible
-  };
+template <class DstType, class SrcType>
+inline void deep_copy(
+    const DstType& dst, const SrcType& src,
+    typename std::enable_if<
+        (std::is_same<typename DstType::traits::specialize, void>::value &&
+         std::is_same<typename SrcType::traits::specialize, void>::value &&
+         (Kokkos::is_dyn_rank_view<DstType>::value ||
+          Kokkos::is_dyn_rank_view<SrcType>::value))>::type* = nullptr) {
+  static_assert(
+      std::is_same<typename DstType::traits::value_type,
+                   typename DstType::traits::non_const_value_type>::value,
+      "deep_copy requires non-const destination type");
 
-  if ((void*)dst.data() != (void*)src.data()) {
-    // Concern: If overlapping views then a parallel copy will be erroneous.
-    // ...
-
-    // If same type, equal layout, equal dimensions, equal span, and contiguous
-    // memory then can byte-wise copy
-    if (rank(src) == 0 && rank(dst) == 0) {
-      using value_type = typename dst_type::value_type;
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
-          "copying rank-0 views");
-      Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
-          dst.data(), src.data(), sizeof(value_type));
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
-          "copying rank-0 views");
-    } else if (std::is_same<
-                   typename DstType::traits::value_type,
-                   typename SrcType::traits::non_const_value_type>::value &&
-               ((std::is_same<typename DstType::traits::array_layout,
-                              typename SrcType::traits::array_layout>::value &&
-                 (std::is_same<typename DstType::traits::array_layout,
-                               typename Kokkos::LayoutLeft>::value ||
-                  std::is_same<typename DstType::traits::array_layout,
-                               typename Kokkos::LayoutRight>::value)) ||
-                (rank(dst) == 1 && rank(src) == 1)) &&
-               dst.span_is_contiguous() && src.span_is_contiguous() &&
-               dst.span() == src.span() && dst.extent(0) == src.extent(0) &&
-
-               dst.extent(1) == src.extent(1) &&
-               dst.extent(2) == src.extent(2) &&
-               dst.extent(3) == src.extent(3) &&
-               dst.extent(4) == src.extent(4) &&
-               dst.extent(5) == src.extent(5) &&
-               dst.extent(6) == src.extent(6) &&
-               dst.extent(7) == src.extent(7)) {
-      const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
-          "copying rank-1 views");
-      Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
-          dst.data(), src.data(), nbytes);
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
-          "copying rank-1 views");
-    } else if (std::is_same<
-                   typename DstType::traits::value_type,
-                   typename SrcType::traits::non_const_value_type>::value &&
-               ((std::is_same<typename DstType::traits::array_layout,
-                              typename SrcType::traits::array_layout>::value &&
-                 std::is_same<typename DstType::traits::array_layout,
-                              typename Kokkos::LayoutStride>::value) ||
-                (rank(dst) == 1 && rank(src) == 1)) &&
-               dst.span_is_contiguous() && src.span_is_contiguous() &&
-               dst.span() == src.span() && dst.extent(0) == src.extent(0) &&
-               dst.extent(1) == src.extent(1) &&
-               dst.extent(2) == src.extent(2) &&
-               dst.extent(3) == src.extent(3) &&
-               dst.extent(4) == src.extent(4) &&
-               dst.extent(5) == src.extent(5) &&
-               dst.extent(6) == src.extent(6) &&
-               dst.extent(7) == src.extent(7) &&
-               dst.stride_0() == src.stride_0() &&
-               dst.stride_1() == src.stride_1() &&
-               dst.stride_2() == src.stride_2() &&
-               dst.stride_3() == src.stride_3() &&
-               dst.stride_4() == src.stride_4() &&
-               dst.stride_5() == src.stride_5() &&
-               dst.stride_6() == src.stride_6() &&
-               dst.stride_7() == src.stride_7()) {
-      const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
-          "copying rank-1 views");
-      Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
-          dst.data(), src.data(), nbytes);
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
-          "copying rank-1 views");
-    } else if (DstExecCanAccessSrc) {
-      // Copying data between views in accessible memory spaces and either
-      // non-contiguous or incompatible shape.
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
-          "remapping views of incompatible shape");
-      Kokkos::Impl::DynRankViewRemap<dst_type, src_type>(dst, src);
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
-          "remapping views of incompatible shape");
-    } else if (SrcExecCanAccessDst) {
-      // Copying data between views in accessible memory spaces and either
-      // non-contiguous or incompatible shape.
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
-          "remapping views of incompatible shape");
-      Kokkos::Impl::DynRankViewRemap<dst_type, src_type, src_execution_space>(
-          dst, src);
-      Kokkos::fence(
-          "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
-          "remapping views of incompatible shape");
-    } else {
+  switch (rank(dst)) {
+    case 0:
+      deep_copy(Impl::as_view_of_rank_n<0>(dst),
+                Impl::as_view_of_rank_n<0>(src));
+      break;
+    case 1:
+      deep_copy(Impl::as_view_of_rank_n<1>(dst),
+                Impl::as_view_of_rank_n<1>(src));
+      break;
+    case 2:
+      deep_copy(Impl::as_view_of_rank_n<2>(dst),
+                Impl::as_view_of_rank_n<2>(src));
+      break;
+    case 3:
+      deep_copy(Impl::as_view_of_rank_n<3>(dst),
+                Impl::as_view_of_rank_n<3>(src));
+      break;
+    case 4:
+      deep_copy(Impl::as_view_of_rank_n<4>(dst),
+                Impl::as_view_of_rank_n<4>(src));
+      break;
+    case 5:
+      deep_copy(Impl::as_view_of_rank_n<5>(dst),
+                Impl::as_view_of_rank_n<5>(src));
+      break;
+    case 6:
+      deep_copy(Impl::as_view_of_rank_n<6>(dst),
+                Impl::as_view_of_rank_n<6>(src));
+      break;
+    case 7:
+      deep_copy(Impl::as_view_of_rank_n<7>(dst),
+                Impl::as_view_of_rank_n<7>(src));
+      break;
+    default:
       Kokkos::Impl::throw_runtime_exception(
-          "deep_copy given views that would require a temporary allocation");
-    }
-  } else {
-    Kokkos::fence(
-        "Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence due to same "
-        "src and dst");
+          "Calling DynRankView deep_copy with a view of unexpected rank " +
+          std::to_string(rank(dst)));
   }
 }
 
@@ -2053,6 +2062,24 @@ create_mirror_view_and_copy(
 namespace Kokkos {
 /** \brief  Resize a view with copying old data to new data at the corresponding
  * indices. */
+template <class... I, class T, class... P>
+inline void impl_resize(DynRankView<T, P...>& v, const size_t n0,
+                        const size_t n1, const size_t n2, const size_t n3,
+                        const size_t n4, const size_t n5, const size_t n6,
+                        const size_t n7, const I&... arg_prop) {
+  using drview_type = DynRankView<T, P...>;
+
+  static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
+                "Can only resize managed views");
+
+  drview_type v_resized(view_alloc(v.label(), arg_prop...), n0, n1, n2, n3, n4,
+                        n5, n6, n7);
+
+  Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v);
+
+  v = v_resized;
+}
+
 template <class T, class... P>
 inline void resize(DynRankView<T, P...>& v,
                    const size_t n0 = KOKKOS_INVALID_INDEX,
@@ -2063,20 +2090,42 @@ inline void resize(DynRankView<T, P...>& v,
                    const size_t n5 = KOKKOS_INVALID_INDEX,
                    const size_t n6 = KOKKOS_INVALID_INDEX,
                    const size_t n7 = KOKKOS_INVALID_INDEX) {
+  impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7);
+}
+
+template <class I, class T, class... P>
+inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize(
+    const I& arg_prop, DynRankView<T, P...>& v,
+    const size_t n0 = KOKKOS_INVALID_INDEX,
+    const size_t n1 = KOKKOS_INVALID_INDEX,
+    const size_t n2 = KOKKOS_INVALID_INDEX,
+    const size_t n3 = KOKKOS_INVALID_INDEX,
+    const size_t n4 = KOKKOS_INVALID_INDEX,
+    const size_t n5 = KOKKOS_INVALID_INDEX,
+    const size_t n6 = KOKKOS_INVALID_INDEX,
+    const size_t n7 = KOKKOS_INVALID_INDEX) {
+  impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop);
+}
+
+/** \brief  Resize a view with copying old data to new data at the corresponding
+ * indices. */
+template <class... I, class T, class... P>
+inline void impl_realloc(DynRankView<T, P...>& v, const size_t n0,
+                         const size_t n1, const size_t n2, const size_t n3,
+                         const size_t n4, const size_t n5, const size_t n6,
+                         const size_t n7, const I&... arg_prop) {
   using drview_type = DynRankView<T, P...>;
 
   static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
-                "Can only resize managed views");
-
-  drview_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6, n7);
+                "Can only realloc managed views");
 
-  Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v);
+  const std::string label = v.label();
 
-  v = v_resized;
+  v = drview_type();  // Deallocate first, if the only view to allocation
+  v = drview_type(view_alloc(label, arg_prop...), n0, n1, n2, n3, n4, n5, n6,
+                  n7);
 }
 
-/** \brief  Resize a view with copying old data to new data at the corresponding
- * indices. */
 template <class T, class... P>
 inline void realloc(DynRankView<T, P...>& v,
                     const size_t n0 = KOKKOS_INVALID_INDEX,
@@ -2087,15 +2136,21 @@ inline void realloc(DynRankView<T, P...>& v,
                     const size_t n5 = KOKKOS_INVALID_INDEX,
                     const size_t n6 = KOKKOS_INVALID_INDEX,
                     const size_t n7 = KOKKOS_INVALID_INDEX) {
-  using drview_type = DynRankView<T, P...>;
-
-  static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
-                "Can only realloc managed views");
-
-  const std::string label = v.label();
+  impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7);
+}
 
-  v = drview_type();  // Deallocate first, if the only view to allocation
-  v = drview_type(label, n0, n1, n2, n3, n4, n5, n6, n7);
+template <class I, class T, class... P>
+inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> realloc(
+    const I& arg_prop, DynRankView<T, P...>& v,
+    const size_t n0 = KOKKOS_INVALID_INDEX,
+    const size_t n1 = KOKKOS_INVALID_INDEX,
+    const size_t n2 = KOKKOS_INVALID_INDEX,
+    const size_t n3 = KOKKOS_INVALID_INDEX,
+    const size_t n4 = KOKKOS_INVALID_INDEX,
+    const size_t n5 = KOKKOS_INVALID_INDEX,
+    const size_t n6 = KOKKOS_INVALID_INDEX,
+    const size_t n7 = KOKKOS_INVALID_INDEX) {
+  impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop);
 }
 
 }  // namespace Kokkos
diff --git a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp
index 2c764f535c585a4f545300d619b83917f327f414..91904d7cc986589bcea7ecf8680fe9eca0be896c 100644
--- a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp
@@ -173,7 +173,9 @@ struct ChunkedArrayManager {
     void execute() {
       // Destroy the array of chunk pointers.
       // Two entries beyond the max chunks are allocation counters.
-      for (unsigned i = 0; i < m_chunk_max; i++) {
+      uintptr_t const len =
+          *reinterpret_cast<uintptr_t*>(m_chunks + m_chunk_max);
+      for (unsigned i = 0; i < len; i++) {
         Space().deallocate(m_label.c_str(), m_chunks[i],
                            sizeof(value_type) * m_chunk_size);
       }
@@ -279,21 +281,6 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
   static_assert(std::is_same<typename traits::specialize, void>::value,
                 "DynamicView only implemented for non-specialized View type");
 
-  template <class Space, bool = Kokkos::Impl::MemorySpaceAccess<
-                             Space, device_space>::accessible>
-  struct verify_space {
-    KOKKOS_FORCEINLINE_FUNCTION static void check() {}
-  };
-
-  template <class Space>
-  struct verify_space<Space, false> {
-    KOKKOS_FORCEINLINE_FUNCTION static void check() {
-      Kokkos::abort(
-          "Kokkos::DynamicView ERROR: attempt to access inaccessible memory "
-          "space");
-    };
-  };
-
  private:
   device_accessor m_chunks;
   host_accessor m_chunks_host;
@@ -418,8 +405,10 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
     static_assert(Kokkos::Impl::are_integral<I0, Args...>::value,
                   "Indices must be integral type");
 
-    DynamicView::template verify_space<
-        Kokkos::Impl::ActiveExecutionMemorySpace>::check();
+    Kokkos::Impl::runtime_check_memory_access_violation<
+        typename traits::memory_space>(
+        "Kokkos::DynamicView ERROR: attempt to access inaccessible memory "
+        "space");
 
     // Which chunk is being indexed.
     const uintptr_t ic = uintptr_t(i0 >> m_chunk_shift);
diff --git a/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp b/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp
index 18f026dc6ffcffc6c0b1884358ebf5a85012d40e..629b437c22d4e2c5517a07ee42042e383b570833 100644
--- a/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp
+++ b/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp
@@ -103,13 +103,13 @@ class ErrorReporter {
   }
 
  private:
-  using reports_view_t     = Kokkos::View<report_type *, execution_space>;
-  using reports_dualview_t = Kokkos::DualView<report_type *, execution_space>;
+  using reports_view_t     = Kokkos::View<report_type *, device_type>;
+  using reports_dualview_t = Kokkos::DualView<report_type *, device_type>;
 
   using host_mirror_space = typename reports_dualview_t::host_mirror_space;
-  Kokkos::View<int, execution_space> m_numReportsAttempted;
+  Kokkos::View<int, device_type> m_numReportsAttempted;
   reports_dualview_t m_reports;
-  Kokkos::DualView<int *, execution_space> m_reporters;
+  Kokkos::DualView<int *, device_type> m_reporters;
 };
 
 template <typename ReportType, typename DeviceType>
@@ -157,12 +157,10 @@ void ErrorReporter<ReportType, DeviceType>::getReports(
                           typename DeviceType::execution_space>::HostMirror
         &reports_out) {
   int num_reports = getNumReports();
-  reporters_out =
-      typename Kokkos::View<int *, typename DeviceType::execution_space>::
-          HostMirror("ErrorReport::reporters_out", num_reports);
-  reports_out = typename Kokkos::
-      View<report_type *, typename DeviceType::execution_space>::HostMirror(
-          "ErrorReport::reports_out", num_reports);
+  reporters_out   = typename Kokkos::View<int *, DeviceType>::HostMirror(
+      "ErrorReport::reporters_out", num_reports);
+  reports_out = typename Kokkos::View<report_type *, DeviceType>::HostMirror(
+      "ErrorReport::reports_out", num_reports);
 
   if (num_reports > 0) {
     m_reports.template sync<host_mirror_space>();
diff --git a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp
index 57bf745d4038de73b71654e518aa855e0faa1698..9d97dc08f874b775d4c55b627bd4a7acbbade824 100644
--- a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp
@@ -99,36 +99,32 @@ KOKKOS_INLINE_FUNCTION void offsetview_verify_operator_bounds(
     Kokkos::Impl::SharedAllocationTracker const& tracker, const MapType& map,
     const BeginsType& begins, Args... args) {
   if (!offsetview_verify_operator_bounds<0>(map, begins, args...)) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    enum { LEN = 1024 };
-    char buffer[LEN];
-    const std::string label = tracker.template get_label<MemorySpace>();
-    int n =
-        snprintf(buffer, LEN, "OffsetView bounds error of view labeled %s (",
-                 label.c_str());
-    offsetview_error_operator_bounds<0>(buffer + n, LEN - n, map, begins,
-                                        args...);
-    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
-#else
-    /* Check #1: is there a SharedAllocationRecord?
-      (we won't use it, but if its not there then there isn't
-       a corresponding SharedAllocationHeader containing a label).
-      This check should cover the case of Views that don't
-      have the Unmanaged trait but were initialized by pointer. */
-    if (tracker.has_record()) {
-      Kokkos::Impl::operator_bounds_error_on_device(map);
-    } else {
-      Kokkos::abort("OffsetView bounds error");
-    }
-#endif
+    KOKKOS_IF_ON_HOST(
+        (enum {LEN = 1024}; char buffer[LEN];
+         const std::string label = tracker.template get_label<MemorySpace>();
+         int n                   = snprintf(buffer, LEN,
+                          "OffsetView bounds error of view labeled %s (",
+                          label.c_str());
+         offsetview_error_operator_bounds<0>(buffer + n, LEN - n, map, begins,
+                                             args...);
+         Kokkos::Impl::throw_runtime_exception(std::string(buffer));))
+
+    KOKKOS_IF_ON_DEVICE((
+        /* Check #1: is there a SharedAllocationRecord?
+          (we won't use it, but if it is not there then there isn't
+           a corresponding SharedAllocationHeader containing a label).
+          This check should cover the case of Views that don't
+          have the Unmanaged trait but were initialized by pointer. */
+        if (tracker.has_record()) {
+          Kokkos::Impl::operator_bounds_error_on_device(map);
+        } else { Kokkos::abort("OffsetView bounds error"); }))
   }
 }
 
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-KOKKOS_INLINE_FUNCTION
-void runtime_check_rank_host(const size_t rank_dynamic, const size_t rank,
-                             const index_list_type minIndices,
-                             const std::string& label) {
+inline void runtime_check_rank_host(const size_t rank_dynamic,
+                                    const size_t rank,
+                                    const index_list_type minIndices,
+                                    const std::string& label) {
   bool isBad = false;
   std::string message =
       "Kokkos::Experimental::OffsetView ERROR: for OffsetView labeled '" +
@@ -155,7 +151,6 @@ void runtime_check_rank_host(const size_t rank_dynamic, const size_t rank,
 
   if (isBad) Kokkos::abort(message.c_str());
 }
-#endif
 
 KOKKOS_INLINE_FUNCTION
 void runtime_check_rank_device(const size_t rank_dynamic, const size_t rank,
@@ -378,18 +373,22 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
 
 #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK)
 
-#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG)                    \
-  Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
-                             typename traits::memory_space>::check();  \
-  Kokkos::Experimental::Impl::offsetview_verify_operator_bounds<       \
-      typename traits::memory_space>                                   \
+#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG)                      \
+  Kokkos::Impl::runtime_check_memory_access_violation<                   \
+      typename traits::memory_space>(                                    \
+      "Kokkos::OffsetView ERROR: attempt to access inaccessible memory " \
+      "space");                                                          \
+  Kokkos::Experimental::Impl::offsetview_verify_operator_bounds<         \
+      typename traits::memory_space>                                     \
       ARG;
 
 #else
 
-#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG)                    \
-  Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
-                             typename traits::memory_space>::check();
+#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG)                      \
+  Kokkos::Impl::runtime_check_memory_access_violation<                   \
+      typename traits::memory_space>(                                    \
+      "Kokkos::OffsetView ERROR: attempt to access inaccessible memory " \
+      "space");
 
 #endif
  public:
@@ -863,14 +862,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
                   "Incompatible OffsetView copy construction");
     Mapping::assign(m_map, aview.impl_map(), m_track);
 
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-    Kokkos::Experimental::Impl::runtime_check_rank_host(
-        traits::rank_dynamic, Rank, minIndices, label());
-#else
-    Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic,
-                                                          Rank, minIndices);
+    KOKKOS_IF_ON_HOST((Kokkos::Experimental::Impl::runtime_check_rank_host(
+                           traits::rank_dynamic, Rank, minIndices, label());))
 
-#endif
+    KOKKOS_IF_ON_DEVICE((Kokkos::Experimental::Impl::runtime_check_rank_device(
+                             traits::rank_dynamic, Rank, minIndices);))
 
     for (size_t i = 0; i < minIndices.size(); ++i) {
       m_begins[i] = minIndices.begin()[i];
@@ -885,15 +881,6 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
     static_assert(Mapping::is_assignable,
                   "Incompatible OffsetView copy construction");
     Mapping::assign(m_map, aview.impl_map(), m_track);
-
-    //#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-    //        Kokkos::Experimental::Impl::runtime_check_rank_host(traits::rank_dynamic,
-    //        Rank, minIndices, label());
-    //#else
-    //        Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic,
-    //        Rank, minIndices);
-    //
-    //#endif
   }
 
   // may assign unmanaged from managed.
@@ -941,12 +928,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
     return *(a.begin() + pos);
   }
 
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
   // Check that begins < ends for all elements
   // B, E can be begins_type and/or index_list_type
   template <typename B, typename E>
-  KOKKOS_INLINE_FUNCTION static subtraction_failure
-  runtime_check_begins_ends_host(const B& begins, const E& ends) {
+  static subtraction_failure runtime_check_begins_ends_host(const B& begins,
+                                                            const E& ends) {
     std::string message;
     if (begins.size() != Rank)
       message +=
@@ -1015,7 +1001,6 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
 
     return subtraction_failure::none;
   }
-#endif  // KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
 
   // Check the begins < ends for all elements
   template <typename B, typename E>
@@ -1049,6 +1034,14 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
     return subtraction_failure::none;
   }
 
+  template <typename B, typename E>
+  KOKKOS_INLINE_FUNCTION static subtraction_failure runtime_check_begins_ends(
+      const B& begins, const E& ends) {
+    KOKKOS_IF_ON_HOST((return runtime_check_begins_ends_host(begins, ends);))
+    KOKKOS_IF_ON_DEVICE(
+        (return runtime_check_begins_ends_device(begins, ends);))
+  }
+
   // Constructor around unmanaged data after checking begins < ends for all
   // elements
   // Each of B, E can be begins_type and/or index_list_type
@@ -1081,54 +1074,26 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
   KOKKOS_INLINE_FUNCTION
   OffsetView(const pointer_type& p, const begins_type& begins_,
              const begins_type& ends_)
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-      : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_host(begins_, ends_))
-#else
       : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_device(begins_, ends_))
-#endif
-  {
-  }
+                   runtime_check_begins_ends(begins_, ends_)) {}
 
   KOKKOS_INLINE_FUNCTION
   OffsetView(const pointer_type& p, const begins_type& begins_,
              index_list_type ends_)
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
       : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_host(begins_, ends_))
-#else
-      : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_device(begins_, ends_))
-#endif
-  {
-  }
+                   runtime_check_begins_ends(begins_, ends_)) {}
 
   KOKKOS_INLINE_FUNCTION
   OffsetView(const pointer_type& p, index_list_type begins_,
              const begins_type& ends_)
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
       : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_host(begins_, ends_))
-#else
-      : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_device(begins_, ends_))
-#endif
-  {
-  }
+                   runtime_check_begins_ends(begins_, ends_)) {}
 
   KOKKOS_INLINE_FUNCTION
   OffsetView(const pointer_type& p, index_list_type begins_,
              index_list_type ends_)
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-      : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_host(begins_, ends_))
-#else
       : OffsetView(p, begins_, ends_,
-                   runtime_check_begins_ends_device(begins_, ends_))
-#endif
-  {
-  }
+                   runtime_check_begins_ends(begins_, ends_)) {}
 
   //----------------------------------------
   // Allocation tracking properties
@@ -1265,14 +1230,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
     // Setup and initialization complete, start tracking
     m_track.assign_allocated_record_to_uninitialized(record);
 
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-    Kokkos::Experimental::Impl::runtime_check_rank_host(
-        traits::rank_dynamic, Rank, minIndices, label());
-#else
-    Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic,
-                                                          Rank, minIndices);
+    KOKKOS_IF_ON_HOST((Kokkos::Experimental::Impl::runtime_check_rank_host(
+                           traits::rank_dynamic, Rank, minIndices, label());))
 
-#endif
+    KOKKOS_IF_ON_DEVICE((Kokkos::Experimental::Impl::runtime_check_rank_device(
+                             traits::rank_dynamic, Rank, minIndices);))
   }
 };
 
@@ -1887,12 +1849,12 @@ struct MirrorOffsetViewType {
   // The array_layout
   using array_layout = typename src_view_type::array_layout;
   // The data type (we probably want it non-const since otherwise we can't even
-  // deep_copy to it.
+  // deep_copy to it.)
   using data_type = typename src_view_type::non_const_data_type;
   // The destination view type if it is not the same memory space
   using dest_view_type =
       Kokkos::Experimental::OffsetView<data_type, array_layout, Space>;
-  // If it is the same memory_space return the existsing view_type
+  // If it is the same memory_space return the existing view_type
   // This will also keep the unmanaged trait if necessary
   using view_type = typename std::conditional<is_same_memspace, src_view_type,
                                               dest_view_type>::type;
@@ -1912,7 +1874,7 @@ struct MirrorOffsetType {
   // The array_layout
   using array_layout = typename src_view_type::array_layout;
   // The data type (we probably want it non-const since otherwise we can't even
-  // deep_copy to it.
+  // deep_copy to it.)
   using data_type = typename src_view_type::non_const_data_type;
   // The destination view type if it is not the same memory space
   using view_type =
diff --git a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp
index 79bc43b7393d85a1214e0ca3a8dc15861281e44e..024b4618a46c23b2aee0ff18e0f5d34b24cf8267 100644
--- a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp
@@ -861,18 +861,54 @@ class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
     if (view.data() != internal_view.data()) reset(exec_space);
   }
 
-  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
-              const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
-              const size_t n6 = 0, const size_t n7 = 0) {
+  void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
     ::Kokkos::resize(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
   }
 
-  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
-               const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
-               const size_t n6 = 0, const size_t n7 = 0) {
+  template <class I>
+  std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> resize(
+      const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    ::Kokkos::resize(arg_prop, internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
+  }
+
+  void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
     ::Kokkos::realloc(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
   }
 
+  template <class I>
+  std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> realloc(
+      const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    ::Kokkos::realloc(arg_prop, internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
+  }
+
  protected:
   template <typename... Args>
   KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(Args... args) const {
@@ -1097,20 +1133,54 @@ class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op,
         internal_view.size() - view.size(), internal_view.label());
   }
 
-  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
-              const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
-              const size_t n6 = 0) {
+  void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
     ::Kokkos::resize(internal_view, unique_token.size(), n0, n1, n2, n3, n4, n5,
                      n6);
   }
 
-  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
-               const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
-               const size_t n6 = 0) {
+  template <class I>
+  std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> resize(
+      const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    ::Kokkos::resize(arg_prop, internal_view, unique_token.size(), n0, n1, n2,
+                     n3, n4, n5, n6);
+  }
+
+  void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
     ::Kokkos::realloc(internal_view, unique_token.size(), n0, n1, n2, n3, n4,
                       n5, n6);
   }
 
+  template <class I>
+  std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> realloc(
+      const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+      const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+    ::Kokkos::realloc(arg_prop, internal_view, unique_token.size(), n0, n1, n2,
+                      n3, n4, n5, n6);
+  }
+
  protected:
   template <typename... Args>
   KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int rank,
@@ -1328,9 +1398,13 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
         internal_view.size() - view.size(), internal_view.label());
   }
 
-  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
-              const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
-              const size_t n6 = 0) {
+  void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+              const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
     size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
     const int i     = internal_view.rank - 1;
     arg_N[i]        = unique_token.size();
@@ -1339,9 +1413,13 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
                      arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
   }
 
-  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
-               const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
-               const size_t n6 = 0) {
+  void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+               const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
     size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
     const int i     = internal_view.rank - 1;
     arg_N[i]        = unique_token.size();
@@ -1518,6 +1596,15 @@ void realloc(
   scatter_view.realloc(is...);
 }
 
+template <typename I, typename DT, typename LY, typename ES, typename OP,
+          typename CT, typename DP, typename... IS>
+std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> realloc(
+    const I& arg_prop,
+    Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
+    IS... is) {
+  scatter_view.realloc(arg_prop, is...);
+}
+
 template <typename DT, typename LY, typename ES, typename OP, typename CT,
           typename DP, typename... IS>
 void resize(
@@ -1526,6 +1613,15 @@ void resize(
   scatter_view.resize(is...);
 }
 
+template <typename I, typename DT, typename LY, typename ES, typename OP,
+          typename CT, typename DP, typename... IS>
+std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> resize(
+    const I& arg_prop,
+    Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
+    IS... is) {
+  scatter_view.resize(arg_prop, is...);
+}
+
 }  // namespace Kokkos
 
 #endif
diff --git a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp
index a1601eee35869f5c26249dbf2ed325c4e84d5420..fbef0a0131faa4af8824ce806d901e426132890c 100644
--- a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp
+++ b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp
@@ -203,7 +203,7 @@ template <typename Key, typename Value,
           typename Device = Kokkos::DefaultExecutionSpace,
           typename Hasher = pod_hash<typename std::remove_const<Key>::type>,
           typename EqualTo =
-              pod_equal_to<typename std::remove_const<Key>::type> >
+              pod_equal_to<typename std::remove_const<Key>::type>>
 class UnorderedMap {
  private:
   using host_mirror_space =
@@ -268,20 +268,19 @@ class UnorderedMap {
 
   using key_type_view = std::conditional_t<
       is_insertable_map, View<key_type *, device_type>,
-      View<const key_type *, device_type, MemoryTraits<RandomAccess> > >;
+      View<const key_type *, device_type, MemoryTraits<RandomAccess>>>;
 
   using value_type_view = std::conditional_t<
       is_insertable_map || is_modifiable_map,
       View<impl_value_type *, device_type>,
-      View<const impl_value_type *, device_type, MemoryTraits<RandomAccess> > >;
+      View<const impl_value_type *, device_type, MemoryTraits<RandomAccess>>>;
 
   using size_type_view = std::conditional_t<
       is_insertable_map, View<size_type *, device_type>,
-      View<const size_type *, device_type, MemoryTraits<RandomAccess> > >;
+      View<const size_type *, device_type, MemoryTraits<RandomAccess>>>;
 
-  using bitset_type =
-      std::conditional_t<is_insertable_map, Bitset<execution_space>,
-                         ConstBitset<execution_space> >;
+  using bitset_type = std::conditional_t<is_insertable_map, Bitset<Device>,
+                                         ConstBitset<Device>>;
 
   enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
   enum { num_scalars = 3 };
@@ -310,8 +309,13 @@ class UnorderedMap {
                      capacity() + 1)  // +1 so that the *_at functions can
                                       // always return a valid reference
         ,
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
         m_keys("UnorderedMap keys", capacity() + 1),
         m_values("UnorderedMap values", (is_set ? 1 : capacity() + 1)),
+#else
+        m_keys("UnorderedMap keys", capacity()),
+        m_values("UnorderedMap values", (is_set ? 0 : capacity())),
+#endif
         m_scalars("UnorderedMap scalars") {
     if (!is_insertable_map) {
       throw std::runtime_error(
@@ -341,17 +345,24 @@ class UnorderedMap {
       const key_type tmp = key_type();
       Kokkos::deep_copy(m_keys, tmp);
     }
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
     if (is_set) {
       const impl_value_type tmp = impl_value_type();
       Kokkos::deep_copy(m_values, tmp);
     }
+#endif
     Kokkos::deep_copy(m_scalars, 0);
     m_size = 0;
   }
 
   KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
     return (m_keys.is_allocated() && m_values.is_allocated() &&
             m_scalars.is_allocated());
+#else
+    return (m_keys.is_allocated() && (is_set || m_values.is_allocated()) &&
+            m_scalars.is_allocated());
+#endif
   }
 
   /// \brief Change the capacity of the the map
@@ -424,9 +435,6 @@ class UnorderedMap {
           "Kokkos::UnorderedMap::begin_erase: fence before setting erasable "
           "flag");
       set_flag(erasable_idx);
-      execution_space().fence(
-          "Kokkos::UnorderedMap::begin_erase: fence after setting erasable "
-          "flag");
     }
     return result;
   }
@@ -520,19 +528,35 @@ class UnorderedMap {
       // Continue searching the unordered list for this key,
       // list will only be appended during insert phase.
       // Need volatile_load as other threads may be appending.
+
+      // FIXME_SYCL replacement for memory_fence
+#ifdef KOKKOS_ENABLE_SYCL
+      size_type curr = Kokkos::atomic_load(curr_ptr);
+#else
       size_type curr = volatile_load(curr_ptr);
+#endif
 
       KOKKOS_NONTEMPORAL_PREFETCH_LOAD(
           &m_keys[curr != invalid_index ? curr : 0]);
 #if defined(__MIC__)
 #pragma noprefetch
 #endif
-      while (curr != invalid_index &&
-             !m_equal_to(volatile_load(&m_keys[curr]), k)) {
+      while (curr != invalid_index && !m_equal_to(
+#ifdef KOKKOS_ENABLE_SYCL
+                                          Kokkos::atomic_load(&m_keys[curr])
+#else
+                                          volatile_load(&m_keys[curr])
+#endif
+                                              ,
+                                          k)) {
         result.increment_list_position();
         index_hint = curr;
         curr_ptr   = &m_next_index[curr];
-        curr       = volatile_load(curr_ptr);
+#ifdef KOKKOS_ENABLE_SYCL
+        curr = Kokkos::atomic_load(curr_ptr);
+#else
+        curr = volatile_load(curr_ptr);
+#endif
         KOKKOS_NONTEMPORAL_PREFETCH_LOAD(
             &m_keys[curr != invalid_index ? curr : 0]);
       }
@@ -572,15 +596,26 @@ class UnorderedMap {
             new_index = index_hint;
             // Set key and value
             KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]);
+// FIXME_SYCL replacement for memory_fence
+#ifdef KOKKOS_ENABLE_SYCL
+            Kokkos::atomic_store(&m_keys[new_index], k);
+#else
             m_keys[new_index] = k;
+#endif
 
             if (!is_set) {
               KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]);
+#ifdef KOKKOS_ENABLE_SYCL
+              Kokkos::atomic_store(&m_values[new_index], v);
+#else
               m_values[new_index] = v;
+#endif
             }
 
+#ifndef KOKKOS_ENABLE_SYCL
             // Do not proceed until key and value are updated in global memory
             memory_fence();
+#endif
           }
         } else if (failed_insert_ref) {
           not_done = false;
@@ -660,13 +695,31 @@ class UnorderedMap {
   /// kernel.
   ///
   /// 'const value_type' via Cuda texture fetch must return by value.
-  KOKKOS_FORCEINLINE_FUNCTION
-  std::conditional_t<(is_set || has_const_value), impl_value_type,
-                     impl_value_type &>
+  template <typename Dummy = value_type>
+  KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
+      !std::is_void<Dummy>::value,  // !is_set
+      std::conditional_t<has_const_value, impl_value_type, impl_value_type &>>
   value_at(size_type i) const {
-    return m_values[is_set ? 0 : (i < capacity() ? i : capacity())];
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+    return m_values[i < capacity() ? i : capacity()];
+#else
+    KOKKOS_EXPECTS(i < capacity());
+    return m_values[i];
+#endif
   }
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+  template <typename Dummy = value_type>
+  KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Calling value_at for value_type==void is deprecated!")
+  KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
+      std::is_void<Dummy>::value,  // is_set
+      std::conditional_t<has_const_value, impl_value_type,
+                         impl_value_type &>> value_at(size_type /*i*/) const {
+    return m_values[0];
+  }
+#endif
+
   /// \brief Get the key with \c i as its direct index.
   ///
   /// \param i [in] Index directly into the array of entries.
@@ -675,7 +728,12 @@ class UnorderedMap {
   /// kernel.
   KOKKOS_FORCEINLINE_FUNCTION
   key_type key_at(size_type i) const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
     return m_keys[i < capacity() ? i : capacity()];
+#else
+    KOKKOS_EXPECTS(i < capacity());
+    return m_keys[i];
+#endif
   }
 
   KOKKOS_FORCEINLINE_FUNCTION
@@ -766,6 +824,9 @@ class UnorderedMap {
       raw_deep_copy(tmp.m_scalars.data(), src.m_scalars.data(),
                     sizeof(int) * num_scalars);
 
+      Kokkos::fence(
+          "Kokkos::UnorderedMap::create_copy_view: fence after copy to tmp");
+
       *this = tmp;
     }
   }
@@ -780,6 +841,9 @@ class UnorderedMap {
                                Kokkos::HostSpace>;
     const int true_ = true;
     raw_deep_copy(m_scalars.data() + flag, &true_, sizeof(int));
+    Kokkos::fence(
+        "Kokkos::UnorderedMap::set_flag: fence after copying flag from "
+        "HostSpace");
   }
 
   void reset_flag(int flag) const {
@@ -788,6 +852,9 @@ class UnorderedMap {
                                Kokkos::HostSpace>;
     const int false_ = false;
     raw_deep_copy(m_scalars.data() + flag, &false_, sizeof(int));
+    Kokkos::fence(
+        "Kokkos::UnorderedMap::reset_flag: fence after copying flag from "
+        "HostSpace");
   }
 
   bool get_flag(int flag) const {
@@ -796,6 +863,9 @@ class UnorderedMap {
                                typename device_type::memory_space>;
     int result = false;
     raw_deep_copy(&result, m_scalars.data() + flag, sizeof(int));
+    Kokkos::fence(
+        "Kokkos::UnorderedMap::get_flag: fence after copy to return value in "
+        "HostSpace");
     return result;
   }
 
diff --git a/packages/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp b/packages/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
index fdd78e4e5f99dc4748c093d274c1e62f9316261a..5fe3ab0df563db75a0ee6f34128fe16745a1258c 100644
--- a/packages/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
+++ b/packages/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
@@ -106,8 +106,8 @@ uint32_t MurmurHash3_x86_32(const void* key, int len, uint32_t seed) {
   uint32_t k1 = 0;
 
   switch (len & 3) {
-    case 3: k1 ^= tail[2] << 16;
-    case 2: k1 ^= tail[1] << 8;
+    case 3: k1 ^= tail[2] << 16; KOKKOS_IMPL_FALLTHROUGH
+    case 2: k1 ^= tail[1] << 8; KOKKOS_IMPL_FALLTHROUGH
     case 1:
       k1 ^= tail[0];
       k1 *= c1;
diff --git a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
index 00d3eafd231eabd8af444c8508acb42668ff883d..9fb6a4e1ce043f4c75421b026734e3d14015475f 100644
--- a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
+++ b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
@@ -47,6 +47,8 @@
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
+#include <Kokkos_Core.hpp>
+#include <Kokkos_StaticCrsGraph.hpp>
 
 namespace Kokkos {
 
diff --git a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
index d7c4a5d1ffdf9969e3c158473e7fb5754113a665..80494139d2705e06df25d8a6bb6486a1c7829594 100644
--- a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
+++ b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
@@ -45,7 +45,7 @@
 #ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
 #define KOKKOS_UNORDERED_MAP_IMPL_HPP
 
-#include <Kokkos_Core_fwd.hpp>
+#include <Kokkos_Core.hpp>
 #include <cstdint>
 
 #include <cstdio>
@@ -144,7 +144,7 @@ struct UnorderedMapHistogram {
   using execution_space = typename map_type::execution_space;
   using size_type       = typename map_type::size_type;
 
-  using histogram_view      = View<int[100], execution_space>;
+  using histogram_view      = View<int[100], typename map_type::device_type>;
   using host_histogram_view = typename histogram_view::HostMirror;
 
   map_type m_map;
@@ -170,8 +170,8 @@ struct UnorderedMapHistogram {
   }
 
   void print_length(std::ostream& out) {
-    host_histogram_view host_copy = create_mirror_view(m_length);
-    Kokkos::deep_copy(host_copy, m_length);
+    host_histogram_view host_copy =
+        create_mirror_view_and_copy(Kokkos::HostSpace{}, m_length);
 
     for (int i = 0, size = host_copy.extent(0); i < size; ++i) {
       out << host_copy[i] << " , ";
@@ -180,8 +180,8 @@ struct UnorderedMapHistogram {
   }
 
   void print_distance(std::ostream& out) {
-    host_histogram_view host_copy = create_mirror_view(m_distance);
-    Kokkos::deep_copy(host_copy, m_distance);
+    host_histogram_view host_copy =
+        create_mirror_view_and_copy(Kokkos::HostSpace{}, m_distance);
 
     for (int i = 0, size = host_copy.extent(0); i < size; ++i) {
       out << host_copy[i] << " , ";
@@ -190,8 +190,8 @@ struct UnorderedMapHistogram {
   }
 
   void print_block_distance(std::ostream& out) {
-    host_histogram_view host_copy = create_mirror_view(m_block_distance);
-    Kokkos::deep_copy(host_copy, m_block_distance);
+    host_histogram_view host_copy =
+        create_mirror_view_and_copy(Kokkos::HostSpace{}, m_block_distance);
 
     for (int i = 0, size = host_copy.extent(0); i < size; ++i) {
       out << host_copy[i] << " , ";
diff --git a/packages/kokkos/containers/unit_tests/CMakeLists.txt b/packages/kokkos/containers/unit_tests/CMakeLists.txt
index 947d222c273dc4d87823ad3560a1af6c62a1e52b..f16572b60300562eabd01563ee2469cfa899bf65 100644
--- a/packages/kokkos/containers/unit_tests/CMakeLists.txt
+++ b/packages/kokkos/containers/unit_tests/CMakeLists.txt
@@ -5,15 +5,10 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
 KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files)
 
 foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP;SYCL)
-  # Because there is always an exception to the rule
-  if(Tag STREQUAL "Threads")
-    set(DEVICE "PTHREAD")
-  else()
-    string(TOUPPER ${Tag} DEVICE)
-  endif()
+  string(TOUPPER ${Tag} DEVICE)
   string(TOLOWER ${Tag} dir)
   # Add test for that backend if it is enabled
-  if(Kokkos_ENABLE_${DEVICE})
+  if(KOKKOS_ENABLE_${DEVICE})
     set(UnitTestSources UnitTestMain.cpp)
     set(dir ${CMAKE_CURRENT_BINARY_DIR}/${dir})
     file(MAKE_DIRECTORY ${dir})
@@ -28,6 +23,7 @@ foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP;SYCL)
         OffsetView
         ScatterView
         StaticCrsGraph
+        WithoutInitializing
         UnorderedMap
         Vector
         ViewCtorPropEmbeddedDim
@@ -42,6 +38,11 @@ foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP;SYCL)
       configure_file(${dir}/dummy.cpp ${file})
       list(APPEND UnitTestSources ${file})
     endforeach()
+    #fatal error C1128: number of sections exceeded object file format limit: compile with /bigobj
+    if(KOKKOS_ENABLE_CUDA AND WIN32)
+     LIST(REMOVE_ITEM UnitTestSources ${dir}/TestCuda_DynViewAPI_generic.cpp)
+    endif()
+
     KOKKOS_ADD_EXECUTABLE_AND_TEST(UnitTest_${Tag} SOURCES ${UnitTestSources})
   endif()
 endforeach()
diff --git a/packages/kokkos/containers/unit_tests/Makefile b/packages/kokkos/containers/unit_tests/Makefile
index 82669fe1ab7532b69556cafbb7131b595f9e5f8e..a6235983d56554f58a624585f9f124d6eccb529f 100644
--- a/packages/kokkos/containers/unit_tests/Makefile
+++ b/packages/kokkos/containers/unit_tests/Makefile
@@ -60,7 +60,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	TEST_TARGETS += test-cuda
 endif
 
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
 	OBJ_THREADS = UnitTestMain.o gtest-all.o
 	OBJ_THREADS += TestThreads_Bitset.o
 	OBJ_THREADS += TestThreads_DualView.o
diff --git a/packages/kokkos/containers/unit_tests/TestDualView.hpp b/packages/kokkos/containers/unit_tests/TestDualView.hpp
index e22564aa5c24e569ae98d972fe5526a35cc741a6..75829e0769312c2844c239bfe3ac81a6966048e2 100644
--- a/packages/kokkos/containers/unit_tests/TestDualView.hpp
+++ b/packages/kokkos/containers/unit_tests/TestDualView.hpp
@@ -258,7 +258,7 @@ struct test_dual_view_deep_copy {
   }
 };
 
-template <typename Scalar, class Device>
+template <typename Scalar, class Device, bool Initialize>
 struct test_dualview_resize {
   using scalar_type     = Scalar;
   using execution_space = Device;
@@ -274,7 +274,10 @@ struct test_dualview_resize {
 
     /* Covers case "Resize on Device" */
     a.modify_device();
-    Kokkos::resize(a, factor * n, factor * m);
+    if (Initialize)
+      Kokkos::resize(Kokkos::WithoutInitializing, a, factor * n, factor * m);
+    else
+      Kokkos::resize(a, factor * n, factor * m);
     ASSERT_EQ(a.extent(0), n * factor);
     ASSERT_EQ(a.extent(1), m * factor);
 
@@ -300,12 +303,15 @@ struct test_dualview_resize {
 
     // Check
     ASSERT_EQ(a_h_sum, a_d_sum);
-    ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
+    ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1)));
 
     /* Covers case "Resize on Host" */
     a.modify_host();
 
-    Kokkos::resize(a, n / factor, m / factor);
+    if (Initialize)
+      Kokkos::resize(Kokkos::WithoutInitializing, a, n / factor, m / factor);
+    else
+      Kokkos::resize(a, n / factor, m / factor);
     ASSERT_EQ(a.extent(0), n / factor);
     ASSERT_EQ(a.extent(1), m / factor);
 
@@ -330,7 +336,7 @@ struct test_dualview_resize {
       }
 
     // Check
-    ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
+    ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1)));
     ASSERT_EQ(a_h_sum, a_d_sum);
 
   }  // end run_me
@@ -340,7 +346,7 @@ struct test_dualview_resize {
   }
 };
 
-template <typename Scalar, class Device>
+template <typename Scalar, class Device, bool Initialize>
 struct test_dualview_realloc {
   using scalar_type     = Scalar;
   using execution_space = Device;
@@ -351,7 +357,10 @@ struct test_dualview_realloc {
     const unsigned int m = 5;
 
     ViewType a("A", n, m);
-    Kokkos::realloc(a, n, m);
+    if (Initialize)
+      Kokkos::realloc(Kokkos::WithoutInitializing, a, n, m);
+    else
+      Kokkos::realloc(a, n, m);
 
     Kokkos::deep_copy(a.d_view, 1);
     a.modify_device();
@@ -375,7 +384,7 @@ struct test_dualview_realloc {
       }
 
     // Check
-    ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
+    ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1)));
     ASSERT_EQ(a_h_sum, a_d_sum);
   }  // end run_me
 
@@ -405,12 +414,14 @@ void test_dualview_deep_copy() {
 
 template <typename Scalar, typename Device>
 void test_dualview_realloc() {
-  Impl::test_dualview_realloc<Scalar, Device>();
+  Impl::test_dualview_realloc<Scalar, Device, false>();
+  Impl::test_dualview_realloc<Scalar, Device, true>();
 }
 
 template <typename Scalar, typename Device>
 void test_dualview_resize() {
-  Impl::test_dualview_resize<Scalar, Device>();
+  Impl::test_dualview_resize<Scalar, Device, false>();
+  Impl::test_dualview_resize<Scalar, Device, true>();
 }
 
 TEST(TEST_CATEGORY, dualview_combination) {
diff --git a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp
index a8d62bd24cad46531f2b4814f4d832c08758fe10..321f1228a6d4dcfeb296c307ee0fa99283023b58 100644
--- a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp
+++ b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp
@@ -712,7 +712,8 @@ class TestDynViewAPI {
   using host_view_space = typename View0::host_mirror_space;
 
   static void run_tests() {
-    run_test_resize_realloc();
+    run_test_resize_realloc<false>();
+    run_test_resize_realloc<true>();
     run_test_mirror();
     run_test_mirror_and_copy();
     run_test_scalar();
@@ -722,6 +723,7 @@ class TestDynViewAPI {
     run_test_subview();
     run_test_subview_strided();
     run_test_vector();
+    run_test_as_view_of_rank_n();
   }
 
   static void run_operator_test_rank12345() {
@@ -738,21 +740,28 @@ class TestDynViewAPI {
     TestViewOperator_LeftAndRight<int, device, 6>::testit(2, 3, 4, 2, 3, 4);
   }
 
+  template <bool Initialize>
   static void run_test_resize_realloc() {
     dView0 drv0("drv0", 10, 20, 30);
-    ASSERT_EQ(drv0.rank(), 3);
-
-    Kokkos::resize(drv0, 5, 10);
-    ASSERT_EQ(drv0.rank(), 2);
-    ASSERT_EQ(drv0.extent(0), 5);
-    ASSERT_EQ(drv0.extent(1), 10);
-    ASSERT_EQ(drv0.extent(2), 1);
-
-    Kokkos::realloc(drv0, 10, 20);
-    ASSERT_EQ(drv0.rank(), 2);
-    ASSERT_EQ(drv0.extent(0), 10);
-    ASSERT_EQ(drv0.extent(1), 20);
-    ASSERT_EQ(drv0.extent(2), 1);
+    ASSERT_EQ(drv0.rank(), 3u);
+
+    if (Initialize)
+      Kokkos::resize(Kokkos::WithoutInitializing, drv0, 5, 10);
+    else
+      Kokkos::resize(drv0, 5, 10);
+    ASSERT_EQ(drv0.rank(), 2u);
+    ASSERT_EQ(drv0.extent(0), 5u);
+    ASSERT_EQ(drv0.extent(1), 10u);
+    ASSERT_EQ(drv0.extent(2), 1u);
+
+    if (Initialize)
+      Kokkos::realloc(Kokkos::WithoutInitializing, drv0, 10, 20);
+    else
+      Kokkos::realloc(drv0, 10, 20);
+    ASSERT_EQ(drv0.rank(), 2u);
+    ASSERT_EQ(drv0.extent(0), 10u);
+    ASSERT_EQ(drv0.extent(1), 20u);
+    ASSERT_EQ(drv0.extent(2), 1u);
   }
 
   static void run_test_mirror() {
@@ -961,6 +970,199 @@ class TestDynViewAPI {
     }
   }
 
+  static void run_test_as_view_of_rank_n() {
+    Kokkos::View<int, Kokkos::HostSpace> error_flag_host("error_flag");
+    error_flag_host() = 0;
+    auto error_flag =
+        Kokkos::create_mirror_view_and_copy(DeviceType(), error_flag_host);
+
+    dView0 d("d");
+
+#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
+
+    // Rank 0
+    Kokkos::resize(d);
+
+    auto policy0 = Kokkos::RangePolicy<DeviceType>(DeviceType(), 0, 1);
+
+    View0 v0 = Kokkos::Impl::as_view_of_rank_n<0>(d);
+    // Assign values after calling as_view_of_rank_n() function under
+    // test to ensure aliasing
+    Kokkos::parallel_for(
+        policy0, KOKKOS_LAMBDA(int) { d() = 13; });
+    ASSERT_EQ(v0.size(), d.size());
+    ASSERT_EQ(v0.data(), d.data());
+    Kokkos::parallel_for(
+        policy0, KOKKOS_LAMBDA(int) {
+          if (d() != v0()) error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+
+    // Rank 1
+    Kokkos::resize(d, 1);
+
+    auto policy1 =
+        Kokkos::RangePolicy<DeviceType>(DeviceType(), 0, d.extent(0));
+
+    View1 v1 = Kokkos::Impl::as_view_of_rank_n<1>(d);
+    Kokkos::parallel_for(
+        policy1, KOKKOS_LAMBDA(int i0) { d(i0) = i0; });
+    for (unsigned int rank = 0; rank < d.rank(); ++rank)
+      ASSERT_EQ(v1.extent(rank), d.extent(rank));
+    ASSERT_EQ(v1.data(), d.data());
+    Kokkos::parallel_for(
+        policy1, KOKKOS_LAMBDA(int i0) {
+          if (d(i0) != v1(i0)) error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+
+    // Rank 2
+    Kokkos::resize(d, 1, 2);
+
+    auto policy2 = Kokkos::MDRangePolicy<DeviceType, Kokkos::Rank<2>>(
+        {0, 0}, {d.extent(0), d.extent(1)});
+
+    View2 v2 = Kokkos::Impl::as_view_of_rank_n<2>(d);
+    Kokkos::parallel_for(
+        policy2, KOKKOS_LAMBDA(int i0, int i1) { d(i0, i1) = i0 + 10 * i1; });
+    for (unsigned int rank = 0; rank < d.rank(); ++rank)
+      ASSERT_EQ(v2.extent(rank), d.extent(rank));
+    ASSERT_EQ(v2.data(), d.data());
+    Kokkos::parallel_for(
+        policy2, KOKKOS_LAMBDA(int i0, int i1) {
+          if (d(i0, i1) != v2(i0, i1)) error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+
+    // Rank 3
+    Kokkos::resize(d, 1, 2, 3);
+
+    auto policy3 = Kokkos::MDRangePolicy<DeviceType, Kokkos::Rank<3>>(
+        {0, 0, 0}, {d.extent(0), d.extent(1), d.extent(2)});
+
+    View3 v3 = Kokkos::Impl::as_view_of_rank_n<3>(d);
+    Kokkos::parallel_for(
+        policy3, KOKKOS_LAMBDA(int i0, int i1, int i2) {
+          d(i0, i1, i2) = i0 + 10 * i1 + 100 * i2;
+        });
+    for (unsigned int rank = 0; rank < d.rank(); ++rank)
+      ASSERT_EQ(v3.extent(rank), d.extent(rank));
+    ASSERT_EQ(v3.data(), d.data());
+    Kokkos::parallel_for(
+        policy3, KOKKOS_LAMBDA(int i0, int i1, int i2) {
+          if (d(i0, i1, i2) != v3(i0, i1, i2)) error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+
+    // Rank 4
+    Kokkos::resize(d, 1, 2, 3, 4);
+
+    auto policy4 = Kokkos::MDRangePolicy<DeviceType, Kokkos::Rank<4>>(
+        {0, 0, 0, 0}, {d.extent(0), d.extent(1), d.extent(2), d.extent(3)});
+
+    View4 v4 = Kokkos::Impl::as_view_of_rank_n<4>(d);
+    Kokkos::parallel_for(
+        policy4, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {
+          d(i0, i1, i2, i3) = i0 + 10 * i1 + 100 * i2 + 1000 * i3;
+        });
+    for (unsigned int rank = 0; rank < d.rank(); ++rank)
+      ASSERT_EQ(v4.extent(rank), d.extent(rank));
+    ASSERT_EQ(v4.data(), d.data());
+    Kokkos::parallel_for(
+        policy4, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {
+          if (d(i0, i1, i2, i3) != v4(i0, i1, i2, i3)) error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+
+    // Rank 5
+    Kokkos::resize(d, 1, 2, 3, 4, 5);
+
+    auto policy5 = Kokkos::MDRangePolicy<DeviceType, Kokkos::Rank<5>>(
+        {0, 0, 0, 0, 0},
+        {d.extent(0), d.extent(1), d.extent(2), d.extent(3), d.extent(4)});
+
+    View5 v5 = Kokkos::Impl::as_view_of_rank_n<5>(d);
+    Kokkos::parallel_for(
+        policy5, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {
+          d(i0, i1, i2, i3, i4) =
+              i0 + 10 * i1 + 100 * i2 + 1000 * i3 + 10000 * i4;
+        });
+    for (unsigned int rank = 0; rank < d.rank(); ++rank)
+      ASSERT_EQ(v5.extent(rank), d.extent(rank));
+    ASSERT_EQ(v5.data(), d.data());
+    Kokkos::parallel_for(
+        policy5, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {
+          if (d(i0, i1, i2, i3, i4) != v5(i0, i1, i2, i3, i4)) error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+
+    // Rank 6
+    Kokkos::resize(d, 1, 2, 3, 4, 5, 6);
+
+    auto policy6 = Kokkos::MDRangePolicy<DeviceType, Kokkos::Rank<6>>(
+        {0, 0, 0, 0, 0, 0}, {d.extent(0), d.extent(1), d.extent(2), d.extent(3),
+                             d.extent(4), d.extent(5)});
+
+    View6 v6 = Kokkos::Impl::as_view_of_rank_n<6>(d);
+    Kokkos::parallel_for(
+        policy6, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
+          d(i0, i1, i2, i3, i4, i5) =
+              i0 + 10 * i1 + 100 * i2 + 1000 * i3 + 10000 * i4 + 100000 * i5;
+        });
+    for (unsigned int rank = 0; rank < d.rank(); ++rank)
+      ASSERT_EQ(v6.extent(rank), d.extent(rank));
+    ASSERT_EQ(v6.data(), d.data());
+    Kokkos::parallel_for(
+        policy6, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
+          if (d(i0, i1, i2, i3, i4, i5) != v6(i0, i1, i2, i3, i4, i5))
+            error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+
+    // Rank 7
+    Kokkos::resize(d, 1, 2, 3, 4, 5, 6, 7);
+
+    // MDRangePolicy only accepts Rank < 7
+#if 0
+    auto policy7 = Kokkos::MDRangePolicy<DeviceType, Kokkos::Rank<7>>(
+        {0, 0, 0, 0, 0, 0, 0},
+        {d.extent(0), d.extent(1), d.extent(2), d.extent(3), d.extent(4),
+         d.extent(5), d.extent(6)});
+
+    View7 v7 = Kokkos::Impl::as_view_of_rank_n<7>(d);
+    Kokkos::parallel_for(
+        policy7,
+        KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, int i6) {
+          d(i0, i1, i2, i3, i4, i5, i6) = i0 + 10 * i1 + 100 * i2 + 1000 * i3 +
+                                          10000 * i4 + 100000 * i5 +
+                                          1000000 * i6;
+        });
+    for (unsigned int rank = 0; rank < d.rank(); ++rank)
+      ASSERT_EQ(v7.extent(rank), d.extent(rank));
+    ASSERT_EQ(v7.data(), d.data());
+    Kokkos::parallel_for(
+        policy7,
+        KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, int i6) {
+          if (d(i0, i1, i2, i3, i4, i5, i6) != v7(i0, i1, i2, i3, i4, i5, i6))
+            error_flag() = 1;
+        });
+    Kokkos::deep_copy(error_flag_host, error_flag);
+    ASSERT_EQ(error_flag_host(), 0);
+#endif  // MDRangePolict Rank < 7
+
+#endif  // defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
+
+    // Error checking test
+    EXPECT_ANY_THROW({ auto v_copy = Kokkos::Impl::as_view_of_rank_n<2>(d); });
+  }
+
   static void run_test_scalar() {
     using hView0 = typename dView0::HostMirror;  // HostMirror of DynRankView is
                                                  // a DynRankView
@@ -1071,10 +1273,10 @@ class TestDynViewAPI {
     dView0 d_uninitialized(
         Kokkos::view_alloc(Kokkos::WithoutInitializing, "uninit"), 10, 20);
     ASSERT_NE(d_uninitialized.data(), nullptr);
-    ASSERT_EQ(d_uninitialized.rank(), 2);
-    ASSERT_EQ(d_uninitialized.extent(0), 10);
-    ASSERT_EQ(d_uninitialized.extent(1), 20);
-    ASSERT_EQ(d_uninitialized.extent(2), 1);
+    ASSERT_EQ(d_uninitialized.rank(), 2u);
+    ASSERT_EQ(d_uninitialized.extent(0), 10u);
+    ASSERT_EQ(d_uninitialized.extent(1), 20u);
+    ASSERT_EQ(d_uninitialized.extent(2), 1u);
 
     dView0 dx, dy, dz;
     hView0 hx, hy, hz;
@@ -1107,8 +1309,8 @@ class TestDynViewAPI {
     ASSERT_EQ(dy.extent(0), unsigned(N1));  // Okay with UVM
     ASSERT_EQ(hx.extent(0), unsigned(N1));
     ASSERT_EQ(hy.extent(0), unsigned(N1));
-    ASSERT_EQ(dx.rank(), 3);  // Okay with UVM
-    ASSERT_EQ(hx.rank(), 3);
+    ASSERT_EQ(dx.rank(), 3u);  // Okay with UVM
+    ASSERT_EQ(hx.rank(), 3u);
 
     dx = dView0("dx", N0, N1, N2, N3);
     dy = dView0("dy", N0, N1, N2, N3);
@@ -1119,15 +1321,15 @@ class TestDynViewAPI {
     ASSERT_EQ(dy.extent(0), unsigned(N0));
     ASSERT_EQ(hx.extent(0), unsigned(N0));
     ASSERT_EQ(hy.extent(0), unsigned(N0));
-    ASSERT_EQ(dx.rank(), 4);
-    ASSERT_EQ(dy.rank(), 4);
-    ASSERT_EQ(hx.rank(), 4);
-    ASSERT_EQ(hy.rank(), 4);
+    ASSERT_EQ(dx.rank(), 4u);
+    ASSERT_EQ(dy.rank(), 4u);
+    ASSERT_EQ(hx.rank(), 4u);
+    ASSERT_EQ(hy.rank(), 4u);
 
-    ASSERT_EQ(dx.use_count(), size_t(1));
+    ASSERT_EQ(dx.use_count(), 1);
 
     dView0_unmanaged unmanaged_dx = dx;
-    ASSERT_EQ(dx.use_count(), size_t(1));
+    ASSERT_EQ(dx.use_count(), 1);
 
     dView0_unmanaged unmanaged_from_ptr_dx = dView0_unmanaged(
         dx.data(), dx.extent(0), dx.extent(1), dx.extent(2), dx.extent(3));
@@ -1139,24 +1341,24 @@ class TestDynViewAPI {
     }
 
     const_dView0 const_dx = dx;
-    ASSERT_EQ(dx.use_count(), size_t(2));
+    ASSERT_EQ(dx.use_count(), 2);
 
     {
       const_dView0 const_dx2;
       const_dx2 = const_dx;
-      ASSERT_EQ(dx.use_count(), size_t(3));
+      ASSERT_EQ(dx.use_count(), 3);
 
       const_dx2 = dy;
-      ASSERT_EQ(dx.use_count(), size_t(2));
+      ASSERT_EQ(dx.use_count(), 2);
 
       const_dView0 const_dx3(dx);
-      ASSERT_EQ(dx.use_count(), size_t(3));
+      ASSERT_EQ(dx.use_count(), 3);
 
       dView0_unmanaged dx4_unmanaged(dx);
-      ASSERT_EQ(dx.use_count(), size_t(3));
+      ASSERT_EQ(dx.use_count(), 3);
     }
 
-    ASSERT_EQ(dx.use_count(), size_t(2));
+    ASSERT_EQ(dx.use_count(), 2);
 
     ASSERT_NE(dx.data(), nullptr);
     ASSERT_NE(const_dx.data(), nullptr);
@@ -1336,18 +1538,18 @@ class TestDynViewAPI {
 
     // View - DynRankView Interoperability tests
     // deep_copy from view to dynrankview
-    const int testdim = 4;
+    constexpr size_t testdim = 4;
     dView0 dxx("dxx", testdim);
     View1 vxx("vxx", testdim);
     auto hvxx = Kokkos::create_mirror_view(vxx);
-    for (int i = 0; i < testdim; ++i) {
+    for (size_t i = 0; i < testdim; ++i) {
       hvxx(i) = i;
     }
     Kokkos::deep_copy(vxx, hvxx);
     Kokkos::deep_copy(dxx, vxx);
     auto hdxx = Kokkos::create_mirror_view(dxx);
     Kokkos::deep_copy(hdxx, dxx);
-    for (int i = 0; i < testdim; ++i) {
+    for (size_t i = 0; i < testdim; ++i) {
       ASSERT_EQ(hvxx(i), hdxx(i));
     }
 
@@ -1362,7 +1564,7 @@ class TestDynViewAPI {
     ASSERT_EQ(rank(hdxx), rank(hvdxx));
     ASSERT_EQ(hvdxx.extent(0), testdim);
     ASSERT_EQ(hdxx.extent(0), hvdxx.extent(0));
-    for (int i = 0; i < testdim; ++i) {
+    for (size_t i = 0; i < testdim; ++i) {
       ASSERT_EQ(hvxx(i), hvdxx(i));
     }
   }
@@ -1432,51 +1634,51 @@ class TestDynViewAPI {
     unsigned order[] = {6, 5, 4, 3, 2, 1, 0},
              dimen[] = {N0, N1, N2, 2, 2, 2, 2};  // LayoutRight equivalent
     sdView d7("d7", Kokkos::LayoutStride::order_dimensions(7, order, dimen));
-    ASSERT_EQ(d7.rank(), 7);
+    ASSERT_EQ(d7.rank(), 7u);
 
     sdView ds0 = Kokkos::subdynrankview(d7, 1, 1, 1, 1, 1, 1, 1);
-    ASSERT_EQ(ds0.rank(), 0);
+    ASSERT_EQ(ds0.rank(), 0u);
 
     // Basic test - ALL
     sdView dsALL = Kokkos::subdynrankview(
         d7, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(),
         Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
-    ASSERT_EQ(dsALL.rank(), 7);
+    ASSERT_EQ(dsALL.rank(), 7u);
 
     //  Send a value to final rank returning rank 6 subview
     sdView dsm1 =
         Kokkos::subdynrankview(d7, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(),
                                Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), 1);
-    ASSERT_EQ(dsm1.rank(), 6);
+    ASSERT_EQ(dsm1.rank(), 6u);
 
     //  Send a std::pair as argument to a rank
     sdView dssp = Kokkos::subdynrankview(
         d7, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(),
         Kokkos::ALL(), Kokkos::ALL(), std::pair<unsigned, unsigned>(1, 2));
-    ASSERT_EQ(dssp.rank(), 7);
+    ASSERT_EQ(dssp.rank(), 7u);
 
     //  Send a kokkos::pair as argument to a rank; take default layout as input
     dView0 dd0("dd0", N0, N1, N2, 2, 2, 2, 2);  // default layout
-    ASSERT_EQ(dd0.rank(), 7);
+    ASSERT_EQ(dd0.rank(), 7u);
     sdView dtkp = Kokkos::subdynrankview(
         dd0, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(),
         Kokkos::ALL(), Kokkos::ALL(), Kokkos::pair<unsigned, unsigned>(0, 1));
-    ASSERT_EQ(dtkp.rank(), 7);
+    ASSERT_EQ(dtkp.rank(), 7u);
 
     // Return rank 7 subview, taking a pair as one argument, layout stride input
     sdView ds7 = Kokkos::subdynrankview(
         d7, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(),
         Kokkos::ALL(), Kokkos::ALL(), Kokkos::pair<unsigned, unsigned>(0, 1));
-    ASSERT_EQ(ds7.rank(), 7);
+    ASSERT_EQ(ds7.rank(), 7u);
 
     // Default Layout DynRankView
     dView dv6("dv6", N0, N1, N2, N3, 2, 2);
-    ASSERT_EQ(dv6.rank(), 6);
+    ASSERT_EQ(dv6.rank(), 6u);
 
     // DynRankView with LayoutRight
     using drView = Kokkos::DynRankView<T, Kokkos::LayoutRight, device>;
     drView dr5("dr5", N0, N1, N2, 2, 2);
-    ASSERT_EQ(dr5.rank(), 5);
+    ASSERT_EQ(dr5.rank(), 5u);
 
     // LayoutStride but arranged as LayoutRight
     // NOTE: unused arg_layout dimensions must be set toKOKKOS_INVALID_INDEX so
@@ -1489,7 +1691,7 @@ class TestDynViewAPI {
     ls.dimension[6] = KOKKOS_INVALID_INDEX;
     ls.dimension[7] = KOKKOS_INVALID_INDEX;
     sdView d5("d5", ls);
-    ASSERT_EQ(d5.rank(), 5);
+    ASSERT_EQ(d5.rank(), 5u);
 
     //  LayoutStride arranged as LayoutRight - commented out as example that
     //  fails unit test
@@ -1522,7 +1724,7 @@ class TestDynViewAPI {
     sdView ds5 = Kokkos::subdynrankview(d5, Kokkos::ALL(), Kokkos::ALL(),
                                         Kokkos::ALL(), Kokkos::ALL(),
                                         Kokkos::pair<unsigned, unsigned>(0, 1));
-    ASSERT_EQ(ds5.rank(), 5);
+    ASSERT_EQ(ds5.rank(), 5u);
 
     // Pass in extra ALL arguments beyond the rank of the DynRank View.
     // This behavior is allowed - ignore the extra ALL arguments when
@@ -1554,7 +1756,7 @@ class TestDynViewAPI {
                                Kokkos::ALL(), 0, Kokkos::ALL());
 
     ASSERT_EQ(ds4.rank(), ds4plus.rank());
-    ASSERT_EQ(ds4.rank(), 4);
+    ASSERT_EQ(ds4.rank(), 4u);
     ASSERT_EQ(ds4.extent(0), ds4plus.extent(0));
     ASSERT_EQ(ds4.extent(4), ds4plus.extent(4));
     ASSERT_EQ(ds4.extent(5), ds4plus.extent(5));
@@ -1601,8 +1803,8 @@ class TestDynViewAPI {
     ASSERT_EQ(yl4.extent(1), xl4.extent(3));
     ASSERT_EQ(yr4.extent(0), xr4.extent(1));
     ASSERT_EQ(yr4.extent(1), xr4.extent(3));
-    ASSERT_EQ(yl4.rank(), 2);
-    ASSERT_EQ(yr4.rank(), 2);
+    ASSERT_EQ(yl4.rank(), 2u);
+    ASSERT_EQ(yr4.rank(), 2u);
 
     ASSERT_EQ(&yl4(4, 4) - &xl4(1, 4, 2, 4), 0);
     ASSERT_EQ(&yr4(4, 4) - &xr4(1, 4, 2, 4), 0);
diff --git a/packages/kokkos/containers/unit_tests/TestDynamicView.hpp b/packages/kokkos/containers/unit_tests/TestDynamicView.hpp
index 023bf92f62b48bc46878209e6c5ef6eccedeb726..5345f8ea2459f69df6ba5074216db0a594407d5d 100644
--- a/packages/kokkos/containers/unit_tests/TestDynamicView.hpp
+++ b/packages/kokkos/containers/unit_tests/TestDynamicView.hpp
@@ -93,7 +93,7 @@ struct TestDynamicView {
         ASSERT_TRUE(d3.is_allocated());
       }
       view_type da("da", 1024, arg_total_size);
-      ASSERT_EQ(da.size(), 0);
+      ASSERT_EQ(da.size(), 0u);
       // Init
       unsigned da_size = arg_total_size / 8;
       da.resize_serial(da_size);
@@ -145,7 +145,7 @@ struct TestDynamicView {
     //   Case 2: min_chunk_size is NOT a power of 2
     {
       view_type da("da", 1023, arg_total_size);
-      ASSERT_EQ(da.size(), 0);
+      ASSERT_EQ(da.size(), 0u);
       // Init
       unsigned da_size = arg_total_size / 8;
       da.resize_serial(da_size);
@@ -197,7 +197,7 @@ struct TestDynamicView {
     //   Case 3: resize reduces the size
     {
       view_type da("da", 1023, arg_total_size);
-      ASSERT_EQ(da.size(), 0);
+      ASSERT_EQ(da.size(), 0u);
       // Init
       unsigned da_size = arg_total_size / 2;
       da.resize_serial(da_size);
diff --git a/packages/kokkos/containers/unit_tests/TestErrorReporter.hpp b/packages/kokkos/containers/unit_tests/TestErrorReporter.hpp
index a90885bd33a8731667e20804d3c70fb5b8f35c37..c5394d81a613bbd7fa1c42ea17eb392d3fd95693 100644
--- a/packages/kokkos/containers/unit_tests/TestErrorReporter.hpp
+++ b/packages/kokkos/containers/unit_tests/TestErrorReporter.hpp
@@ -50,10 +50,6 @@
 #include <Kokkos_Core.hpp>
 #include <Kokkos_ErrorReporter.hpp>
 
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
 namespace Test {
 
 // Just save the data in the report.  Informative text goes in the
@@ -174,7 +170,8 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType> {
   KOKKOS_INLINE_FUNCTION
   void operator()(const int work_idx) const {
     if (driver_base::error_condition(work_idx)) {
-      double val = M_PI * static_cast<double>(work_idx);
+      double val =
+          Kokkos::Experimental::pi_v<double> * static_cast<double>(work_idx);
       typename driver_base::report_type report = {work_idx, -2 * work_idx, val};
       driver_base::m_errorReporter.add_report(work_idx, report);
     }
@@ -200,7 +197,8 @@ struct ErrorReporterDriverUseLambda
         Kokkos::RangePolicy<execution_space>(0, test_size),
         KOKKOS_CLASS_LAMBDA(const int work_idx) {
           if (driver_base::error_condition(work_idx)) {
-            double val = M_PI * static_cast<double>(work_idx);
+            double val = Kokkos::Experimental::pi_v<double> *
+                         static_cast<double>(work_idx);
             typename driver_base::report_type report = {work_idx, -2 * work_idx,
                                                         val};
             driver_base::m_errorReporter.add_report(work_idx, report);
@@ -224,7 +222,8 @@ struct ErrorReporterDriverNativeOpenMP
 #pragma omp parallel for
     for (int work_idx = 0; work_idx < test_size; ++work_idx) {
       if (driver_base::error_condition(work_idx)) {
-        double val = M_PI * static_cast<double>(work_idx);
+        double val =
+            Kokkos::Experimental::pi_v<double> * static_cast<double>(work_idx);
         typename driver_base::report_type report = {work_idx, -2 * work_idx,
                                                     val};
         driver_base::m_errorReporter.add_report(work_idx, report);
diff --git a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp
index 24a43e1ebc72820dbd84dd6e2931837cabfaecba..a127c250e1c87f6fb13d7a4f1029403bfee16c7b 100644
--- a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp
+++ b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp
@@ -91,8 +91,8 @@ void test_offsetview_construction() {
   ASSERT_EQ(ov.begin(1), -2);
   ASSERT_EQ(ov.end(1), 3);
 
-  ASSERT_EQ(ov.extent(0), 5);
-  ASSERT_EQ(ov.extent(1), 5);
+  ASSERT_EQ(ov.extent(0), 5u);
+  ASSERT_EQ(ov.extent(1), 5u);
 
 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
   {
@@ -357,7 +357,6 @@ void test_offsetview_unmanaged_construction() {
     ASSERT_EQ(bb, ii);
   }
 
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
   {
     using offset_view_type = Kokkos::Experimental::OffsetView<Scalar*, Device>;
 
@@ -397,7 +396,6 @@ void test_offsetview_unmanaged_construction() {
     ASSERT_THROW(offset_view_type(&s, {0, 0, 0}, {1, 1, 1}),
                  std::runtime_error);
   }
-#endif  // KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
 }
 
 template <typename Scalar, typename Device>
diff --git a/packages/kokkos/containers/unit_tests/TestScatterView.hpp b/packages/kokkos/containers/unit_tests/TestScatterView.hpp
index 342ce2af48afe2cba3737db653f67957d04a51d4..9fddfdcca0e1cbc850e244abfb5d24f7991633d6 100644
--- a/packages/kokkos/containers/unit_tests/TestScatterView.hpp
+++ b/packages/kokkos/containers/unit_tests/TestScatterView.hpp
@@ -67,6 +67,8 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
 
   using orig_view_type = Kokkos::View<NumberType * [12], Layout, DeviceType>;
 
+  using size_type = typename Kokkos::HostSpace::size_type;
+
   scatter_view_type scatter_view;
   int scatterSize;
 
@@ -79,21 +81,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
-      host_view(i, 0)  = 0.0;
-      host_view(i, 1)  = 0.0;
-      host_view(i, 2)  = 0.0;
-      host_view(i, 3)  = 0.0;
-      host_view(i, 4)  = 0.0;
-      host_view(i, 5)  = 0.0;
-      host_view(i, 6)  = 0.0;
-      host_view(i, 7)  = 0.0;
-      host_view(i, 8)  = 0.0;
-      host_view(i, 9)  = 0.0;
-      host_view(i, 10) = 0.0;
-      host_view(i, 11) = 0.0;
-    }
+    Kokkos::deep_copy(host_view, 0);
     Kokkos::fence();
     Kokkos::deep_copy(orig, host_view);
   }
@@ -171,34 +159,40 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
-      auto val0  = host_view(i, 0);
-      auto val1  = host_view(i, 1);
-      auto val2  = host_view(i, 2);
-      auto val3  = host_view(i, 3);
-      auto val4  = host_view(i, 4);
-      auto val5  = host_view(i, 5);
-      auto val6  = host_view(i, 6);
-      auto val7  = host_view(i, 7);
-      auto val8  = host_view(i, 8);
-      auto val9  = host_view(i, 9);
-      auto val10 = host_view(i, 10);
-      auto val11 = host_view(i, 11);
-      EXPECT_NEAR(val0, NumberType(80), 1e-14);
-      EXPECT_NEAR(val1, NumberType(20), 1e-14);
-      EXPECT_NEAR(val2, NumberType(-20), 1e-14);
-      EXPECT_NEAR(val3, NumberType(20), 1e-14);
-      EXPECT_NEAR(val4, NumberType(-20), 1e-14);
-      EXPECT_NEAR(val5, NumberType(-100), 1e-14);
-      EXPECT_NEAR(val6, NumberType(40), 1e-14);
-      EXPECT_NEAR(val7, NumberType(20), 1e-14);
-      EXPECT_NEAR(val8, NumberType(-20), 1e-14);
-      EXPECT_NEAR(val9, NumberType(-20), 1e-14);
-      EXPECT_NEAR(val10, NumberType(20), 1e-14);
-      EXPECT_NEAR(val11, NumberType(-60), 1e-14);
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
+      for (size_type j = 0; j < host_view.extent(1); ++j) {
+        EXPECT_NEAR(host_view(i, j), NumberType(ref[j]), 1e-14)
+            << "Data differs at indices " << i << ", " << j;
+      }
+    }
+  }
+
+  // check for correct padding
+  void validateResultsForSubview(
+      orig_view_type orig, std::pair<size_type, size_type>& subRangeDim0,
+      std::pair<size_type, size_type>& subRangeDim1) {
+    auto host_view =
+        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
+    Kokkos::fence();
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
+      for (size_type j = 0; j < host_view.extent(1); ++j) {
+        auto val = host_view(i, j);
+        if ((i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) &&
+            (j >= std::get<0>(subRangeDim1) && j < std::get<1>(subRangeDim1))) {
+          // is in subview
+          EXPECT_NEAR(val, NumberType(ref[j]), 1e-14)
+              << "Data differs at indices " << i << ", " << j;
+        } else {
+          // is outside of subview
+          EXPECT_NEAR(val, NumberType(0), 1e-14)
+              << "Data differs at indices " << i << ", " << j;
+        }
+      }
     }
   }
+
+ private:
+  NumberType ref[12] = {80, 20, -20, 20, -20, -100, 40, 20, -20, -20, 20, -60};
 };
 
 template <typename DeviceType, typename Layout, typename Duplication,
@@ -214,6 +208,8 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
 
   using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
 
+  using size_type = typename Kokkos::HostSpace::size_type;
+
   scatter_view_type scatter_view;
   int scatterSize;
 
@@ -226,8 +222,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
       host_view(i, 0) = 1.0;
       host_view(i, 1) = 1.0;
       host_view(i, 2) = 1.0;
@@ -260,14 +255,45 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
       auto val0 = host_view(i, 0);
       auto val1 = host_view(i, 1);
       auto val2 = host_view(i, 2);
-      EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14);
-      EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14);
-      EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14);
+      EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14)
+          << "Data differs at index " << i;
+      EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14)
+          << "Data differs at index " << i;
+      EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14)
+          << "Data differs at index " << i;
+    }
+  }
+
+  // check for correct padding
+  void validateResultsForSubview(
+      orig_view_type orig, std::pair<size_type, size_type>& subRangeDim0,
+      std::pair<size_type, size_type>& subRangeDim1) {
+    (void)subRangeDim1;
+    auto host_view =
+        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
+    Kokkos::fence();
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
+      auto val0 = host_view(i, 0);
+      auto val1 = host_view(i, 1);
+      auto val2 = host_view(i, 2);
+      if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) {
+        // is in subview
+        EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14);
+        EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14);
+        EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14);
+      } else {
+        // is outside of subview
+        EXPECT_NEAR(val0, NumberType(1), 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_NEAR(val1, NumberType(1), 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_NEAR(val2, NumberType(1), 1e-14)
+            << "Data differs at index " << i;
+      }
     }
   }
 };
@@ -285,6 +311,8 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
 
   using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
 
+  using size_type = typename Kokkos::HostSpace::size_type;
+
   scatter_view_type scatter_view;
   int scatterSize;
 
@@ -297,8 +325,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
       host_view(i, 0) = 999999.0;
       host_view(i, 1) = 999999.0;
       host_view(i, 2) = 999999.0;
@@ -331,14 +358,48 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
       auto val0 = host_view(i, 0);
       auto val1 = host_view(i, 1);
       auto val2 = host_view(i, 2);
-      EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14);
-      EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14);
-      EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14);
+      EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14)
+          << "Data differs at index " << i;
+      EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14)
+          << "Data differs at index " << i;
+      EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14)
+          << "Data differs at index " << i;
+    }
+  }
+
+  // check for correct padding
+  void validateResultsForSubview(
+      orig_view_type orig, std::pair<size_type, size_type>& subRangeDim0,
+      std::pair<size_type, size_type>& subRangeDim1) {
+    (void)subRangeDim1;
+    auto host_view =
+        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
+    Kokkos::fence();
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
+      auto val0 = host_view(i, 0);
+      auto val1 = host_view(i, 1);
+      auto val2 = host_view(i, 2);
+      if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) {
+        // is in subview
+        EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14)
+            << "Data differs at index " << i;
+      } else {
+        // is outside of subview
+        EXPECT_NEAR(val0, NumberType(999999), 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_NEAR(val1, NumberType(999999), 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_NEAR(val2, NumberType(999999), 1e-14)
+            << "Data differs at index " << i;
+      }
     }
   }
 };
@@ -356,6 +417,8 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
 
   using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
 
+  using size_type = typename Kokkos::HostSpace::size_type;
+
   scatter_view_type scatter_view;
   int scatterSize;
 
@@ -368,8 +431,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
       host_view(i, 0) = 0.0;
       host_view(i, 1) = 0.0;
       host_view(i, 2) = 0.0;
@@ -401,14 +463,104 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
     auto host_view =
         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
     Kokkos::fence();
-    for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
-         ++i) {
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
       auto val0 = host_view(i, 0);
       auto val1 = host_view(i, 1);
       auto val2 = host_view(i, 2);
-      EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14);
-      EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14);
-      EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14);
+      EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14)
+          << "Data differs at index " << i;
+      EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14)
+          << "Data differs at index " << i;
+      EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14)
+          << "Data differs at index " << i;
+    }
+  }
+
+  // check for correct padding
+  void validateResultsForSubview(
+      orig_view_type orig, std::pair<size_type, size_type>& subRangeDim0,
+      std::pair<size_type, size_type>& subRangeDim1) {
+    (void)subRangeDim1;
+    auto host_view =
+        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
+    Kokkos::fence();
+    for (size_type i = 0; i < host_view.extent(0); ++i) {
+      auto val0 = host_view(i, 0);
+      auto val1 = host_view(i, 1);
+      auto val2 = host_view(i, 2);
+      if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) {
+        // is in subview
+        EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14)
+            << "Data differs at index " << i;
+      } else {
+        // is outside of subview
+        EXPECT_NEAR(val0, NumberType(0), 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_NEAR(val1, NumberType(0), 1e-14)
+            << "Data differs at index " << i;
+        EXPECT_NEAR(val2, NumberType(0), 1e-14)
+            << "Data differs at index " << i;
+      }
+    }
+  }
+};
+
+// Test ScatterView on subview
+template <typename DeviceType, typename Layout, typename Op,
+          typename NumberType>
+struct test_default_scatter_sub_view {
+ public:
+  using default_duplication = Kokkos::Impl::Experimental::DefaultDuplication<
+      typename DeviceType::execution_space>;
+  using Duplication  = typename default_duplication::type;
+  using Contribution = typename Kokkos::Impl::Experimental::DefaultContribution<
+      typename DeviceType::execution_space, Duplication>::type;
+  using scatter_view_def =
+      typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
+                                          Contribution, Op,
+                                          NumberType>::scatter_view_type;
+  using orig_view_def =
+      typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
+                                          Contribution, Op,
+                                          NumberType>::orig_view_type;
+
+  using size_type = typename Kokkos::HostSpace::size_type;
+
+  void run_test(int n) {
+    // Test creation via create_scatter_view overload 1
+    {
+      orig_view_def original_view("original_view", n);
+
+      auto rangeDim0 = std::pair<size_type, size_type>(0 + 1, n - 1);
+      auto rangeDim1 =
+          std::pair<size_type, size_type>(0, original_view.extent(1));
+
+      auto original_sub_view =
+          Kokkos::subview(original_view, rangeDim0, rangeDim1);
+
+      scatter_view_def scatter_view =
+          Kokkos::Experimental::create_scatter_view(Op{}, original_sub_view);
+
+      test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
+                                 Op, NumberType>
+          scatter_view_test_impl(scatter_view);
+      scatter_view_test_impl.initialize(original_view);
+      scatter_view_test_impl.run_parallel(original_sub_view.extent(0));
+
+      Kokkos::Experimental::contribute(original_sub_view, scatter_view);
+      scatter_view.reset_except(original_sub_view);
+
+      scatter_view_test_impl.run_parallel(original_sub_view.extent(0));
+
+      Kokkos::Experimental::contribute(original_sub_view, scatter_view);
+      Kokkos::fence();
+
+      scatter_view_test_impl.validateResultsForSubview(original_view, rangeDim0,
+                                                       rangeDim1);
     }
   }
 };
@@ -674,14 +826,24 @@ void test_scatter_view(int64_t n) {
         test_default_sv;
     test_default_sv.run_test(n);
   }
+
+  // run same test but on a subview (this covers support for padded
+  // ScatterViews)
+  {
+    test_default_scatter_sub_view<DeviceType, Kokkos::LayoutRight, ScatterType,
+                                  NumberType>
+        test_default_scatter_view_subview;
+    test_default_scatter_view_subview.run_test(n);
+  }
+
   TestDuplicatedScatterView<DeviceType, ScatterType, NumberType> duptest(n);
 }
 
 TEST(TEST_CATEGORY, scatterview) {
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum, double>(
       10);
-  test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum,
-                    unsigned int>(10);
+
+  test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum, int>(10);
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(10);
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(10);
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMax>(10);
@@ -698,10 +860,11 @@ TEST(TEST_CATEGORY, scatterview) {
 #endif
 
 #endif
+
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum, double>(
       big_n);
-  test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum,
-                    unsigned int>(big_n);
+  test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum, int>(
+      big_n);
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(big_n);
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(big_n);
   test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMax>(big_n);
@@ -712,8 +875,7 @@ TEST(TEST_CATEGORY, scatterview_devicetype) {
       Kokkos::Device<TEST_EXECSPACE, typename TEST_EXECSPACE::memory_space>;
 
   test_scatter_view<device_type, Kokkos::Experimental::ScatterSum, double>(10);
-  test_scatter_view<device_type, Kokkos::Experimental::ScatterSum,
-                    unsigned int>(10);
+  test_scatter_view<device_type, Kokkos::Experimental::ScatterSum, int>(10);
   test_scatter_view<device_type, Kokkos::Experimental::ScatterProd>(10);
   test_scatter_view<device_type, Kokkos::Experimental::ScatterMin>(10);
   test_scatter_view<device_type, Kokkos::Experimental::ScatterMax>(10);
@@ -734,7 +896,7 @@ TEST(TEST_CATEGORY, scatterview_devicetype) {
     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterSum,
                       double>(10);
     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterSum,
-                      unsigned int>(10);
+                      int>(10);
     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterProd>(
         10);
     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterMin>(10);
@@ -743,8 +905,8 @@ TEST(TEST_CATEGORY, scatterview_devicetype) {
         Kokkos::Device<device_execution_space, host_accessible_space>;
     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterSum,
                       double>(10);
-    test_scatter_view<host_device_type, Kokkos::Experimental::ScatterSum,
-                      unsigned int>(10);
+    test_scatter_view<host_device_type, Kokkos::Experimental::ScatterSum, int>(
+        10);
     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterProd>(10);
     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterMin>(10);
     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterMax>(10);
diff --git a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
index c9a3eed90c372fcd4211d0a46868fe8bcc061614..907a2d61d6d07cce48e2a6a6d937d783f09516c7 100644
--- a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
+++ b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
@@ -105,8 +105,8 @@ void run_test_graph() {
     auto rowView = hx.rowConst(i);
     ASSERT_EQ(rowView.length, graph[i].size());
     for (size_t j = 0; j < rowView.length; ++j) {
-      ASSERT_EQ(rowView.colidx(j), graph[i][j]);
-      ASSERT_EQ(rowView(j), graph[i][j]);
+      ASSERT_EQ(rowView.colidx(j), (size_t)graph[i][j]);
+      ASSERT_EQ(rowView(j), (size_t)graph[i][j]);
     }
   }
 }
diff --git a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp
index 8009b996566322147bcd5cfe257dd858b72819bb..1550ca7b5be4ac9514e7488381883eb6a98c37c2 100644
--- a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp
+++ b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp
@@ -294,9 +294,7 @@ void test_deep_copy(uint32_t num_nodes) {
   }
 }
 
-// FIXME_SYCL wrong results on Nvidia GPUs but correct on Host and Intel GPUs
-// WORKAROUND MSVC
-#if !defined(_WIN32) && !defined(KOKKOS_ENABLE_SYCL)
+#if !defined(_WIN32)
 TEST(TEST_CATEGORY, UnorderedMap_insert) {
   for (int i = 0; i < 500; ++i) {
     test_insert<TEST_EXECSPACE>(100000, 90000, 100, true);
diff --git a/packages/kokkos/containers/unit_tests/TestVector.hpp b/packages/kokkos/containers/unit_tests/TestVector.hpp
index 33b265e0774aa4eae38fd62e47d8e9b59864572a..57b92c38f81798331ef183facc76a99bbe45e0be 100644
--- a/packages/kokkos/containers/unit_tests/TestVector.hpp
+++ b/packages/kokkos/containers/unit_tests/TestVector.hpp
@@ -60,7 +60,7 @@ struct test_vector_insert {
 
   template <typename Vector>
   void run_test(Vector& a) {
-    int n = a.size();
+    auto n = a.size();
 
     auto it = a.begin();
     if (n > 0) {
@@ -97,7 +97,7 @@ struct test_vector_insert {
 #endif
 
     ASSERT_EQ(a.size(), n + 1 + n + 5);
-    ASSERT_EQ(std::distance(it_return, a.begin() + 17), 0);
+    ASSERT_EQ(std::distance(it_return, a.begin() + 17), 0u);
 
     Vector b;
 
@@ -109,7 +109,7 @@ struct test_vector_insert {
 #else
     b.insert(b.begin(), 7, 9);
 #endif
-    ASSERT_EQ(b.size(), 7);
+    ASSERT_EQ(b.size(), 7u);
     ASSERT_EQ(b[0], scalar_type(9));
 
     it = a.begin();
@@ -121,7 +121,7 @@ struct test_vector_insert {
     it_return = a.insert(it, b.begin(), b.end());
 #endif
     ASSERT_EQ(a.size(), n + 1 + n + 5 + 7);
-    ASSERT_EQ(std::distance(it_return, a.begin() + 27 + n), 0);
+    ASSERT_EQ(std::distance(it_return, a.begin() + 27 + n), 0u);
 
     // Testing insert at end via all three function interfaces
     a.insert(a.end(), 11);
diff --git a/packages/kokkos/containers/unit_tests/TestWithoutInitializing.hpp b/packages/kokkos/containers/unit_tests/TestWithoutInitializing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..feae32179b2f1dce72e0346146338658f638e2c7
--- /dev/null
+++ b/packages/kokkos/containers/unit_tests/TestWithoutInitializing.hpp
@@ -0,0 +1,183 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+#include <Kokkos_DualView.hpp>
+#include <Kokkos_DynRankView.hpp>
+#include <Kokkos_ScatterView.hpp>
+
+#include <../../core/unit_test/tools/include/ToolTestingUtilities.hpp>
+
+TEST(TEST_CATEGORY, resize_realloc_no_init_dualview) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  Kokkos::DualView<int*** * [1][2][3][4], TEST_EXECSPACE> bla("bla", 5, 6, 7,
+                                                              8);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::resize(Kokkos::WithoutInitializing, bla, 5, 6, 7, 9);
+        Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8);
+      },
+      [&](BeginParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found begin event"}};
+        return MatchDiagnostic{false};
+      },
+      [&](EndParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found end event"}};
+        return MatchDiagnostic{false};
+      });
+  ASSERT_TRUE(success);
+  listen_tool_events(Config::DisableAll());
+}
+
+TEST(TEST_CATEGORY, resize_realloc_no_alloc_dualview) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels(),
+                     Config::EnableAllocs());
+  Kokkos::DualView<int*** * [1][2][3][4], TEST_EXECSPACE> bla("bla", 8, 7, 6,
+                                                              5);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::resize(bla, 8, 7, 6, 5);
+        Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 7, 6, 5);
+      },
+      [&](BeginParallelForEvent) {
+        return MatchDiagnostic{true, {"Found begin event"}};
+      },
+      [&](EndParallelForEvent) {
+        return MatchDiagnostic{true, {"Found end event"}};
+      },
+      [&](AllocateDataEvent) {
+        return MatchDiagnostic{true, {"Found alloc event"}};
+      },
+      [&](DeallocateDataEvent) {
+        return MatchDiagnostic{true, {"Found dealloc event"}};
+      });
+  ASSERT_TRUE(success);
+  listen_tool_events(Config::DisableAll());
+}
+
+TEST(TEST_CATEGORY, resize_realloc_no_init_dynrankview) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  Kokkos::DynRankView<int, TEST_EXECSPACE> bla("bla", 5, 6, 7, 8);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::resize(Kokkos::WithoutInitializing, bla, 5, 6, 7, 9);
+        Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8);
+      },
+      [&](BeginParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found begin event"}};
+        return MatchDiagnostic{false};
+      },
+      [&](EndParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found end event"}};
+        return MatchDiagnostic{false};
+      });
+  ASSERT_TRUE(success);
+  listen_tool_events(Config::DisableAll());
+}
+
+TEST(TEST_CATEGORY, resize_realloc_no_init_scatterview) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  Kokkos::Experimental::ScatterView<
+      int*** * [1][2][3], typename TEST_EXECSPACE::array_layout, TEST_EXECSPACE>
+      bla("bla", 4, 5, 6, 7);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::resize(Kokkos::WithoutInitializing, bla, 4, 5, 6, 8);
+        Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8);
+      },
+      [&](BeginParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found begin event"}};
+        return MatchDiagnostic{false};
+      },
+      [&](EndParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found end event"}};
+        return MatchDiagnostic{false};
+      });
+  ASSERT_TRUE(success);
+  listen_tool_events(Config::DisableAll());
+}
+
+TEST(TEST_CATEGORY, resize_realloc_no_alloc_scatterview) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels(),
+                     Config::EnableAllocs());
+  Kokkos::Experimental::ScatterView<
+      int*** * [1][2][3], typename TEST_EXECSPACE::array_layout, TEST_EXECSPACE>
+      bla("bla", 7, 6, 5, 4);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::resize(bla, 7, 6, 5, 4);
+        Kokkos::realloc(Kokkos::WithoutInitializing, bla, 7, 6, 5, 4);
+      },
+      [&](BeginParallelForEvent) {
+        return MatchDiagnostic{true, {"Found begin event"}};
+      },
+      [&](EndParallelForEvent) {
+        return MatchDiagnostic{true, {"Found end event"}};
+      },
+      [&](AllocateDataEvent) {
+        return MatchDiagnostic{true, {"Found alloc event"}};
+      },
+      [&](DeallocateDataEvent) {
+        return MatchDiagnostic{true, {"Found dealloc event"}};
+      });
+  ASSERT_TRUE(success);
+  listen_tool_events(Config::DisableAll());
+}
diff --git a/packages/kokkos/core/CMakeLists.txt b/packages/kokkos/core/CMakeLists.txt
index 68d3f83319995037aaa9528a93ee30c024c3ac39..bb60c368f7ce7e6bd935dcd277426571fd863d6f 100644
--- a/packages/kokkos/core/CMakeLists.txt
+++ b/packages/kokkos/core/CMakeLists.txt
@@ -10,5 +10,3 @@ KOKKOS_ADD_TEST_DIRECTORIES(unit_test)
 KOKKOS_ADD_TEST_DIRECTORIES(perf_test)
 
 KOKKOS_SUBPACKAGE_POSTPROCESS()
-
-
diff --git a/packages/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp b/packages/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp
index 5b7c2a7a03907f8f0c854482c06ef155441c097d..b1a7bf1e8376d3f4402820207387231ecb9252a8 100644
--- a/packages/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp
+++ b/packages/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp
@@ -5,37 +5,49 @@
 namespace Test {
 
 namespace {
-template <class ExecSpace>
-struct SpaceInstance {
-  static ExecSpace create() { return ExecSpace(); }
-  static void destroy(ExecSpace&) {}
-  static bool overlap() { return false; }
-};
+
+template <class ExecutionSpace>
+bool is_overlapping(const ExecutionSpace&) {
+  return false;
+}
 
 #ifndef KOKKOS_ENABLE_DEBUG
 #ifdef KOKKOS_ENABLE_CUDA
 template <>
-struct SpaceInstance<Kokkos::Cuda> {
-  static Kokkos::Cuda create() {
-    cudaStream_t stream;
-    cudaStreamCreate(&stream);
-    return Kokkos::Cuda(stream);
+bool is_overlapping<Kokkos::Cuda>(const Kokkos::Cuda&) {
+  bool value          = true;
+  auto local_rank_str = std::getenv("CUDA_LAUNCH_BLOCKING");
+  if (local_rank_str) {
+    value = (std::stoi(local_rank_str) == 0);
   }
-  static void destroy(Kokkos::Cuda& space) {
-    cudaStream_t stream = space.cuda_stream();
-    cudaStreamDestroy(stream);
-  }
-  static bool overlap() {
-    bool value          = true;
-    auto local_rank_str = std::getenv("CUDA_LAUNCH_BLOCKING");
-    if (local_rank_str) {
-      value = (std::stoi(local_rank_str) == 0);
-    }
-    return value;
-  }
-};
+  return value;
+}
 #endif
+
+#ifdef KOKKOS_ENABLE_HIP
+template <>
+bool is_overlapping<Kokkos::Experimental::HIP>(
+    const Kokkos::Experimental::HIP&) {
+  // FIXME_HIP This doesn't pass yet in CI.
+  return false;
+  // bool value          = true;
+  // auto local_rank_str = std::getenv("HIP_LAUNCH_BLOCKING");
+  // if (local_rank_str) {
+  //  value = (std::stoi(local_rank_str) == 0);
+  //}
+  // return value;
+}
+#endif
+
+#ifdef KOKKOS_ENABLE_SYCL
+template <>
+bool is_overlapping<Kokkos::Experimental::SYCL>(
+    const Kokkos::Experimental::SYCL&) {
+  return true;
+}
 #endif
+#endif
+
 }  // namespace
 
 struct FunctorRange {
@@ -133,8 +145,10 @@ TEST(default_exec, overlap_range_policy) {
   int R = 10;
 
   TEST_EXECSPACE space;
-  TEST_EXECSPACE space1 = SpaceInstance<TEST_EXECSPACE>::create();
-  TEST_EXECSPACE space2 = SpaceInstance<TEST_EXECSPACE>::create();
+  std::vector<TEST_EXECSPACE> execution_space_instances =
+      Kokkos::Experimental::partition_space(space, 1, 1);
+  TEST_EXECSPACE space1 = execution_space_instances[0];
+  TEST_EXECSPACE space2 = execution_space_instances[1];
 
   Kokkos::View<double**, TEST_EXECSPACE> a("A", N, M);
   FunctorRange f(M, R, a);
@@ -204,7 +218,7 @@ TEST(default_exec, overlap_range_policy) {
   Kokkos::fence();
   double time_end = timer.seconds();
 
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_GT(time_end, 1.5 * time_overlap);
   }
   printf("Time RangePolicy: NonOverlap: %lf Time Overlap: %lf\n", time_end,
@@ -237,7 +251,7 @@ TEST(default_exec, overlap_range_policy) {
       fr, result);
   double time_not_fenced = timer.seconds();
   Kokkos::fence();
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_GT(time_fenced, 2.0 * time_not_fenced);
   }
 
@@ -279,13 +293,11 @@ TEST(default_exec, overlap_range_policy) {
   ASSERT_EQ(h_result1(), h_result());
   ASSERT_EQ(h_result2(), h_result());
 
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_LT(time_overlapped_reduce, 1.5 * time_no_overlapped_reduce);
   }
   printf("Time RangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",
          time_no_overlapped_reduce, time_overlapped_reduce);
-  SpaceInstance<TEST_EXECSPACE>::destroy(space1);
-  SpaceInstance<TEST_EXECSPACE>::destroy(space2);
 }
 
 TEST(default_exec, overlap_mdrange_policy) {
@@ -294,8 +306,10 @@ TEST(default_exec, overlap_mdrange_policy) {
   int R = 10;
 
   TEST_EXECSPACE space;
-  TEST_EXECSPACE space1 = SpaceInstance<TEST_EXECSPACE>::create();
-  TEST_EXECSPACE space2 = SpaceInstance<TEST_EXECSPACE>::create();
+  std::vector<TEST_EXECSPACE> execution_space_instances =
+      Kokkos::Experimental::partition_space(space, 1, 1);
+  TEST_EXECSPACE space1 = execution_space_instances[0];
+  TEST_EXECSPACE space2 = execution_space_instances[1];
 
   Kokkos::View<double**, TEST_EXECSPACE> a("A", N, M);
   FunctorMDRange f(M, R, a);
@@ -377,7 +391,7 @@ TEST(default_exec, overlap_mdrange_policy) {
   Kokkos::fence();
   double time_end = timer.seconds();
 
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_GT(time_end, 1.5 * time_overlap);
   }
   printf("Time MDRangePolicy: NonOverlap: %lf Time Overlap: %lf\n", time_end,
@@ -412,7 +426,7 @@ TEST(default_exec, overlap_mdrange_policy) {
       fr, result);
   double time_not_fenced = timer.seconds();
   Kokkos::fence();
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_GT(time_fenced, 2.0 * time_not_fenced);
   }
 
@@ -458,13 +472,11 @@ TEST(default_exec, overlap_mdrange_policy) {
   ASSERT_EQ(h_result1(), h_result());
   ASSERT_EQ(h_result2(), h_result());
 
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_LT(time_overlapped_reduce, 1.5 * time_no_overlapped_reduce);
   }
   printf("Time MDRangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",
          time_no_overlapped_reduce, time_overlapped_reduce);
-  SpaceInstance<TEST_EXECSPACE>::destroy(space2);
-  SpaceInstance<TEST_EXECSPACE>::destroy(space1);
 }
 
 TEST(default_exec, overlap_team_policy) {
@@ -473,8 +485,10 @@ TEST(default_exec, overlap_team_policy) {
   int R = 10;
 
   TEST_EXECSPACE space;
-  TEST_EXECSPACE space1 = SpaceInstance<TEST_EXECSPACE>::create();
-  TEST_EXECSPACE space2 = SpaceInstance<TEST_EXECSPACE>::create();
+  std::vector<TEST_EXECSPACE> execution_space_instances =
+      Kokkos::Experimental::partition_space(space, 1, 1);
+  TEST_EXECSPACE space1 = execution_space_instances[0];
+  TEST_EXECSPACE space2 = execution_space_instances[1];
 
   Kokkos::View<double**, Kokkos::LayoutRight, TEST_EXECSPACE> a("A", N, M);
   FunctorTeam f(M, R, a);
@@ -547,7 +561,7 @@ TEST(default_exec, overlap_team_policy) {
   Kokkos::fence();
   double time_end = timer.seconds();
 
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_GT(time_end, 1.5 * time_overlap);
   }
   printf("Time TeamPolicy: NonOverlap: %lf Time Overlap: %lf\n", time_end,
@@ -580,7 +594,7 @@ TEST(default_exec, overlap_team_policy) {
       fr, result);
   double time_not_fenced = timer.seconds();
   Kokkos::fence();
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_GT(time_fenced, 2.0 * time_not_fenced);
   }
   timer.reset();
@@ -621,12 +635,10 @@ TEST(default_exec, overlap_team_policy) {
   ASSERT_EQ(h_result1(), h_result());
   ASSERT_EQ(h_result2(), h_result());
 
-  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
+  if (is_overlapping(space)) {
     ASSERT_LT(time_overlapped_reduce, 1.5 * time_no_overlapped_reduce);
   }
   printf("Time TeamPolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",
          time_no_overlapped_reduce, time_overlapped_reduce);
-  SpaceInstance<TEST_EXECSPACE>::destroy(space1);
-  SpaceInstance<TEST_EXECSPACE>::destroy(space2);
 }
 }  // namespace Test
diff --git a/packages/kokkos/core/src/CMakeLists.txt b/packages/kokkos/core/src/CMakeLists.txt
index 499736c60d55b7746682f8828a9af45fc6c0aa8b..88cca93f3cda0939bfa6effaf6f25d971cef9cbc 100644
--- a/packages/kokkos/core/src/CMakeLists.txt
+++ b/packages/kokkos/core/src/CMakeLists.txt
@@ -8,7 +8,7 @@ KOKKOS_INCLUDE_DIRECTORIES(
 INSTALL (DIRECTORY
   "${CMAKE_CURRENT_SOURCE_DIR}/"
   DESTINATION ${KOKKOS_HEADER_DIR}
-  FILES_MATCHING
+  PATTERN desul/src EXCLUDE
   PATTERN "*.inc"
   PATTERN "*.inc_*"
   PATTERN "*.hpp"
@@ -36,7 +36,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
   APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/OpenMPTarget/*.hpp)
 ENDIF()
 
-IF (KOKKOS_ENABLE_PTHREAD)
+IF (KOKKOS_ENABLE_THREADS)
   APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Threads/*.cpp)
   APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/Threads/*.hpp)
 ENDIF()
@@ -91,11 +91,16 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscore
 
 KOKKOS_LINK_TPL(kokkoscore PUBLIC HWLOC)
 KOKKOS_LINK_TPL(kokkoscore PUBLIC MEMKIND)
-KOKKOS_LINK_TPL(kokkoscore PUBLIC CUDA)
+IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
+  KOKKOS_LINK_TPL(kokkoscore PUBLIC CUDA)
+ENDIF()
 KOKKOS_LINK_TPL(kokkoscore PUBLIC HPX)
 KOKKOS_LINK_TPL(kokkoscore PUBLIC LIBDL)
 KOKKOS_LINK_TPL(kokkoscore PUBLIC LIBRT)
-KOKKOS_LINK_TPL(kokkoscore PUBLIC PTHREAD)
+# On *nix-like systems (Linux, macOS) we need pthread for C++ std::thread
+IF (NOT WIN32)
+  KOKKOS_LINK_TPL(kokkoscore PUBLIC THREADS)
+ENDIF()
 KOKKOS_LINK_TPL(kokkoscore PUBLIC ROCM)
 
 # FIXME: We need a proper solution to figure out whether to enable
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
index f6b276240316ec1e6edc332d680eb72853c980b1..31601944ba5a03979aecc9431919e5cbd819ba4b 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -487,24 +487,15 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::attach_texture_object(
 // <editor-fold desc="SharedAllocationRecord destructors"> {{{1
 
 SharedAllocationRecord<Kokkos::CudaSpace, void>::~SharedAllocationRecord() {
-  const char *label = nullptr;
-  if (Kokkos::Profiling::profileLibraryLoaded()) {
-    SharedAllocationHeader header;
-    Kokkos::Impl::DeepCopy<Kokkos::CudaSpace, HostSpace>(
-        &header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader));
-    label = header.label();
-  }
   auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size;
-  m_space.deallocate(label, SharedAllocationRecord<void, void>::m_alloc_ptr,
+  m_space.deallocate(m_label.c_str(),
+                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                      alloc_size, (alloc_size - sizeof(SharedAllocationHeader)));
 }
 
 SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::~SharedAllocationRecord() {
-  const char *label = nullptr;
-  if (Kokkos::Profiling::profileLibraryLoaded()) {
-    label = RecordBase::m_alloc_ptr->m_label;
-  }
-  m_space.deallocate(label, SharedAllocationRecord<void, void>::m_alloc_ptr,
+  m_space.deallocate(m_label.c_str(),
+                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                      SharedAllocationRecord<void, void>::m_alloc_size,
                      (SharedAllocationRecord<void, void>::m_alloc_size -
                       sizeof(SharedAllocationHeader)));
@@ -512,7 +503,7 @@ SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::~SharedAllocationRecord() {
 
 SharedAllocationRecord<Kokkos::CudaHostPinnedSpace,
                        void>::~SharedAllocationRecord() {
-  m_space.deallocate(RecordBase::m_alloc_ptr->m_label,
+  m_space.deallocate(m_label.c_str(),
                      SharedAllocationRecord<void, void>::m_alloc_ptr,
                      SharedAllocationRecord<void, void>::m_alloc_size,
                      (SharedAllocationRecord<void, void>::m_alloc_size -
@@ -537,7 +528,8 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord(
 #endif
           Impl::checked_allocation_with_header(arg_space, arg_label,
                                                arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_tex_obj(0),
       m_space(arg_space) {
 
@@ -546,8 +538,13 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord(
   this->base_t::_fill_host_accessible_header_info(header, arg_label);
 
   // Copy to device memory
-  Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(RecordBase::m_alloc_ptr, &header,
-                                               sizeof(SharedAllocationHeader));
+  Kokkos::Cuda exec;
+  Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(
+      exec, RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
+  exec.fence(
+      "SharedAllocationRecord<Kokkos::CudaSpace, "
+      "void>::SharedAllocationRecord(): fence after copying header from "
+      "HostSpace");
 }
 
 SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::SharedAllocationRecord(
@@ -562,7 +559,8 @@ SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::SharedAllocationRecord(
 #endif
           Impl::checked_allocation_with_header(arg_space, arg_label,
                                                arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_tex_obj(0),
       m_space(arg_space) {
   this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
@@ -583,7 +581,8 @@ SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::
 #endif
           Impl::checked_allocation_with_header(arg_space, arg_label,
                                                arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
   this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
                                                   arg_label);
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp
deleted file mode 100644
index c81286eb1004b10219b64f38563bc3e8af257ae9..0000000000000000000000000000000000000000
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp
+++ /dev/null
@@ -1,1012 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 3.0
-//       Copyright (2020) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
-//
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_CUDA_HALF_HPP_
-#define KOKKOS_CUDA_HALF_HPP_
-
-#include <Kokkos_Macros.hpp>
-#ifdef KOKKOS_ENABLE_CUDA
-#if !(defined(KOKKOS_COMPILER_CLANG) && KOKKOS_COMPILER_CLANG < 900) && \
-    !(defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL50) ||  \
-      defined(KOKKOS_ARCH_MAXWELL52))
-#include <cuda_fp16.h>
-#include <iosfwd>  // istream & ostream for extraction and insertion ops
-#include <string>
-#include <Kokkos_NumericTraits.hpp>  // reduction_identity
-
-#ifndef KOKKOS_IMPL_HALF_TYPE_DEFINED
-// Make sure no one else tries to define half_t
-#define KOKKOS_IMPL_HALF_TYPE_DEFINED
-
-namespace Kokkos {
-namespace Impl {
-struct half_impl_t {
-  using type = __half;
-};
-}  // namespace Impl
-namespace Experimental {
-
-// Forward declarations
-class half_t;
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(float val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(bool val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(double val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(short val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(int val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long long val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned short val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned int val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long val);
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long long val);
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, bool>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
-        cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned int>::value, T>
-    cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
-        cast_from_half(half_t);
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
-        cast_from_half(half_t);
-
-class alignas(2) half_t {
- public:
-  using impl_type = Kokkos::Impl::half_impl_t::type;
-
- private:
-  impl_type val;
-
- public:
-  KOKKOS_FUNCTION
-  half_t() : val(0.0F) {}
-
-  // Copy constructors
-  KOKKOS_DEFAULTED_FUNCTION
-  half_t(const half_t&) noexcept = default;
-
-  KOKKOS_INLINE_FUNCTION
-  half_t(const volatile half_t& rhs) {
-#ifdef __CUDA_ARCH__
-    val = rhs.val;
-#else
-    const volatile uint16_t* rv_ptr =
-        reinterpret_cast<const volatile uint16_t*>(&rhs.val);
-    const uint16_t rv_val = *rv_ptr;
-    val                   = reinterpret_cast<const impl_type&>(rv_val);
-#endif  // __CUDA_ARCH__
-  }
-
-  // Don't support implicit conversion back to impl_type.
-  // impl_type is a storage only type on host.
-  KOKKOS_FUNCTION
-  explicit operator impl_type() const { return val; }
-  KOKKOS_FUNCTION
-  explicit operator float() const { return cast_from_half<float>(*this); }
-  KOKKOS_FUNCTION
-  explicit operator bool() const { return cast_from_half<bool>(*this); }
-  KOKKOS_FUNCTION
-  explicit operator double() const { return cast_from_half<double>(*this); }
-  KOKKOS_FUNCTION
-  explicit operator short() const { return cast_from_half<short>(*this); }
-  KOKKOS_FUNCTION
-  explicit operator int() const { return cast_from_half<int>(*this); }
-  KOKKOS_FUNCTION
-  explicit operator long() const { return cast_from_half<long>(*this); }
-  KOKKOS_FUNCTION
-  explicit operator long long() const {
-    return cast_from_half<long long>(*this);
-  }
-  KOKKOS_FUNCTION
-  explicit operator unsigned short() const {
-    return cast_from_half<unsigned short>(*this);
-  }
-  KOKKOS_FUNCTION
-  explicit operator unsigned int() const {
-    return cast_from_half<unsigned int>(*this);
-  }
-  KOKKOS_FUNCTION
-  explicit operator unsigned long() const {
-    return cast_from_half<unsigned long>(*this);
-  }
-  KOKKOS_FUNCTION
-  explicit operator unsigned long long() const {
-    return cast_from_half<unsigned long long>(*this);
-  }
-
-  /**
-   * Conversion constructors.
-   *
-   * Support implicit conversions from impl_type, float, double -> half_t
-   * Mixed precision expressions require upcasting which is done in the
-   * "// Binary Arithmetic" operator overloads below.
-   *
-   * Support implicit conversions from integral types -> half_t.
-   * Expressions involving half_t with integral types require downcasting
-   * the integral types to half_t. Existing operator overloads can handle this
-   * with the addition of the below implicit conversion constructors.
-   */
-  KOKKOS_FUNCTION
-  half_t(impl_type rhs) : val(rhs) {}
-  KOKKOS_FUNCTION
-  half_t(float rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(double rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  explicit half_t(bool rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(short rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(int rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(long rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(long long rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(unsigned short rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(unsigned int rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(unsigned long rhs) : val(cast_to_half(rhs).val) {}
-  KOKKOS_FUNCTION
-  half_t(unsigned long long rhs) : val(cast_to_half(rhs).val) {}
-
-  // Unary operators
-  KOKKOS_FUNCTION
-  half_t operator+() const {
-    half_t tmp = *this;
-#ifdef __CUDA_ARCH__
-    tmp.val = +tmp.val;
-#else
-    tmp.val               = __float2half(+__half2float(tmp.val));
-#endif
-    return tmp;
-  }
-
-  KOKKOS_FUNCTION
-  half_t operator-() const {
-    half_t tmp = *this;
-#ifdef __CUDA_ARCH__
-    tmp.val = -tmp.val;
-#else
-    tmp.val               = __float2half(-__half2float(tmp.val));
-#endif
-    return tmp;
-  }
-
-  // Prefix operators
-  KOKKOS_FUNCTION
-  half_t& operator++() {
-#ifdef __CUDA_ARCH__
-    ++val;
-#else
-    float tmp             = __half2float(val);
-    ++tmp;
-    val       = __float2half(tmp);
-#endif
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator--() {
-#ifdef __CUDA_ARCH__
-    --val;
-#else
-    float tmp = __half2float(val);
-    --tmp;
-    val     = __float2half(tmp);
-#endif
-    return *this;
-  }
-
-  // Postfix operators
-  KOKKOS_FUNCTION
-  half_t operator++(int) {
-    half_t tmp = *this;
-    operator++();
-    return tmp;
-  }
-
-  KOKKOS_FUNCTION
-  half_t operator--(int) {
-    half_t tmp = *this;
-    operator--();
-    return tmp;
-  }
-
-  // Binary operators
-  KOKKOS_FUNCTION
-  half_t& operator=(impl_type rhs) {
-    val = rhs;
-    return *this;
-  }
-
-  template <class T>
-  KOKKOS_FUNCTION half_t& operator=(T rhs) {
-    val = cast_to_half(rhs).val;
-    return *this;
-  }
-
-  template <class T>
-  KOKKOS_FUNCTION void operator=(T rhs) volatile {
-    impl_type new_val = cast_to_half(rhs).val;
-    volatile uint16_t* val_ptr =
-        reinterpret_cast<volatile uint16_t*>(const_cast<impl_type*>(&val));
-    *val_ptr = reinterpret_cast<uint16_t&>(new_val);
-  }
-
-  // Compound operators
-  KOKKOS_FUNCTION
-  half_t& operator+=(half_t rhs) {
-#ifdef __CUDA_ARCH__
-    val += rhs.val;
-#else
-    val     = __float2half(__half2float(val) + __half2float(rhs.val));
-#endif
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  void operator+=(const volatile half_t& rhs) volatile {
-    half_t tmp_rhs = rhs;
-    half_t tmp_lhs = *this;
-
-    tmp_lhs += tmp_rhs;
-    *this = tmp_lhs;
-  }
-
-  // Compound operators: upcast overloads for +=
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator+=(T& lhs, half_t rhs) {
-    lhs += static_cast<T>(rhs);
-    return lhs;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator+=(float rhs) {
-    float result = static_cast<float>(val) + rhs;
-    val          = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator+=(double rhs) {
-    double result = static_cast<double>(val) + rhs;
-    val           = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator-=(half_t rhs) {
-#ifdef __CUDA_ARCH__
-    val -= rhs.val;
-#else
-    val     = __float2half(__half2float(val) - __half2float(rhs.val));
-#endif
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  void operator-=(const volatile half_t& rhs) volatile {
-    half_t tmp_rhs = rhs;
-    half_t tmp_lhs = *this;
-
-    tmp_lhs -= tmp_rhs;
-    *this = tmp_lhs;
-  }
-
-  // Compund operators: upcast overloads for -=
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator-=(T& lhs, half_t rhs) {
-    lhs -= static_cast<T>(rhs);
-    return lhs;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator-=(float rhs) {
-    float result = static_cast<float>(val) - rhs;
-    val          = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator-=(double rhs) {
-    double result = static_cast<double>(val) - rhs;
-    val           = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator*=(half_t rhs) {
-#ifdef __CUDA_ARCH__
-    val *= rhs.val;
-#else
-    val     = __float2half(__half2float(val) * __half2float(rhs.val));
-#endif
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  void operator*=(const volatile half_t& rhs) volatile {
-    half_t tmp_rhs = rhs;
-    half_t tmp_lhs = *this;
-
-    tmp_lhs *= tmp_rhs;
-    *this = tmp_lhs;
-  }
-
-  // Compund operators: upcast overloads for *=
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator*=(T& lhs, half_t rhs) {
-    lhs *= static_cast<T>(rhs);
-    return lhs;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator*=(float rhs) {
-    float result = static_cast<float>(val) * rhs;
-    val          = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator*=(double rhs) {
-    double result = static_cast<double>(val) * rhs;
-    val           = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator/=(half_t rhs) {
-#ifdef __CUDA_ARCH__
-    val /= rhs.val;
-#else
-    val     = __float2half(__half2float(val) / __half2float(rhs.val));
-#endif
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  void operator/=(const volatile half_t& rhs) volatile {
-    half_t tmp_rhs = rhs;
-    half_t tmp_lhs = *this;
-
-    tmp_lhs /= tmp_rhs;
-    *this = tmp_lhs;
-  }
-
-  // Compund operators: upcast overloads for /=
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator/=(T& lhs, half_t rhs) {
-    lhs /= static_cast<T>(rhs);
-    return lhs;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator/=(float rhs) {
-    float result = static_cast<float>(val) / rhs;
-    val          = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  KOKKOS_FUNCTION
-  half_t& operator/=(double rhs) {
-    double result = static_cast<double>(val) / rhs;
-    val           = static_cast<impl_type>(result);
-    return *this;
-  }
-
-  // Binary Arithmetic
-  KOKKOS_FUNCTION
-  half_t friend operator+(half_t lhs, half_t rhs) {
-#ifdef __CUDA_ARCH__
-    lhs.val += rhs.val;
-#else
-    lhs.val = __float2half(__half2float(lhs.val) + __half2float(rhs.val));
-#endif
-    return lhs;
-  }
-
-  // Binary Arithmetic upcast operators for +
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator+(half_t lhs, T rhs) {
-    return T(lhs) + rhs;
-  }
-
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator+(T lhs, half_t rhs) {
-    return lhs + T(rhs);
-  }
-
-  KOKKOS_FUNCTION
-  half_t friend operator-(half_t lhs, half_t rhs) {
-#ifdef __CUDA_ARCH__
-    lhs.val -= rhs.val;
-#else
-    lhs.val = __float2half(__half2float(lhs.val) - __half2float(rhs.val));
-#endif
-    return lhs;
-  }
-
-  // Binary Arithmetic upcast operators for -
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator-(half_t lhs, T rhs) {
-    return T(lhs) - rhs;
-  }
-
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator-(T lhs, half_t rhs) {
-    return lhs - T(rhs);
-  }
-
-  KOKKOS_FUNCTION
-  half_t friend operator*(half_t lhs, half_t rhs) {
-#ifdef __CUDA_ARCH__
-    lhs.val *= rhs.val;
-#else
-    lhs.val = __float2half(__half2float(lhs.val) * __half2float(rhs.val));
-#endif
-    return lhs;
-  }
-
-  // Binary Arithmetic upcast operators for *
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator*(half_t lhs, T rhs) {
-    return T(lhs) * rhs;
-  }
-
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator*(T lhs, half_t rhs) {
-    return lhs * T(rhs);
-  }
-
-  KOKKOS_FUNCTION
-  half_t friend operator/(half_t lhs, half_t rhs) {
-#ifdef __CUDA_ARCH__
-    lhs.val /= rhs.val;
-#else
-    lhs.val = __float2half(__half2float(lhs.val) / __half2float(rhs.val));
-#endif
-    return lhs;
-  }
-
-  // Binary Arithmetic upcast operators for /
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator/(half_t lhs, T rhs) {
-    return T(lhs) / rhs;
-  }
-
-  template <class T>
-  KOKKOS_FUNCTION std::enable_if_t<
-      std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend
-  operator/(T lhs, half_t rhs) {
-    return lhs / T(rhs);
-  }
-
-  // Logical operators
-  KOKKOS_FUNCTION
-  bool operator!() const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(!val);
-#else
-    return !__half2float(val);
-#endif
-  }
-
-  // NOTE: Loses short-circuit evaluation
-  KOKKOS_FUNCTION
-  bool operator&&(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val && rhs.val);
-#else
-    return __half2float(val) && __half2float(rhs.val);
-#endif
-  }
-
-  // NOTE: Loses short-circuit evaluation
-  KOKKOS_FUNCTION
-  bool operator||(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val || rhs.val);
-#else
-    return __half2float(val) || __half2float(rhs.val);
-#endif
-  }
-
-  // Comparison operators
-  KOKKOS_FUNCTION
-  bool operator==(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val == rhs.val);
-#else
-    return __half2float(val) == __half2float(rhs.val);
-#endif
-  }
-
-  KOKKOS_FUNCTION
-  bool operator!=(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val != rhs.val);
-#else
-    return __half2float(val) != __half2float(rhs.val);
-#endif
-  }
-
-  KOKKOS_FUNCTION
-  bool operator<(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val < rhs.val);
-#else
-    return __half2float(val) < __half2float(rhs.val);
-#endif
-  }
-
-  KOKKOS_FUNCTION
-  bool operator>(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val > rhs.val);
-#else
-    return __half2float(val) > __half2float(rhs.val);
-#endif
-  }
-
-  KOKKOS_FUNCTION
-  bool operator<=(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val <= rhs.val);
-#else
-    return __half2float(val) <= __half2float(rhs.val);
-#endif
-  }
-
-  KOKKOS_FUNCTION
-  bool operator>=(half_t rhs) const {
-#ifdef __CUDA_ARCH__
-    return static_cast<bool>(val >= rhs.val);
-#else
-    return __half2float(val) >= __half2float(rhs.val);
-#endif
-  }
-
-  KOKKOS_FUNCTION
-  friend bool operator==(const volatile half_t& lhs,
-                         const volatile half_t& rhs) {
-    half_t tmp_lhs = lhs, tmp_rhs = rhs;
-    return tmp_lhs == tmp_rhs;
-  }
-
-  KOKKOS_FUNCTION
-  friend bool operator!=(const volatile half_t& lhs,
-                         const volatile half_t& rhs) {
-    half_t tmp_lhs = lhs, tmp_rhs = rhs;
-    return tmp_lhs != tmp_rhs;
-  }
-
-  KOKKOS_FUNCTION
-  friend bool operator<(const volatile half_t& lhs,
-                        const volatile half_t& rhs) {
-    half_t tmp_lhs = lhs, tmp_rhs = rhs;
-    return tmp_lhs < tmp_rhs;
-  }
-
-  KOKKOS_FUNCTION
-  friend bool operator>(const volatile half_t& lhs,
-                        const volatile half_t& rhs) {
-    half_t tmp_lhs = lhs, tmp_rhs = rhs;
-    return tmp_lhs > tmp_rhs;
-  }
-
-  KOKKOS_FUNCTION
-  friend bool operator<=(const volatile half_t& lhs,
-                         const volatile half_t& rhs) {
-    half_t tmp_lhs = lhs, tmp_rhs = rhs;
-    return tmp_lhs <= tmp_rhs;
-  }
-
-  KOKKOS_FUNCTION
-  friend bool operator>=(const volatile half_t& lhs,
-                         const volatile half_t& rhs) {
-    half_t tmp_lhs = lhs, tmp_rhs = rhs;
-    return tmp_lhs >= tmp_rhs;
-  }
-
-  // Insertion and extraction operators
-  friend std::ostream& operator<<(std::ostream& os, const half_t& x) {
-    const std::string out = std::to_string(static_cast<double>(x));
-    os << out;
-    return os;
-  }
-
-  friend std::istream& operator>>(std::istream& is, half_t& x) {
-    std::string in;
-    is >> in;
-    x = std::stod(in);
-    return is;
-  }
-};
-
-// CUDA before 11.1 only has the half <-> float conversions marked host device
-// So we will largely convert to float on the host for conversion
-// But still call the correct functions on the device
-#if (CUDA_VERSION < 11100)
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(half_t val) { return val; }
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(float val) { return half_t(__float2half(val)); }
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(bool val) { return cast_to_half(static_cast<float>(val)); }
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(double val) {
-  // double2half was only introduced in CUDA 11 too
-  return half_t(__float2half(static_cast<float>(val)));
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(short val) {
-#ifdef __CUDA_ARCH__
-  return half_t(__short2half_rn(val));
-#else
-  return half_t(__float2half(static_cast<float>(val)));
-#endif
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned short val) {
-#ifdef __CUDA_ARCH__
-  return half_t(__ushort2half_rn(val));
-#else
-  return half_t(__float2half(static_cast<float>(val)));
-#endif
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(int val) {
-#ifdef __CUDA_ARCH__
-  return half_t(__int2half_rn(val));
-#else
-  return half_t(__float2half(static_cast<float>(val)));
-#endif
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned int val) {
-#ifdef __CUDA_ARCH__
-  return half_t(__uint2half_rn(val));
-#else
-  return half_t(__float2half(static_cast<float>(val)));
-#endif
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long long val) {
-#ifdef __CUDA_ARCH__
-  return half_t(__ll2half_rn(val));
-#else
-  return half_t(__float2half(static_cast<float>(val)));
-#endif
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long long val) {
-#ifdef __CUDA_ARCH__
-  return half_t(__ull2half_rn(val));
-#else
-  return half_t(__float2half(static_cast<float>(val)));
-#endif
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long val) {
-  return cast_to_half(static_cast<long long>(val));
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long val) {
-  return cast_to_half(static_cast<unsigned long long>(val));
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
-cast_from_half(half_t val) {
-  return __half2float(half_t::impl_type(val));
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, bool>::value, T>
-cast_from_half(half_t val) {
-  return static_cast<T>(cast_from_half<float>(val));
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
-cast_from_half(half_t val) {
-  return static_cast<T>(__half2float(half_t::impl_type(val)));
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
-cast_from_half(half_t val) {
-#ifdef __CUDA_ARCH__
-  return __half2short_rz(half_t::impl_type(val));
-#else
-  return static_cast<T>(__half2float(half_t::impl_type(val)));
-#endif
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
-    cast_from_half(half_t val) {
-#ifdef __CUDA_ARCH__
-  return __half2ushort_rz(half_t::impl_type(val));
-#else
-  return static_cast<T>(__half2float(half_t::impl_type(val)));
-#endif
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
-cast_from_half(half_t val) {
-#ifdef __CUDA_ARCH__
-  return __half2int_rz(half_t::impl_type(val));
-#else
-  return static_cast<T>(__half2float(half_t::impl_type(val)));
-#endif
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned>::value, T>
-cast_from_half(half_t val) {
-#ifdef __CUDA_ARCH__
-  return __half2uint_rz(half_t::impl_type(val));
-#else
-  return static_cast<T>(__half2float(half_t::impl_type(val)));
-#endif
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
-cast_from_half(half_t val) {
-#ifdef __CUDA_ARCH__
-  return __half2ll_rz(half_t::impl_type(val));
-#else
-  return static_cast<T>(__half2float(half_t::impl_type(val)));
-#endif
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
-    cast_from_half(half_t val) {
-#ifdef __CUDA_ARCH__
-  return __half2ull_rz(half_t::impl_type(val));
-#else
-  return static_cast<T>(__half2float(half_t::impl_type(val)));
-#endif
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
-cast_from_half(half_t val) {
-  return static_cast<T>(cast_from_half<long long>(val));
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
-    cast_from_half(half_t val) {
-  return static_cast<T>(cast_from_half<unsigned long long>(val));
-}
-
-#else  // CUDA 11.1 versions follow
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(float val) { return __float2half(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(double val) { return __double2half(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(short val) { return __short2half_rn(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned short val) { return __ushort2half_rn(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(int val) { return __int2half_rn(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned int val) { return __uint2half_rn(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long long val) { return __ll2half_rn(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long long val) { return __ull2half_rn(val); }
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long val) {
-  return cast_to_half(static_cast<long long>(val));
-}
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long val) {
-  return cast_to_half(static_cast<unsigned long long>(val));
-}
-
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
-cast_from_half(half_t val) {
-  return __half2float(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
-cast_from_half(half_t val) {
-  return __half2double(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
-cast_from_half(half_t val) {
-  return __half2short_rz(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
-    cast_from_half(half_t val) {
-  return __half2ushort_rz(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
-cast_from_half(half_t val) {
-  return __half2int_rz(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned int>::value, T>
-cast_from_half(half_t val) {
-  return __half2uint_rz(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
-cast_from_half(half_t val) {
-  return __half2ll_rz(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
-    cast_from_half(half_t val) {
-  return __half2ull_rz(val);
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
-cast_from_half(half_t val) {
-  return static_cast<T>(cast_from_half<long long>(val));
-}
-template <class T>
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
-    cast_from_half(half_t val) {
-  return static_cast<T>(cast_from_half<unsigned long long>(val));
-}
-#endif
-}  // namespace Experimental
-
-// use float as the return type for sum and prod since cuda_fp16.h
-// has no constexpr functions for casting to __half
-template <>
-struct reduction_identity<Kokkos::Experimental::half_t> {
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() noexcept {
-    return 0.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() noexcept {
-    return 1.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() noexcept {
-    return -65504.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() noexcept {
-    return 65504.0F;
-  }
-};
-
-}  // namespace Kokkos
-#endif  // KOKKOS_IMPL_HALF_TYPE_DEFINED
-#endif  // KOKKOS_ENABLE_CUDA
-#endif  // Disables for half_t on cuda:
-        // Clang/8||KEPLER30||KEPLER32||KEPLER37||MAXWELL50||MAXWELL52
-#endif
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8a7641347241a2443b8ca7e1a15d10ea00c74a2
--- /dev/null
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp
@@ -0,0 +1,573 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_HALF_HPP_
+#define KOKKOS_CUDA_HALF_HPP_
+
+#ifdef KOKKOS_IMPL_CUDA_HALF_TYPE_DEFINED
+
+#include <Kokkos_Half.hpp>
+#include <Kokkos_NumericTraits.hpp>  // reduction_identity
+
+#if CUDA_VERSION >= 11000
+#include <cuda_bf16.h>
+#endif
+
+namespace Kokkos {
+namespace Experimental {
+
+/************************** half conversions **********************************/
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(half_t val) { return val; }
+
+// CUDA before 11.1 only has the half <-> float conversions marked host device
+// So we will largely convert to float on the host for conversion
+// But still call the correct functions on the device
+#if (CUDA_VERSION < 11010)
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(float val) { return half_t(__float2half(val)); }
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(bool val) { return cast_to_half(static_cast<float>(val)); }
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(double val) {
+  // double2half was only introduced in CUDA 11 too
+  return half_t(__float2half(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(short val) {
+#ifdef __CUDA_ARCH__
+  return half_t(__short2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned short val) {
+#ifdef __CUDA_ARCH__
+  return half_t(__ushort2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(int val) {
+#ifdef __CUDA_ARCH__
+  return half_t(__int2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned int val) {
+#ifdef __CUDA_ARCH__
+  return half_t(__uint2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long long val) {
+#ifdef __CUDA_ARCH__
+  return half_t(__ll2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long long val) {
+#ifdef __CUDA_ARCH__
+  return half_t(__ull2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long val) {
+  return cast_to_half(static_cast<long long>(val));
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long val) {
+  return cast_to_half(static_cast<unsigned long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+cast_from_half(half_t val) {
+  return __half2float(half_t::impl_type(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, bool>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<float>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+cast_from_half(half_t val) {
+#ifdef __CUDA_ARCH__
+  return __half2short_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+    cast_from_half(half_t val) {
+#ifdef __CUDA_ARCH__
+  return __half2ushort_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+cast_from_half(half_t val) {
+#ifdef __CUDA_ARCH__
+  return __half2int_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned>::value, T>
+cast_from_half(half_t val) {
+#ifdef __CUDA_ARCH__
+  return __half2uint_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+cast_from_half(half_t val) {
+#ifdef __CUDA_ARCH__
+  return __half2ll_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+    cast_from_half(half_t val) {
+#ifdef __CUDA_ARCH__
+  return __half2ull_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+    cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<unsigned long long>(val));
+}
+
+#else  // CUDA 11.1 versions follow
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(float val) { return __float2half(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(double val) { return __double2half(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(short val) { return __short2half_rn(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned short val) { return __ushort2half_rn(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(int val) { return __int2half_rn(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned int val) { return __uint2half_rn(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long long val) { return __ll2half_rn(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long long val) { return __ull2half_rn(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long val) {
+  return cast_to_half(static_cast<long long>(val));
+}
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long val) {
+  return cast_to_half(static_cast<unsigned long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+cast_from_half(half_t val) {
+  return __half2float(__half(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<double>(__half2float(__half(val)));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+cast_from_half(half_t val) {
+  return __half2short_rz(__half(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+    cast_from_half(half_t val) {
+  return __half2ushort_rz(__half(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+cast_from_half(half_t val) {
+  return __half2int_rz(__half(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned int>::value, T>
+cast_from_half(half_t val) {
+  return __half2uint_rz(__half(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+cast_from_half(half_t val) {
+  return __half2ll_rz(__half(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+    cast_from_half(half_t val) {
+  return __half2ull_rz(__half(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<long long>(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+    cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<unsigned long long>(val));
+}
+#endif
+
+/************************** bhalf conversions *********************************/
+// Go in this branch if CUDA version is >= 11.0.0 and less than 11.1.0 or if the
+// architecture is not Ampere
+#if CUDA_VERSION >= 11000 && \
+    (CUDA_VERSION < 11010 || !defined(KOKKOS_ARCH_AMPERE))
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(bhalf_t val) { return val; }
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(float val) { return bhalf_t(__float2bfloat16(val)); }
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(bool val) {
+  return cast_to_bhalf(static_cast<float>(val));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(double val) {
+  // double2bfloat16 was only introduced in CUDA 11 too
+  return bhalf_t(__float2bfloat16(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(short val) {
+  return bhalf_t(__float2bfloat16(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned short val) {
+  return bhalf_t(__float2bfloat16(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(int val) {
+  return bhalf_t(__float2bfloat16(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned int val) {
+  return bhalf_t(__float2bfloat16(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long long val) {
+  return bhalf_t(__float2bfloat16(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long long val) {
+  return bhalf_t(__float2bfloat16(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long val) {
+  return cast_to_bhalf(static_cast<long long>(val));
+}
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long val) {
+  return cast_to_bhalf(static_cast<unsigned long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return __bfloat162float(bhalf_t::impl_type(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, bool>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(cast_from_bhalf<float>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(__bfloat162float(bhalf_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(__bfloat162float(bhalf_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+    cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(__bfloat162float(bhalf_t::impl_type(val)));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(__bfloat162float(bhalf_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(__bfloat162float(bhalf_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(__bfloat162float(bhalf_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+    cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(__bfloat162float(bhalf_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(cast_from_bhalf<long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+    cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(cast_from_bhalf<unsigned long long>(val));
+}
+#endif  // CUDA_VERSION >= 11000 && CUDA_VERSION < 11010
+
+#if CUDA_VERSION >= 11010 && \
+    ((defined(KOKKOS_ARCH_AMPERE80) || defined(KOKKOS_ARCH_AMPERE86)))
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(bhalf_t val) { return val; }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(float val) { return __float2bfloat16(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(double val) { return __double2bfloat16(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(short val) { return __short2bfloat16_rn(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned short val) { return __ushort2bfloat16_rn(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(int val) { return __int2bfloat16_rn(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned int val) { return __uint2bfloat16_rn(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long long val) { return __ll2bfloat16_rn(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long long val) { return __ull2bfloat16_rn(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long val) {
+  return cast_to_bhalf(static_cast<long long>(val));
+}
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long val) {
+  return cast_to_bhalf(static_cast<unsigned long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return __bfloat162float(__nv_bfloat16(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<double>(__bfloat162float(__nv_bfloat16(val)));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return __bfloat162short_rz(__nv_bfloat16(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+    cast_from_bhalf(bhalf_t val) {
+  return __bfloat162ushort_rz(__nv_bfloat16(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return __bfloat162int_rz(__nv_bfloat16(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned int>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return __bfloat162uint_rz(__nv_bfloat16(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return __bfloat162ll_rz(__nv_bfloat16(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+    cast_from_bhalf(bhalf_t val) {
+  return __bfloat162ull_rz(__nv_bfloat16(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(cast_from_bhalf<long long>(val));
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+    cast_from_bhalf(bhalf_t val) {
+  return static_cast<T>(cast_from_bhalf<unsigned long long>(val));
+}
+#endif  // CUDA_VERSION >= 11010
+}  // namespace Experimental
+
+#if (CUDA_VERSION >= 11000)
+template <>
+struct reduction_identity<Kokkos::Experimental::bhalf_t> {
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() noexcept {
+    return 0.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() noexcept {
+    return 1.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() noexcept {
+    return -0x7f7f;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() noexcept {
+    return 0x7f7f;
+  }
+};
+#endif  // CUDA_VERSION >= 11000
+
+// use float as the return type for sum and prod since cuda_fp16.h
+// has no constexpr functions for casting to __half
+template <>
+struct reduction_identity<Kokkos::Experimental::half_t> {
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() noexcept {
+    return 0.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() noexcept {
+    return 1.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() noexcept {
+    return -65504.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() noexcept {
+    return 65504.0F;
+  }
+};
+
+}  // namespace Kokkos
+#endif  // KOKKOS_IMPL_CUDA_HALF_TYPE_DEFINED
+#endif
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e296a9239a6f0cc35604a84cda6cd7a919b7130a
--- /dev/null
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp
@@ -0,0 +1,80 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_HALF_IMPL_TYPE_HPP_
+#define KOKKOS_CUDA_HALF_IMPL_TYPE_HPP_
+
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_CUDA
+#if !(defined(KOKKOS_COMPILER_CLANG) && KOKKOS_COMPILER_CLANG < 900) && \
+    !(defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL50) ||  \
+      defined(KOKKOS_ARCH_MAXWELL52))
+#include <cuda_fp16.h>
+#if (CUDA_VERSION >= 11000)
+#include <cuda_bf16.h>
+#endif  // CUDA_VERSION >= 11000
+
+#ifndef KOKKOS_IMPL_HALF_TYPE_DEFINED
+// Make sure no one else tries to define half_t
+#define KOKKOS_IMPL_HALF_TYPE_DEFINED
+#define KOKKOS_IMPL_CUDA_HALF_TYPE_DEFINED
+
+namespace Kokkos {
+namespace Impl {
+struct half_impl_t {
+  using type = __half;
+};
+#if (CUDA_VERSION >= 11000)
+#define KOKKOS_IMPL_BHALF_TYPE_DEFINED
+struct bhalf_impl_t {
+  using type = __nv_bfloat16;
+};
+#endif  // CUDA_VERSION >= 11000
+}  // namespace Impl
+}  // namespace Kokkos
+#endif  // KOKKOS_IMPL_HALF_TYPE_DEFINED
+#endif  // Disables for half_t on cuda:
+        // Clang/8||KEPLER30||KEPLER32||KEPLER37||MAXWELL50||MAXWELL52
+#endif  // KOKKOS_ENABLE_CUDA
+#endif
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp
index 6964d5b41b72368e9b4305d37e156e08321f7814..294be2774b8f93edbeba3893b62e978f08ff28d9 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@@ -129,17 +129,18 @@ int cuda_kernel_arch() {
   return arch;
 }
 
-#ifdef KOKKOS_ENABLE_CUDA_UVM
-bool cuda_launch_blocking() {
-  const char *env = getenv("CUDA_LAUNCH_BLOCKING");
-
-  if (env == nullptr) return false;
+}  // namespace
 
-  return std::stoi(env);
+Kokkos::View<uint32_t *, Kokkos::CudaSpace> cuda_global_unique_token_locks(
+    bool deallocate) {
+  static Kokkos::View<uint32_t *, Kokkos::CudaSpace> locks =
+      Kokkos::View<uint32_t *, Kokkos::CudaSpace>();
+  if (!deallocate && locks.extent(0) == 0)
+    locks = Kokkos::View<uint32_t *, Kokkos::CudaSpace>(
+        "Kokkos::UniqueToken<Cuda>::m_locks", Kokkos::Cuda().concurrency());
+  if (deallocate) locks = Kokkos::View<uint32_t *, Kokkos::CudaSpace>();
+  return locks;
 }
-#endif
-
-}  // namespace
 
 void cuda_device_synchronize(const std::string &name) {
   Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::Cuda>(
@@ -271,6 +272,7 @@ const CudaInternalDevices &CudaInternalDevices::singleton() {
 
 unsigned long *CudaInternal::constantMemHostStaging = nullptr;
 cudaEvent_t CudaInternal::constantMemReusable       = nullptr;
+std::mutex CudaInternal::constantMemMutex;
 
 //----------------------------------------------------------------------------
 
@@ -300,8 +302,7 @@ void CudaInternal::print_configuration(std::ostream &s) const {
 //----------------------------------------------------------------------------
 
 CudaInternal::~CudaInternal() {
-  if (m_stream || m_scratchSpace || m_scratchFlags || m_scratchUnified ||
-      m_scratchConcurrentBitset) {
+  if (m_stream || m_scratchSpace || m_scratchFlags || m_scratchUnified) {
     std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()"
               << std::endl;
   }
@@ -310,7 +311,7 @@ CudaInternal::~CudaInternal() {
   m_cudaArch                = -1;
   m_multiProcCount          = 0;
   m_maxWarpCount            = 0;
-  m_maxBlock                = 0;
+  m_maxBlock                = {0, 0, 0};
   m_maxSharedWords          = 0;
   m_maxConcurrency          = 0;
   m_scratchSpaceCount       = 0;
@@ -321,7 +322,6 @@ CudaInternal::~CudaInternal() {
   m_scratchSpace            = nullptr;
   m_scratchFlags            = nullptr;
   m_scratchUnified          = nullptr;
-  m_scratchConcurrentBitset = nullptr;
   m_stream                  = nullptr;
   for (int i = 0; i < m_n_team_scratch; ++i) {
     m_team_scratch_current_size[i] = 0;
@@ -442,7 +442,9 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream,
     //----------------------------------
     // Maximum number of blocks:
 
-    m_maxBlock = cudaProp.maxGridSize[0];
+    m_maxBlock[0] = cudaProp.maxGridSize[0];
+    m_maxBlock[1] = cudaProp.maxGridSize[1];
+    m_maxBlock[2] = cudaProp.maxGridSize[2];
 
     m_shmemPerSM       = cudaProp.sharedMemPerMultiprocessor;
     m_maxShmemPerBlock = cudaProp.sharedMemPerBlock;
@@ -497,11 +499,6 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream,
                            sizeof(uint32_t) * buffer_bound);
 
       Record::increment(r);
-
-      m_scratchConcurrentBitset = reinterpret_cast<uint32_t *>(r->data());
-
-      KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemset(m_scratchConcurrentBitset, 0,
-                                            sizeof(uint32_t) * buffer_bound));
     }
     //----------------------------------
 
@@ -526,15 +523,6 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream,
   }
 
 #ifdef KOKKOS_ENABLE_CUDA_UVM
-  if (Kokkos::show_warnings() && !cuda_launch_blocking()) {
-    std::cerr << R"warning(
-Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default
-                                  without setting CUDA_LAUNCH_BLOCKING=1.
-                                  The code must call Cuda().fence() after each kernel
-                                  or will likely crash when accessing data on the host.)warning"
-              << std::endl;
-  }
-
   const char *env_force_device_alloc =
       getenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC");
   bool force_device_alloc;
@@ -584,6 +572,11 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default
     m_team_scratch_current_size[i] = 0;
     m_team_scratch_ptr[i]          = nullptr;
   }
+
+  KOKKOS_IMPL_CUDA_SAFE_CALL(
+      cudaMalloc(&m_scratch_locks, sizeof(int32_t) * m_maxConcurrency));
+  KOKKOS_IMPL_CUDA_SAFE_CALL(
+      cudaMemset(m_scratch_locks, 0, sizeof(int32_t) * m_maxConcurrency));
 }
 
 //----------------------------------------------------------------------------
@@ -591,7 +584,7 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default
 using ScratchGrain = Cuda::size_type[Impl::CudaTraits::WarpSize];
 enum { sizeScratchGrain = sizeof(ScratchGrain) };
 
-Cuda::size_type *CudaInternal::scratch_flags(const Cuda::size_type size) const {
+Cuda::size_type *CudaInternal::scratch_flags(const std::size_t size) const {
   if (verify_is_initialized("scratch_flags") &&
       m_scratchFlagsCount * sizeScratchGrain < size) {
     m_scratchFlagsCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
@@ -616,7 +609,7 @@ Cuda::size_type *CudaInternal::scratch_flags(const Cuda::size_type size) const {
   return m_scratchFlags;
 }
 
-Cuda::size_type *CudaInternal::scratch_space(const Cuda::size_type size) const {
+Cuda::size_type *CudaInternal::scratch_space(const std::size_t size) const {
   if (verify_is_initialized("scratch_space") &&
       m_scratchSpaceCount * sizeScratchGrain < size) {
     m_scratchSpaceCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
@@ -638,8 +631,7 @@ Cuda::size_type *CudaInternal::scratch_space(const Cuda::size_type size) const {
   return m_scratchSpace;
 }
 
-Cuda::size_type *CudaInternal::scratch_unified(
-    const Cuda::size_type size) const {
+Cuda::size_type *CudaInternal::scratch_unified(const std::size_t size) const {
   if (verify_is_initialized("scratch_unified") && m_scratchUnifiedSupported &&
       m_scratchUnifiedCount * sizeScratchGrain < size) {
     m_scratchUnifiedCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
@@ -662,8 +654,7 @@ Cuda::size_type *CudaInternal::scratch_unified(
   return m_scratchUnified;
 }
 
-Cuda::size_type *CudaInternal::scratch_functor(
-    const Cuda::size_type size) const {
+Cuda::size_type *CudaInternal::scratch_functor(const std::size_t size) const {
   if (verify_is_initialized("scratch_functor") && m_scratchFunctorSize < size) {
     m_scratchFunctorSize = size;
 
@@ -694,7 +685,7 @@ std::pair<void *, int> CudaInternal::resize_team_scratch_space(
   int current_team_scratch = 0;
   int zero                 = 0;
   int one                  = 1;
-  while (m_team_scratch_pool[current_team_scratch].compare_exchange_weak(
+  while (!m_team_scratch_pool[current_team_scratch].compare_exchange_weak(
       zero, one, std::memory_order_release, std::memory_order_relaxed)) {
     current_team_scratch = (current_team_scratch + 1) % m_n_team_scratch;
   }
@@ -728,6 +719,7 @@ void CudaInternal::finalize() {
   if (nullptr != m_scratchSpace || nullptr != m_scratchFlags) {
     // Only finalize this if we're the singleton
     if (this == &singleton()) {
+      (void)Impl::cuda_global_unique_token_locks(true);
       Impl::finalize_host_cuda_lock_arrays();
     }
 
@@ -738,7 +730,6 @@ void CudaInternal::finalize() {
     RecordCuda::decrement(RecordCuda::get_record(m_scratchFlags));
     RecordCuda::decrement(RecordCuda::get_record(m_scratchSpace));
     RecordHost::decrement(RecordHost::get_record(m_scratchUnified));
-    RecordCuda::decrement(RecordCuda::get_record(m_scratchConcurrentBitset));
     if (m_scratchFunctorSize > 0)
       RecordCuda::decrement(RecordCuda::get_record(m_scratchFunctor));
 
@@ -750,24 +741,26 @@ void CudaInternal::finalize() {
     if (m_manage_stream && m_stream != nullptr)
       KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(m_stream));
 
-    m_cudaDev                 = -1;
-    m_multiProcCount          = 0;
-    m_maxWarpCount            = 0;
-    m_maxBlock                = 0;
-    m_maxSharedWords          = 0;
-    m_scratchSpaceCount       = 0;
-    m_scratchFlagsCount       = 0;
-    m_scratchUnifiedCount     = 0;
-    m_streamCount             = 0;
-    m_scratchSpace            = nullptr;
-    m_scratchFlags            = nullptr;
-    m_scratchUnified          = nullptr;
-    m_scratchConcurrentBitset = nullptr;
-    m_stream                  = nullptr;
+    m_cudaDev             = -1;
+    m_multiProcCount      = 0;
+    m_maxWarpCount        = 0;
+    m_maxBlock            = {0, 0, 0};
+    m_maxSharedWords      = 0;
+    m_scratchSpaceCount   = 0;
+    m_scratchFlagsCount   = 0;
+    m_scratchUnifiedCount = 0;
+    m_streamCount         = 0;
+    m_scratchSpace        = nullptr;
+    m_scratchFlags        = nullptr;
+    m_scratchUnified      = nullptr;
+    m_stream              = nullptr;
     for (int i = 0; i < m_n_team_scratch; ++i) {
       m_team_scratch_current_size[i] = 0;
       m_team_scratch_ptr[i]          = nullptr;
     }
+
+    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(m_scratch_locks));
+    m_scratch_locks = nullptr;
   }
 
   // only destroy these if we're finalizing the singleton
@@ -804,7 +797,7 @@ Cuda::size_type cuda_internal_maximum_warp_count() {
   return CudaInternal::singleton().m_maxWarpCount;
 }
 
-Cuda::size_type cuda_internal_maximum_grid_count() {
+std::array<Cuda::size_type, 3> cuda_internal_maximum_grid_count() {
   return CudaInternal::singleton().m_maxBlock;
 }
 
@@ -813,17 +806,17 @@ Cuda::size_type cuda_internal_maximum_shared_words() {
 }
 
 Cuda::size_type *cuda_internal_scratch_space(const Cuda &instance,
-                                             const Cuda::size_type size) {
+                                             const std::size_t size) {
   return instance.impl_internal_space_instance()->scratch_space(size);
 }
 
 Cuda::size_type *cuda_internal_scratch_flags(const Cuda &instance,
-                                             const Cuda::size_type size) {
+                                             const std::size_t size) {
   return instance.impl_internal_space_instance()->scratch_flags(size);
 }
 
 Cuda::size_type *cuda_internal_scratch_unified(const Cuda &instance,
-                                               const Cuda::size_type size) {
+                                               const std::size_t size) {
   return instance.impl_internal_space_instance()->scratch_unified(size);
 }
 
@@ -1012,20 +1005,8 @@ void CudaSpaceInitializer::print_configuration(std::ostream &msg,
   msg << "\nCuda Runtime Configuration:" << std::endl;
   Cuda::print_configuration(msg, detail);
 }
-}  // namespace Impl
 
-}  // namespace Kokkos
-
-namespace Kokkos {
-namespace Experimental {
-
-UniqueToken<Kokkos::Cuda, Kokkos::Experimental::UniqueTokenScope::Global>::
-    UniqueToken(Kokkos::Cuda const &)
-    : m_buffer(
-          Kokkos::Impl::CudaInternal::singleton().m_scratchConcurrentBitset),
-      m_count(Kokkos::Impl::CudaInternal::singleton().m_maxConcurrency) {}
-
-}  // namespace Experimental
+}  // namespace Impl
 }  // namespace Kokkos
 
 #else
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp
index 7eb169838c05dc144e9789d4466f83d3febfe926..62b1f09cd564fd233ea57a1c97f25097f15dc795 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp
@@ -55,17 +55,17 @@ struct CudaTraits {
 
 CudaSpace::size_type cuda_internal_multiprocessor_count();
 CudaSpace::size_type cuda_internal_maximum_warp_count();
-CudaSpace::size_type cuda_internal_maximum_grid_count();
+std::array<CudaSpace::size_type, 3> cuda_internal_maximum_grid_count();
 CudaSpace::size_type cuda_internal_maximum_shared_words();
 
 CudaSpace::size_type cuda_internal_maximum_concurrent_block_count();
 
-CudaSpace::size_type* cuda_internal_scratch_flags(
-    const Cuda&, const CudaSpace::size_type size);
-CudaSpace::size_type* cuda_internal_scratch_space(
-    const Cuda&, const CudaSpace::size_type size);
-CudaSpace::size_type* cuda_internal_scratch_unified(
-    const Cuda&, const CudaSpace::size_type size);
+CudaSpace::size_type* cuda_internal_scratch_flags(const Cuda&,
+                                                  const std::size_t size);
+CudaSpace::size_type* cuda_internal_scratch_space(const Cuda&,
+                                                  const std::size_t size);
+CudaSpace::size_type* cuda_internal_scratch_unified(const Cuda&,
+                                                    const std::size_t size);
 
 }  // namespace Impl
 }  // namespace Kokkos
@@ -91,7 +91,7 @@ class CudaInternal {
   int m_cudaArch;
   unsigned m_multiProcCount;
   unsigned m_maxWarpCount;
-  unsigned m_maxBlock;
+  std::array<size_type, 3> m_maxBlock;
   unsigned m_maxSharedWords;
   uint32_t m_maxConcurrency;
   int m_shmemPerSM;
@@ -104,10 +104,10 @@ class CudaInternal {
   cudaDeviceProp m_deviceProp;
 
   // Scratch Spaces for Reductions
-  mutable size_type m_scratchSpaceCount;
-  mutable size_type m_scratchFlagsCount;
-  mutable size_type m_scratchUnifiedCount;
-  mutable size_type m_scratchFunctorSize;
+  mutable std::size_t m_scratchSpaceCount;
+  mutable std::size_t m_scratchFlagsCount;
+  mutable std::size_t m_scratchUnifiedCount;
+  mutable std::size_t m_scratchFunctorSize;
 
   size_type m_scratchUnifiedSupported;
   size_type m_streamCount;
@@ -115,7 +115,6 @@ class CudaInternal {
   mutable size_type* m_scratchFlags;
   mutable size_type* m_scratchUnified;
   mutable size_type* m_scratchFunctor;
-  uint32_t* m_scratchConcurrentBitset;
   cudaStream_t m_stream;
   uint32_t m_instance_id;
   bool m_manage_stream;
@@ -125,6 +124,7 @@ class CudaInternal {
   mutable int64_t m_team_scratch_current_size[10];
   mutable void* m_team_scratch_ptr[10];
   mutable std::atomic_int m_team_scratch_pool[10];
+  std::int32_t* m_scratch_locks;
 
   bool was_initialized = false;
   bool was_finalized   = false;
@@ -133,6 +133,7 @@ class CudaInternal {
   //  here will break once there are multiple devices though
   static unsigned long* constantMemHostStaging;
   static cudaEvent_t constantMemReusable;
+  static std::mutex constantMemMutex;
 
   static CudaInternal& singleton();
 
@@ -163,7 +164,7 @@ class CudaInternal {
         m_cudaArch(-1),
         m_multiProcCount(0),
         m_maxWarpCount(0),
-        m_maxBlock(0),
+        m_maxBlock({0, 0, 0}),
         m_maxSharedWords(0),
         m_maxConcurrency(0),
         m_shmemPerSM(0),
@@ -182,7 +183,6 @@ class CudaInternal {
         m_scratchFlags(nullptr),
         m_scratchUnified(nullptr),
         m_scratchFunctor(nullptr),
-        m_scratchConcurrentBitset(nullptr),
         m_stream(nullptr),
         m_instance_id(
             Kokkos::Tools::Experimental::Impl::idForInstance<Kokkos::Cuda>(
@@ -195,10 +195,10 @@ class CudaInternal {
   }
 
   // Resizing of reduction related scratch spaces
-  size_type* scratch_space(const size_type size) const;
-  size_type* scratch_flags(const size_type size) const;
-  size_type* scratch_unified(const size_type size) const;
-  size_type* scratch_functor(const size_type size) const;
+  size_type* scratch_space(const std::size_t size) const;
+  size_type* scratch_flags(const std::size_t size) const;
+  size_type* scratch_unified(const std::size_t size) const;
+  size_type* scratch_functor(const std::size_t size) const;
   uint32_t impl_get_instance_id() const;
   // Resizing of team level 1 scratch
   std::pair<void*, int> resize_team_scratch_space(std::int64_t bytes,
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
index 4b01798f5e2cad495c897b8110d96eec87fe429f..b7a80ad84ff22b00d9666956cf5896b259d38b6a 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
@@ -540,8 +540,9 @@ struct CudaParallelLaunchKernelInvoker<
                             dim3 const& block, int shmem,
                             CudaInternal const* cuda_instance) {
     // Wait until the previous kernel that uses the constant buffer is done
+    std::lock_guard<std::mutex> lock(CudaInternal::constantMemMutex);
     KOKKOS_IMPL_CUDA_SAFE_CALL(
-        cudaEventSynchronize(cuda_instance->constantMemReusable));
+        cudaEventSynchronize(CudaInternal::constantMemReusable));
 
     // Copy functor (synchronously) to staging buffer in pinned host memory
     unsigned long* staging = cuda_instance->constantMemHostStaging;
@@ -558,7 +559,7 @@ struct CudaParallelLaunchKernelInvoker<
 
     // Record an event that says when the constant buffer can be reused
     KOKKOS_IMPL_CUDA_SAFE_CALL(
-        cudaEventRecord(cuda_instance->constantMemReusable,
+        cudaEventRecord(CudaInternal::constantMemReusable,
                         cudaStream_t(cuda_instance->m_stream)));
   }
 
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
index 1f3024f3186a14d847a6999b995832e7782b62e9..1dcbdf0392fc3961e2c1e30e140c3edcaa61d820 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
@@ -50,8 +50,7 @@
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
 namespace Kokkos {
 namespace Impl {
-__device__ __constant__ CudaLockArrays g_device_cuda_lock_arrays = {nullptr,
-                                                                    nullptr, 0};
+__device__ __constant__ CudaLockArrays g_device_cuda_lock_arrays = {nullptr, 0};
 }
 }  // namespace Kokkos
 #endif
@@ -67,18 +66,11 @@ __global__ void init_lock_array_kernel_atomic() {
   }
 }
 
-__global__ void init_lock_array_kernel_threadid(int N) {
-  unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i < (unsigned)N) {
-    Kokkos::Impl::g_device_cuda_lock_arrays.scratch[i] = 0;
-  }
-}
-
 }  // namespace
 
 namespace Impl {
 
-CudaLockArrays g_host_cuda_lock_arrays = {nullptr, nullptr, 0};
+CudaLockArrays g_host_cuda_lock_arrays = {nullptr, 0};
 
 void initialize_host_cuda_lock_arrays() {
 #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
@@ -90,16 +82,12 @@ void initialize_host_cuda_lock_arrays() {
   KOKKOS_IMPL_CUDA_SAFE_CALL(
       cudaMalloc(&g_host_cuda_lock_arrays.atomic,
                  sizeof(int) * (CUDA_SPACE_ATOMIC_MASK + 1)));
-  KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&g_host_cuda_lock_arrays.scratch,
-                                        sizeof(int) * (Cuda::concurrency())));
   Impl::cuda_device_synchronize(
       "Kokkos::Impl::initialize_host_cuda_lock_arrays: Pre Init Lock Arrays");
   g_host_cuda_lock_arrays.n = Cuda::concurrency();
   KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
   init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256,
                                   256>>>();
-  init_lock_array_kernel_threadid<<<(Kokkos::Cuda::concurrency() + 255) / 256,
-                                    256>>>(Kokkos::Cuda::concurrency());
   Impl::cuda_device_synchronize(
       "Kokkos::Impl::initialize_host_cuda_lock_arrays: Post Init Lock Arrays");
 }
@@ -112,9 +100,7 @@ void finalize_host_cuda_lock_arrays() {
   if (g_host_cuda_lock_arrays.atomic == nullptr) return;
   cudaFree(g_host_cuda_lock_arrays.atomic);
   g_host_cuda_lock_arrays.atomic = nullptr;
-  cudaFree(g_host_cuda_lock_arrays.scratch);
-  g_host_cuda_lock_arrays.scratch = nullptr;
-  g_host_cuda_lock_arrays.n       = 0;
+  g_host_cuda_lock_arrays.n      = 0;
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
   KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
 #endif
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
index 04fb7cb345a27e9d9932d188216d5261d8606939..bdb7723985e5a3c6c0451ada3d0b6b7303204089 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
@@ -62,7 +62,6 @@ namespace Impl {
 
 struct CudaLockArrays {
   std::int32_t* atomic;
-  std::int32_t* scratch;
   std::int32_t n;
 };
 
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
index f83b43e608855492f9d4df725533a08184f5edaf..5016f73e3c7d3fe3fd4f98cc37af9e1ed5ff3c2e 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
@@ -61,6 +61,7 @@
 #include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp>
 #include <Cuda/Kokkos_Cuda_Locks.hpp>
 #include <Cuda/Kokkos_Cuda_Team.hpp>
+#include <Kokkos_MinMaxClamp.hpp>
 #include <Kokkos_Vectorization.hpp>
 
 #include <impl/Kokkos_Tools.hpp>
@@ -294,7 +295,7 @@ class TeamPolicyInternal<Kokkos::Cuda, Properties...>
         m_tune_team(bool(team_size_request <= 0)),
         m_tune_vector(bool(vector_length_request <= 0)) {
     // Make sure league size is permissible
-    if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
+    if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()[0]))
       Impl::throw_runtime_exception(
           "Requested too large league_size for TeamPolicy on Cuda execution "
           "space.");
@@ -505,7 +506,7 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> {
     dim3 grid(
         std::min(
             typename Policy::index_type((nwork + block.y - 1) / block.y),
-            typename Policy::index_type(cuda_internal_maximum_grid_count())),
+            typename Policy::index_type(cuda_internal_maximum_grid_count()[0])),
         1, 1);
 #ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION
     if (Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) {
@@ -566,17 +567,18 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> {
     using namespace std;
 
     if (m_rp.m_num_tiles == 0) return;
-    const array_index_type maxblocks = static_cast<array_index_type>(
-        m_rp.space().impl_internal_space_instance()->m_maxBlock);
+    const auto maxblocks = cuda_internal_maximum_grid_count();
     if (RP::rank == 2) {
       const dim3 block(m_rp.m_tile[0], m_rp.m_tile[1], 1);
       KOKKOS_ASSERT(block.x > 0);
       KOKKOS_ASSERT(block.y > 0);
       const dim3 grid(
-          min((m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
-              maxblocks),
-          min((m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
-              maxblocks),
+          std::min<array_index_type>(
+              (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
+              maxblocks[0]),
+          std::min<array_index_type>(
+              (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
+              maxblocks[1]),
           1);
       CudaParallelLaunch<ParallelFor, LaunchBounds>(
           *this, grid, block, 0, m_rp.space().impl_internal_space_instance(),
@@ -587,12 +589,15 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> {
       KOKKOS_ASSERT(block.y > 0);
       KOKKOS_ASSERT(block.z > 0);
       const dim3 grid(
-          min((m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
-              maxblocks),
-          min((m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
-              maxblocks),
-          min((m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1) / block.z,
-              maxblocks));
+          std::min<array_index_type>(
+              (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
+              maxblocks[0]),
+          std::min<array_index_type>(
+              (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
+              maxblocks[1]),
+          std::min<array_index_type>(
+              (m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1) / block.z,
+              maxblocks[2]));
       CudaParallelLaunch<ParallelFor, LaunchBounds>(
           *this, grid, block, 0, m_rp.space().impl_internal_space_instance(),
           false);
@@ -604,12 +609,14 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> {
       KOKKOS_ASSERT(block.y > 0);
       KOKKOS_ASSERT(block.z > 0);
       const dim3 grid(
-          min(static_cast<index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1]),
-              static_cast<index_type>(maxblocks)),
-          min((m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1) / block.y,
-              maxblocks),
-          min((m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1) / block.z,
-              maxblocks));
+          std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1],
+                                     maxblocks[0]),
+          std::min<array_index_type>(
+              (m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1) / block.y,
+              maxblocks[1]),
+          std::min<array_index_type>(
+              (m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1) / block.z,
+              maxblocks[2]));
       CudaParallelLaunch<ParallelFor, LaunchBounds>(
           *this, grid, block, 0, m_rp.space().impl_internal_space_instance(),
           false);
@@ -620,12 +627,13 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> {
                        m_rp.m_tile[2] * m_rp.m_tile[3], m_rp.m_tile[4]);
       KOKKOS_ASSERT(block.z > 0);
       const dim3 grid(
-          min(static_cast<index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1]),
-              static_cast<index_type>(maxblocks)),
-          min(static_cast<index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3]),
-              static_cast<index_type>(maxblocks)),
-          min((m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1) / block.z,
-              maxblocks));
+          std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1],
+                                     maxblocks[0]),
+          std::min<array_index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3],
+                                     maxblocks[1]),
+          std::min<array_index_type>(
+              (m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1) / block.z,
+              maxblocks[2]));
       CudaParallelLaunch<ParallelFor, LaunchBounds>(
           *this, grid, block, 0, m_rp.space().impl_internal_space_instance(),
           false);
@@ -636,12 +644,12 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> {
                        m_rp.m_tile[2] * m_rp.m_tile[3],
                        m_rp.m_tile[4] * m_rp.m_tile[5]);
       const dim3 grid(
-          min(static_cast<index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1]),
-              static_cast<index_type>(maxblocks)),
-          min(static_cast<index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3]),
-              static_cast<index_type>(maxblocks)),
-          min(static_cast<index_type>(m_rp.m_tile_end[4] * m_rp.m_tile_end[5]),
-              static_cast<index_type>(maxblocks)));
+          std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1],
+                                     maxblocks[0]),
+          std::min<array_index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3],
+                                     maxblocks[1]),
+          std::min<array_index_type>(m_rp.m_tile_end[4] * m_rp.m_tile_end[5],
+                                     maxblocks[2]));
       CudaParallelLaunch<ParallelFor, LaunchBounds>(
           *this, grid, block, 0, m_rp.space().impl_internal_space_instance(),
           false);
@@ -656,6 +664,42 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> {
       : m_functor(arg_functor), m_rp(arg_policy) {}
 };
 
+__device__ inline int64_t cuda_get_scratch_index(Cuda::size_type league_size,
+                                                 int32_t* scratch_locks) {
+  int64_t threadid = 0;
+  __shared__ int64_t base_thread_id;
+  if (threadIdx.x == 0 && threadIdx.y == 0) {
+    int64_t const wraparound_len = Kokkos::Experimental::min(
+        int64_t(league_size),
+        (int64_t(Kokkos::Impl::g_device_cuda_lock_arrays.n)) /
+            (blockDim.x * blockDim.y));
+    threadid = (blockIdx.x * blockDim.z + threadIdx.z) % wraparound_len;
+    threadid *= blockDim.x * blockDim.y;
+    int done = 0;
+    while (!done) {
+      done = (0 == atomicCAS(&scratch_locks[threadid], 0, 1));
+      if (!done) {
+        threadid += blockDim.x * blockDim.y;
+        if (int64_t(threadid + blockDim.x * blockDim.y) >=
+            wraparound_len * blockDim.x * blockDim.y)
+          threadid = 0;
+      }
+    }
+    base_thread_id = threadid;
+  }
+  __syncthreads();
+  threadid = base_thread_id;
+  return threadid;
+}
+
+__device__ inline void cuda_release_scratch_index(int32_t* scratch_locks,
+                                                  int64_t threadid) {
+  __syncthreads();
+  if (threadIdx.x == 0 && threadIdx.y == 0) {
+    scratch_locks[threadid] = 0;
+  }
+}
+
 template <class FunctorType, class... Properties>
 class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
                   Kokkos::Cuda> {
@@ -689,6 +733,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
   void* m_scratch_ptr[2];
   int m_scratch_size[2];
   int m_scratch_pool_id = -1;
+  int32_t* m_scratch_locks;
 
   template <class TagType>
   __device__ inline
@@ -711,30 +756,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
     // Iterate this block through the league
     int64_t threadid = 0;
     if (m_scratch_size[1] > 0) {
-      __shared__ int64_t base_thread_id;
-      if (threadIdx.x == 0 && threadIdx.y == 0) {
-        threadid = (blockIdx.x * blockDim.z + threadIdx.z) %
-                   (Kokkos::Impl::g_device_cuda_lock_arrays.n /
-                    (blockDim.x * blockDim.y));
-        threadid *= blockDim.x * blockDim.y;
-        int done = 0;
-        while (!done) {
-          done =
-              (0 ==
-               atomicCAS(
-                   &Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid],
-                   0, 1));
-          if (!done) {
-            threadid += blockDim.x * blockDim.y;
-            if (int64_t(threadid + blockDim.x * blockDim.y) >=
-                int64_t(Kokkos::Impl::g_device_cuda_lock_arrays.n))
-              threadid = 0;
-          }
-        }
-        base_thread_id = threadid;
-      }
-      __syncthreads();
-      threadid = base_thread_id;
+      threadid = cuda_get_scratch_index(m_league_size, m_scratch_locks);
     }
 
     const int int_league_size = (int)m_league_size;
@@ -748,9 +770,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
           m_scratch_size[1], league_rank, m_league_size));
     }
     if (m_scratch_size[1] > 0) {
-      __syncthreads();
-      if (threadIdx.x == 0 && threadIdx.y == 0)
-        Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid] = 0;
+      cuda_release_scratch_index(m_scratch_locks, threadid);
     }
   }
 
@@ -795,6 +815,8 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
          FunctorTeamShmemSize<FunctorType>::value(m_functor, m_team_size));
     m_scratch_size[0] = m_policy.scratch_size(0, m_team_size);
     m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
+    m_scratch_locks =
+        m_policy.space().impl_internal_space_instance()->m_scratch_locks;
 
     // Functor's reduce memory, team scan memory, and team shared memory depend
     // upon team size.
@@ -807,8 +829,10 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
               .impl_internal_space_instance()
               ->resize_team_scratch_space(
                   static_cast<std::int64_t>(m_scratch_size[1]) *
-                  (static_cast<std::int64_t>(Cuda::concurrency() /
-                                             (m_team_size * m_vector_size))));
+                  (std::min(
+                      static_cast<std::int64_t>(Cuda::concurrency() /
+                                                (m_team_size * m_vector_size)),
+                      static_cast<std::int64_t>(m_league_size))));
       m_scratch_ptr[1]  = scratch_ptr_id.first;
       m_scratch_pool_id = scratch_ptr_id.second;
     }
@@ -1221,7 +1245,9 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
                               typename ViewType::memory_space>::accessible),
         m_scratch_space(nullptr),
         m_scratch_flags(nullptr),
-        m_unified_space(nullptr) {}
+        m_unified_space(nullptr) {
+    check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor);
+  }
 
   ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy,
                  const ReducerType& reducer)
@@ -1239,7 +1265,9 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
                                   memory_space>::accessible),
         m_scratch_space(nullptr),
         m_scratch_flags(nullptr),
-        m_unified_space(nullptr) {}
+        m_unified_space(nullptr) {
+    check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor);
+  }
 };
 
 // MDRangePolicy impl
@@ -1456,7 +1484,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
   }
 
   inline void execute() {
-    const int nwork = m_policy.m_num_tiles;
+    const auto nwork = m_policy.m_num_tiles;
     if (nwork) {
       int block_size = m_policy.m_prod_tile_dims;
       // CONSTRAINT: Algorithm requires block_size >= product of tile dimensions
@@ -1540,7 +1568,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
                               typename ViewType::memory_space>::accessible),
         m_scratch_space(nullptr),
         m_scratch_flags(nullptr),
-        m_unified_space(nullptr) {}
+        m_unified_space(nullptr) {
+    check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor);
+  }
 
   ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy,
                  const ReducerType& reducer)
@@ -1554,7 +1584,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
                                   memory_space>::accessible),
         m_scratch_space(nullptr),
         m_scratch_flags(nullptr),
-        m_unified_space(nullptr) {}
+        m_unified_space(nullptr) {
+    check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor);
+  }
 };
 
 //----------------------------------------------------------------------------
@@ -1623,6 +1655,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
   void* m_scratch_ptr[2];
   int m_scratch_size[2];
   int m_scratch_pool_id = -1;
+  int32_t* m_scratch_locks;
   const size_type m_league_size;
   int m_team_size;
   const size_type m_vector_size;
@@ -1647,39 +1680,14 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
   __device__ inline void operator()() const {
     int64_t threadid = 0;
     if (m_scratch_size[1] > 0) {
-      __shared__ int64_t base_thread_id;
-      if (threadIdx.x == 0 && threadIdx.y == 0) {
-        threadid = (blockIdx.x * blockDim.z + threadIdx.z) %
-                   (Kokkos::Impl::g_device_cuda_lock_arrays.n /
-                    (blockDim.x * blockDim.y));
-        threadid *= blockDim.x * blockDim.y;
-        int done = 0;
-        while (!done) {
-          done =
-              (0 ==
-               atomicCAS(
-                   &Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid],
-                   0, 1));
-          if (!done) {
-            threadid += blockDim.x * blockDim.y;
-            if (int64_t(threadid + blockDim.x * blockDim.y) >=
-                int64_t(Kokkos::Impl::g_device_cuda_lock_arrays.n))
-              threadid = 0;
-          }
-        }
-        base_thread_id = threadid;
-      }
-      __syncthreads();
-      threadid = base_thread_id;
+      threadid = cuda_get_scratch_index(m_league_size, m_scratch_locks);
     }
 
     run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType,
                            DummySHMEMReductionType>::select(1, 1.0),
         threadid);
     if (m_scratch_size[1] > 0) {
-      __syncthreads();
-      if (threadIdx.x == 0 && threadIdx.y == 0)
-        Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid] = 0;
+      cuda_release_scratch_index(m_scratch_locks, threadid);
     }
   }
 
@@ -1822,7 +1830,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
   }
 
   inline void execute() {
-    const int nwork            = m_league_size * m_team_size;
+    const bool is_empty_range  = m_league_size == 0 || m_team_size == 0;
     const bool need_device_set = ReduceFunctorHasInit<FunctorType>::value ||
                                  ReduceFunctorHasFinal<FunctorType>::value ||
                                  !m_result_ptr_host_accessible ||
@@ -1830,7 +1838,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
                                  Policy::is_graph_kernel::value ||
 #endif
                                  !std::is_same<ReducerType, InvalidType>::value;
-    if ((nwork > 0) || need_device_set) {
+    if (!is_empty_range || need_device_set) {
       const int block_count =
           UseShflReduction ? std::min(m_league_size, size_type(1024 * 32))
                            : std::min(int(m_league_size), m_team_size);
@@ -1849,7 +1857,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
       dim3 grid(block_count, 1, 1);
       const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size;
 
-      if ((nwork == 0)
+      if (is_empty_range
 #ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION
           || Kokkos::Impl::CudaInternal::cuda_use_serial_execution()
 #endif
@@ -1940,6 +1948,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size);
     m_scratch_size[0] = m_shmem_size;
     m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
+    m_scratch_locks =
+        m_policy.space().impl_internal_space_instance()->m_scratch_locks;
     if (m_team_size <= 0) {
       m_scratch_ptr[1] = nullptr;
     } else {
@@ -1948,8 +1958,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
               .impl_internal_space_instance()
               ->resize_team_scratch_space(
                   static_cast<std::int64_t>(m_scratch_size[1]) *
-                  (static_cast<std::int64_t>(Cuda::concurrency() /
-                                             (m_team_size * m_vector_size))));
+                  (std::min(
+                      static_cast<std::int64_t>(Cuda::concurrency() /
+                                                (m_team_size * m_vector_size)),
+                      static_cast<std::int64_t>(m_league_size))));
       m_scratch_ptr[1]  = scratch_ptr_id.first;
       m_scratch_pool_id = scratch_ptr_id.second;
     }
@@ -2044,6 +2056,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size);
     m_scratch_size[0] = m_shmem_size;
     m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
+    m_scratch_locks =
+        m_policy.space().impl_internal_space_instance()->m_scratch_locks;
     if (m_team_size <= 0) {
       m_scratch_ptr[1] = nullptr;
     } else {
@@ -2052,8 +2066,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
               .impl_internal_space_instance()
               ->resize_team_scratch_space(
                   static_cast<std::int64_t>(m_scratch_size[1]) *
-                  (static_cast<std::int64_t>(Cuda::concurrency() /
-                                             (m_team_size * m_vector_size))));
+                  (std::min(
+                      static_cast<std::int64_t>(Cuda::concurrency() /
+                                                (m_team_size * m_vector_size)),
+                      static_cast<std::int64_t>(m_league_size))));
       m_scratch_ptr[1]  = scratch_ptr_id.first;
       m_scratch_pool_id = scratch_ptr_id.second;
     }
@@ -2331,7 +2347,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> {
   }
 
   inline void execute() {
-    const int nwork = m_policy.end() - m_policy.begin();
+    const auto nwork = m_policy.end() - m_policy.begin();
     if (nwork) {
       enum { GridMaxComputeCapability_2x = 0x0ffff };
 
@@ -2618,7 +2634,7 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>,
   }
 
   inline void execute() {
-    const int nwork = m_policy.end() - m_policy.begin();
+    const auto nwork = m_policy.end() - m_policy.begin();
     if (nwork) {
       enum { GridMaxComputeCapability_2x = 0x0ffff };
 
@@ -2698,234 +2714,6 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>,
   }
 };
 
-}  // namespace Impl
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-namespace Impl {
-template <class FunctorType, class ExecPolicy, class ValueType,
-          class Tag = typename ExecPolicy::work_tag>
-struct CudaFunctorAdapter {
-  const FunctorType f;
-  using value_type = ValueType;
-  CudaFunctorAdapter(const FunctorType& f_) : f(f_) {}
-
-  __device__ inline void operator()(typename ExecPolicy::work_tag,
-                                    const typename ExecPolicy::member_type& i,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals third argument
-    // type of FunctorType::operator()
-    f(typename ExecPolicy::work_tag(), i, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::work_tag,
-                                    const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals third argument
-    // type of FunctorType::operator()
-    f(typename ExecPolicy::work_tag(), i, j, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::work_tag,
-                                    const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals third argument
-    // type of FunctorType::operator()
-    f(typename ExecPolicy::work_tag(), i, j, k, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::work_tag,
-                                    const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    const typename ExecPolicy::member_type& l,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals third argument
-    // type of FunctorType::operator()
-    f(typename ExecPolicy::work_tag(), i, j, k, l, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::work_tag,
-                                    const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    const typename ExecPolicy::member_type& l,
-                                    const typename ExecPolicy::member_type& m,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals third argument
-    // type of FunctorType::operator()
-    f(typename ExecPolicy::work_tag(), i, j, k, l, m, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::work_tag,
-                                    const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    const typename ExecPolicy::member_type& l,
-                                    const typename ExecPolicy::member_type& m,
-                                    const typename ExecPolicy::member_type& n,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals third argument
-    // type of FunctorType::operator()
-    f(typename ExecPolicy::work_tag(), i, j, k, l, m, n, val);
-  }
-};
-
-template <class FunctorType, class ExecPolicy, class ValueType>
-struct CudaFunctorAdapter<FunctorType, ExecPolicy, ValueType, void> {
-  const FunctorType f;
-  using value_type = ValueType;
-  CudaFunctorAdapter(const FunctorType& f_) : f(f_) {}
-
-  __device__ inline void operator()(const typename ExecPolicy::member_type& i,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, val);
-  }
-
-  __device__ inline void operator()(const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, val);
-  }
-
-  __device__ inline void operator()(const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, val);
-  }
-
-  __device__ inline void operator()(const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    const typename ExecPolicy::member_type& l,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, l, val);
-  }
-
-  __device__ inline void operator()(const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    const typename ExecPolicy::member_type& l,
-                                    const typename ExecPolicy::member_type& m,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, l, m, val);
-  }
-
-  __device__ inline void operator()(const typename ExecPolicy::member_type& i,
-                                    const typename ExecPolicy::member_type& j,
-                                    const typename ExecPolicy::member_type& k,
-                                    const typename ExecPolicy::member_type& l,
-                                    const typename ExecPolicy::member_type& m,
-                                    const typename ExecPolicy::member_type& n,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, l, m, n, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::member_type& i,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::member_type& i,
-                                    typename ExecPolicy::member_type& j,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::member_type& i,
-                                    typename ExecPolicy::member_type& j,
-                                    typename ExecPolicy::member_type& k,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::member_type& i,
-                                    typename ExecPolicy::member_type& j,
-                                    typename ExecPolicy::member_type& k,
-                                    typename ExecPolicy::member_type& l,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, l, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::member_type& i,
-                                    typename ExecPolicy::member_type& j,
-                                    typename ExecPolicy::member_type& k,
-                                    typename ExecPolicy::member_type& l,
-                                    typename ExecPolicy::member_type& m,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, l, m, val);
-  }
-
-  __device__ inline void operator()(typename ExecPolicy::member_type& i,
-                                    typename ExecPolicy::member_type& j,
-                                    typename ExecPolicy::member_type& k,
-                                    typename ExecPolicy::member_type& l,
-                                    typename ExecPolicy::member_type& m,
-                                    typename ExecPolicy::member_type& n,
-                                    ValueType& val) const {
-    // Insert Static Assert with decltype on ValueType equals second argument
-    // type of FunctorType::operator()
-    f(i, j, k, l, m, n, val);
-  }
-};
-
-template <class FunctorType, class ResultType, class Tag,
-          bool Enable = IsNonTrivialReduceFunctor<FunctorType>::value>
-struct FunctorReferenceType {
-  using reference_type = ResultType&;
-};
-
-template <class FunctorType, class ResultType, class Tag>
-struct FunctorReferenceType<FunctorType, ResultType, Tag, true> {
-  using reference_type =
-      typename Kokkos::Impl::FunctorValueTraits<FunctorType,
-                                                Tag>::reference_type;
-};
-
-template <class FunctorTypeIn, class ExecPolicy, class ValueType>
-struct ParallelReduceFunctorType<FunctorTypeIn, ExecPolicy, ValueType, Cuda> {
-  enum {
-    FunctorHasValueType = IsNonTrivialReduceFunctor<FunctorTypeIn>::value
-  };
-  using functor_type = typename Kokkos::Impl::if_c<
-      FunctorHasValueType, FunctorTypeIn,
-      Impl::CudaFunctorAdapter<FunctorTypeIn, ExecPolicy, ValueType>>::type;
-  static functor_type functor(const FunctorTypeIn& functor_in) {
-    return Impl::if_c<FunctorHasValueType, FunctorTypeIn, functor_type>::select(
-        functor_in, functor_type(functor_in));
-  }
-};
-
 }  // namespace Impl
 
 }  // namespace Kokkos
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index e5b05bcc64f183ef98248a239e6b305fae9410ea..30f5221da4c40175a84352e3afbf0cf29ac79e21 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -192,27 +192,27 @@ __device__ bool cuda_inter_block_reduction(
         if (id + 1 < int(gridDim.x)) join(value, tmp);
       }
       unsigned int mask = __activemask();
-      int active        = __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2, 32);
         if (id + 2 < int(gridDim.x)) join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4, 32);
         if (id + 4 < int(gridDim.x)) join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8, 32);
         if (id + 8 < int(gridDim.x)) join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16, 32);
         if (id + 16 < int(gridDim.x)) join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
     }
   }
   // The last block has in its thread=0 the global reduction value through
@@ -369,27 +369,27 @@ __device__ inline
         if (id + 1 < int(gridDim.x)) reducer.join(value, tmp);
       }
       unsigned int mask = __activemask();
-      int active        = __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2, 32);
         if (id + 2 < int(gridDim.x)) reducer.join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4, 32);
         if (id + 4 < int(gridDim.x)) reducer.join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8, 32);
         if (id + 8 < int(gridDim.x)) reducer.join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
       if (int(blockDim.x * blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16, 32);
         if (id + 16 < int(gridDim.x)) reducer.join(value, tmp);
       }
-      active += __ballot_sync(mask, 1);
+      __syncwarp(mask);
     }
   }
 
@@ -897,6 +897,23 @@ inline unsigned cuda_single_inter_block_reduce_scan_shmem(
          Impl::FunctorValueTraits<FunctorType, ArgTag>::value_size(functor);
 }
 
+template <typename WorkTag, typename Policy, typename FunctorType>
+inline void check_reduced_view_shmem_size(const Policy& policy,
+                                          const FunctorType& functor) {
+  size_t minBlockSize = CudaTraits::WarpSize * 1;
+  unsigned reqShmemSize =
+      cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, WorkTag>(
+          functor, minBlockSize);
+  size_t maxShmemPerBlock =
+      policy.space().impl_internal_space_instance()->m_maxShmemPerBlock;
+
+  if (reqShmemSize > maxShmemPerBlock) {
+    Kokkos::Impl::throw_runtime_exception(
+        "Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch "
+        "memory");
+  }
+}
+
 }  // namespace Impl
 }  // namespace Kokkos
 
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
index f846c06ce573fcd13f797bbaaa9375af4ce8ad33..6da2cad011bd8681034cfdfc650b74a5c92febdf 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
@@ -51,24 +51,43 @@
 #include <Kokkos_CudaSpace.hpp>
 #include <Kokkos_UniqueToken.hpp>
 #include <impl/Kokkos_SharedAlloc.hpp>
-#include <impl/Kokkos_ConcurrentBitset.hpp>
 
 namespace Kokkos {
-namespace Experimental {
 
+namespace Impl {
+Kokkos::View<uint32_t*, Kokkos::CudaSpace> cuda_global_unique_token_locks(
+    bool deallocate = false);
+}
+
+namespace Experimental {
 // both global and instance Unique Tokens are implemented in the same way
+// the global version has one shared static lock array underneath
+// but it can't be a static member variable since we need to acces it on device
+// and we share the implementation with the instance version
 template <>
 class UniqueToken<Cuda, UniqueTokenScope::Global> {
  protected:
-  uint32_t volatile* m_buffer;
-  uint32_t m_count;
+  Kokkos::View<uint32_t*, Kokkos::CudaSpace> m_locks;
 
  public:
   using execution_space = Cuda;
   using size_type       = int32_t;
 
-  explicit UniqueToken(execution_space const& = execution_space());
+  explicit UniqueToken(execution_space const& = Cuda())
+      : m_locks(Kokkos::Impl::cuda_global_unique_token_locks()) {}
+
+ protected:
+  // These are constructors for the Instance version
+  UniqueToken(size_type max_size) {
+    m_locks = Kokkos::View<uint32_t*, Kokkos::CudaSpace>(
+        "Kokkos::UniqueToken::m_locks", max_size);
+  }
+  UniqueToken(size_type max_size, execution_space const& exec) {
+    m_locks = Kokkos::View<uint32_t*, Kokkos::CudaSpace>(
+        Kokkos::view_alloc(exec, "Kokkos::UniqueToken::m_locks"), max_size);
+  }
 
+ public:
   KOKKOS_DEFAULTED_FUNCTION
   UniqueToken(const UniqueToken&) = default;
 
@@ -83,47 +102,84 @@ class UniqueToken<Cuda, UniqueTokenScope::Global> {
 
   /// \brief upper bound for acquired values, i.e. 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
-  size_type size() const noexcept { return m_count; }
+  size_type size() const noexcept { return m_locks.extent(0); }
+
+ private:
+  __device__ size_type impl_acquire() const {
+    int idx = blockIdx.x * (blockDim.x * blockDim.y) +
+              threadIdx.y * blockDim.x + threadIdx.x;
+    idx = idx % size();
+#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_PASCAL) || \
+    defined(KOKKOS_ARCH_MAXWELL)
+    unsigned int mask        = __activemask();
+    unsigned int active      = __ballot_sync(mask, 1);
+    unsigned int done_active = 0;
+    bool done                = false;
+    while (active != done_active) {
+      if (!done) {
+        if (Kokkos::atomic_compare_exchange(&m_locks(idx), 0, 1) == 0) {
+          done = true;
+        } else {
+          idx += blockDim.y * blockDim.x + 1;
+          idx = idx % size();
+        }
+      }
+      done_active = __ballot_sync(mask, done ? 1 : 0);
+    }
+#else
+    while (Kokkos::atomic_compare_exchange(&m_locks(idx), 0, 1) == 1) {
+      idx += blockDim.y * blockDim.x + 1;
+      idx = idx % size();
+    }
+#endif
+// Make sure that all writes in the previous lock owner are visible to me
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_thread_fence(desul::MemoryOrderAcquire(),
+                               desul::MemoryScopeDevice());
+#else
+    Kokkos::memory_fence();
+#endif
+    return idx;
+  }
 
+ public:
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   size_type acquire() const {
-    const Kokkos::pair<int, int> result =
-        Kokkos::Impl::concurrent_bitset::acquire_bounded(
-            m_buffer, m_count, Kokkos::Impl::clock_tic() % m_count);
-
-    if (result.first < 0) {
-      Kokkos::abort(
-          "UniqueToken<Cuda> failure to acquire tokens, no tokens available");
-    }
-
-    return result.first;
+    KOKKOS_IF_ON_DEVICE(return impl_acquire();)
+    KOKKOS_IF_ON_HOST(return 0;)
   }
 
   /// \brief release an acquired value
   KOKKOS_INLINE_FUNCTION
-  void release(size_type i) const noexcept {
-    Kokkos::Impl::concurrent_bitset::release(m_buffer, i);
+  void release(size_type idx) const noexcept {
+// Make sure my writes are visible to the next lock owner
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_thread_fence(desul::MemoryOrderRelease(),
+                               desul::MemoryScopeDevice());
+#else
+    Kokkos::memory_fence();
+#endif
+    (void)Kokkos::atomic_exchange(&m_locks(idx), 0);
   }
 };
 
 template <>
 class UniqueToken<Cuda, UniqueTokenScope::Instance>
     : public UniqueToken<Cuda, UniqueTokenScope::Global> {
- private:
-  Kokkos::View<uint32_t*, ::Kokkos::CudaSpace> m_buffer_view;
-
  public:
-  explicit UniqueToken(execution_space const& arg = execution_space())
-      : UniqueToken<Cuda, UniqueTokenScope::Global>(arg) {}
-
-  UniqueToken(size_type max_size, execution_space const& = execution_space())
-      : m_buffer_view(
-            "UniqueToken::m_buffer_view",
-            ::Kokkos::Impl::concurrent_bitset::buffer_bound(max_size)) {
-    m_buffer = m_buffer_view.data();
-    m_count  = max_size;
-  }
+  // The instance version will forward to protected constructor which creates
+  // a lock array per instance
+  UniqueToken()
+      : UniqueToken<Cuda, UniqueTokenScope::Global>(
+            Kokkos::Cuda().concurrency()) {}
+  explicit UniqueToken(execution_space const& arg)
+      : UniqueToken<Cuda, UniqueTokenScope::Global>(
+            Kokkos::Cuda().concurrency(), arg) {}
+  explicit UniqueToken(size_type max_size)
+      : UniqueToken<Cuda, UniqueTokenScope::Global>(max_size) {}
+  UniqueToken(size_type max_size, execution_space const& arg)
+      : UniqueToken<Cuda, UniqueTokenScope::Global>(max_size, arg) {}
 };
 
 }  // namespace Experimental
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index c55956ede9665bc3005fa570d7ac120404a54d49..61563a01007f68547a5a4432f36f04ada6fa67f2 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -252,32 +252,45 @@ class ViewDataHandle<
                             track_type const& arg_tracker) {
     if (arg_data_ptr == nullptr) return handle_type();
 
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    // Assignment of texture = non-texture requires creation of a texture object
-    // which can only occur on the host.  In addition, 'get_record' is only
-    // valid if called in a host execution space
-
-    using memory_space = typename Traits::memory_space;
-    using record = typename Impl::SharedAllocationRecord<memory_space, void>;
+#if !defined(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC)
+    KOKKOS_IF_ON_HOST((
+        // Assignment of texture = non-texture requires creation of a texture
+        // object which can only occur on the host.  In addition, 'get_record'
+        // is only valid if called in a host execution space
 
-    record* const r = arg_tracker.template get_record<memory_space>();
+        using memory_space = typename Traits::memory_space;
+        using record =
+            typename Impl::SharedAllocationRecord<memory_space, void>;
 
-#if !defined(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC)
-    if (0 == r) {
-      Kokkos::abort(
-          "Cuda const random access View using Cuda texture memory requires "
-          "Kokkos to allocate the View's memory");
-    }
-#endif
+        record* const r = arg_tracker.template get_record<memory_space>();
 
-    return handle_type(arg_data_ptr, r);
+        if (0 == r) {
+          Kokkos::abort(
+              "Cuda const random access View using Cuda texture memory "
+              "requires "
+              "Kokkos to allocate the View's memory");
+        }
 
+        return handle_type(arg_data_ptr, r);))
 #else
-    (void)arg_tracker;
-    Kokkos::Impl::cuda_abort(
-        "Cannot create Cuda texture object from within a Cuda kernel");
-    return handle_type();
+    KOKKOS_IF_ON_HOST((
+        // Assignment of texture = non-texture requires creation of a texture
+        // object which can only occur on the host.  In addition, 'get_record'
+        // is only valid if called in a host execution space
+
+        using memory_space = typename Traits::memory_space;
+        using record =
+            typename Impl::SharedAllocationRecord<memory_space, void>;
+
+        record* const r = arg_tracker.template get_record<memory_space>();
+
+        return handle_type(arg_data_ptr, r);))
 #endif
+
+    KOKKOS_IF_ON_DEVICE(
+        ((void)arg_tracker; Kokkos::Impl::cuda_abort(
+             "Cannot create Cuda texture object from within a Cuda kernel");
+         return handle_type();))
   }
 };
 
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp
index 98b457d8cf52ddcd69ededd5ba3cc75d09509b49..59aac2b5269de2eae3a3861d8289de479c720e04 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp
@@ -53,7 +53,9 @@
 namespace Kokkos {
 namespace Impl {
 
-[[noreturn]] __device__ __attribute__((noinline)) void hip_abort(
+// The two keywords below are not contradictory. `noinline` is a
+// directive to the optimizer.
+[[noreturn]] __device__ __attribute__((noinline)) inline void hip_abort(
     char const *msg) {
 #ifdef NDEBUG
   (void)msg;
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp
index 7eb3e1e9f70fe4cf724e3b766e38ebc16b3c7c8f..10d9bc0150a2abe477f502c3ef05d4dd7dd1aa9f 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp
@@ -101,6 +101,10 @@ template <typename DriverType, typename LaunchBounds = Kokkos::LaunchBounds<>,
           HIPLaunchMechanism LaunchMechanism =
               DeduceHIPLaunchMechanism<DriverType>::launch_mechanism>
 hipFuncAttributes get_hip_func_attributes_impl() {
+#ifndef KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
+  return HIPParallelLaunch<DriverType, LaunchBounds,
+                           LaunchMechanism>::get_hip_func_attributes();
+#else
   // FIXME_HIP - could be if constexpr for c++17
   if (!HIPParallelLaunch<DriverType, LaunchBounds,
                          LaunchMechanism>::default_launchbounds()) {
@@ -129,6 +133,7 @@ hipFuncAttributes get_hip_func_attributes_impl() {
       }
     }
   }
+#endif
 }
 
 // Given an initial block-size limitation based on register usage
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d04fe2051a435df04691d84392b0b460f5ce1747
--- /dev/null
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp
@@ -0,0 +1,248 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_HIP_HALF_HPP_
+#define KOKKOS_HIP_HALF_HPP_
+
+#ifdef KOKKOS_IMPL_HALF_TYPE_DEFINED
+
+#include <Kokkos_Half.hpp>
+#include <Kokkos_NumericTraits.hpp>  // reduction_identity
+
+namespace Kokkos {
+namespace Experimental {
+
+/************************** half conversions **********************************/
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(half_t val) { return val; }
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(float val) { return half_t(__float2half(val)); }
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(bool val) { return cast_to_half(static_cast<float>(val)); }
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(double val) {
+  return half_t(__float2half(static_cast<float>(val)));
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(short val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return half_t(__short2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned short val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return half_t(__ushort2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(int val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return half_t(__int2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned int val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return half_t(__uint2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long long val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return half_t(__ll2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long long val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return half_t(__ull2half_rn(val));
+#else
+  return half_t(__float2half(static_cast<float>(val)));
+#endif
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long val) {
+  return cast_to_half(static_cast<long long>(val));
+}
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long val) {
+  return cast_to_half(static_cast<unsigned long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+cast_from_half(half_t val) {
+  return __half2float(half_t::impl_type(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, bool>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<float>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+cast_from_half(half_t val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return __half2short_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+    cast_from_half(half_t val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return __half2ushort_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+cast_from_half(half_t val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return __half2int_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned>::value, T>
+cast_from_half(half_t val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return __half2uint_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+cast_from_half(half_t val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return __half2ll_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+    cast_from_half(half_t val) {
+#ifdef __HIP_DEVICE_COMPILE__
+  return __half2ull_rz(half_t::impl_type(val));
+#else
+  return static_cast<T>(__half2float(half_t::impl_type(val)));
+#endif
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<long long>(val));
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+    cast_from_half(half_t val) {
+  return static_cast<T>(cast_from_half<unsigned long long>(val));
+}
+}  // namespace Experimental
+
+// use float as the return type for sum and prod since hip_fp16.h
+// has no constexpr functions for casting to __half
+template <>
+struct reduction_identity<Kokkos::Experimental::half_t> {
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() noexcept {
+    return 0.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() noexcept {
+    return 1.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() noexcept {
+    return -65504.0F;
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() noexcept {
+    return 65504.0F;
+  }
+};
+
+}  // namespace Kokkos
+#endif
+#endif
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..728a229c1a825a366b012de8f474644108587180
--- /dev/null
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp
@@ -0,0 +1,63 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_HIP_HALF_IMPL_TYPE_HPP_
+#define KOKKOS_HIP_HALF_IMPL_TYPE_HPP_
+
+#include <hip/hip_fp16.h>
+
+#ifndef KOKKOS_IMPL_HALF_TYPE_DEFINED
+// Make sure no one else tries to define half_t
+#define KOKKOS_IMPL_HALF_TYPE_DEFINED
+#define KOKKOS_IMPL_HIP_HALF_TYPE_DEFINED
+
+namespace Kokkos {
+namespace Impl {
+struct half_impl_t {
+  using type = __half;
+};
+}  // namespace Impl
+}  // namespace Kokkos
+#endif  // KOKKOS_IMPL_HALF_TYPE_DEFINED
+#endif  // KOKKOS_ENABLE_HIP
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp
index 336ac8c6987c6538836f49792c41fd5520d0af8a..4a6a3ba99ebd7e6850cf117a787f33cf5d1d49f2 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp
@@ -62,6 +62,29 @@
 #include <string>
 #include <vector>
 
+#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
+__device__ __constant__ unsigned long kokkos_impl_hip_constant_memory_buffer
+    [Kokkos::Experimental::Impl::HIPTraits::ConstantMemoryUsage /
+     sizeof(unsigned long)];
+#endif
+
+namespace Kokkos {
+namespace Impl {
+Kokkos::View<uint32_t *, Kokkos::Experimental::HIPSpace>
+hip_global_unique_token_locks(bool deallocate) {
+  static Kokkos::View<uint32_t *, Kokkos::Experimental::HIPSpace> locks =
+      Kokkos::View<uint32_t *, Kokkos::Experimental::HIPSpace>();
+  if (!deallocate && locks.extent(0) == 0)
+    locks = Kokkos::View<uint32_t *, Kokkos::Experimental::HIPSpace>(
+        "Kokkos::UniqueToken<HIP>::m_locks",
+        Kokkos::Experimental::HIP().concurrency());
+  if (deallocate)
+    locks = Kokkos::View<uint32_t *, Kokkos::Experimental::HIPSpace>();
+  return locks;
+}
+}  // namespace Impl
+}  // namespace Kokkos
+
 namespace Kokkos {
 namespace Experimental {
 namespace {
@@ -97,6 +120,7 @@ const HIPInternalDevices &HIPInternalDevices::singleton() {
 
 unsigned long *Impl::HIPInternal::constantMemHostStaging = nullptr;
 hipEvent_t Impl::HIPInternal::constantMemReusable        = nullptr;
+std::mutex Impl::HIPInternal::constantMemMutex;
 
 namespace Impl {
 
@@ -128,25 +152,24 @@ void HIPInternal::print_configuration(std::ostream &s) const {
 //----------------------------------------------------------------------------
 
 HIPInternal::~HIPInternal() {
-  if (m_scratchSpace || m_scratchFlags || m_scratchConcurrentBitset) {
+  if (m_scratchSpace || m_scratchFlags) {
     std::cerr << "Kokkos::Experimental::HIP ERROR: Failed to call "
                  "Kokkos::Experimental::HIP::finalize()"
               << std::endl;
     std::cerr.flush();
   }
 
-  m_hipDev                  = -1;
-  m_hipArch                 = -1;
-  m_multiProcCount          = 0;
-  m_maxWarpCount            = 0;
-  m_maxSharedWords          = 0;
-  m_maxShmemPerBlock        = 0;
-  m_scratchSpaceCount       = 0;
-  m_scratchFlagsCount       = 0;
-  m_scratchSpace            = nullptr;
-  m_scratchFlags            = nullptr;
-  m_scratchConcurrentBitset = nullptr;
-  m_stream                  = nullptr;
+  m_hipDev            = -1;
+  m_hipArch           = -1;
+  m_multiProcCount    = 0;
+  m_maxWarpCount      = 0;
+  m_maxSharedWords    = 0;
+  m_maxShmemPerBlock  = 0;
+  m_scratchSpaceCount = 0;
+  m_scratchFlagsCount = 0;
+  m_scratchSpace      = nullptr;
+  m_scratchFlags      = nullptr;
+  m_stream            = nullptr;
 }
 
 int HIPInternal::verify_is_initialized(const char *const label) const {
@@ -235,7 +258,9 @@ void HIPInternal::initialize(int hip_device_id, hipStream_t stream,
 
     //----------------------------------
     // Maximum number of blocks
-    m_maxBlock = hipProp.maxGridSize[0];
+    m_maxBlock[0] = hipProp.maxGridSize[0];
+    m_maxBlock[1] = hipProp.maxGridSize[1];
+    m_maxBlock[2] = hipProp.maxGridSize[2];
 
     // theoretically, we can get 40 WF's / CU, but only can sustain 32
     // see
@@ -280,11 +305,6 @@ void HIPInternal::initialize(int hip_device_id, hipStream_t stream,
                                          sizeof(uint32_t) * buffer_bound);
 
       Record::increment(r);
-
-      m_scratchConcurrentBitset = reinterpret_cast<uint32_t *>(r->data());
-
-      KOKKOS_IMPL_HIP_SAFE_CALL(hipMemset(m_scratchConcurrentBitset, 0,
-                                          sizeof(uint32_t) * buffer_bound));
     }
     //----------------------------------
 
@@ -314,6 +334,11 @@ void HIPInternal::initialize(int hip_device_id, hipStream_t stream,
 
     KOKKOS_IMPL_HIP_SAFE_CALL(hipEventCreate(&constantMemReusable));
   }
+
+  KOKKOS_IMPL_HIP_SAFE_CALL(
+      hipMalloc(&m_scratch_locks, sizeof(int32_t) * HIP::concurrency()));
+  KOKKOS_IMPL_HIP_SAFE_CALL(
+      hipMemset(m_scratch_locks, 0, sizeof(int32_t) * HIP::concurrency()));
 }
 
 //----------------------------------------------------------------------------
@@ -323,7 +348,7 @@ using ScratchGrain =
 enum { sizeScratchGrain = sizeof(ScratchGrain) };
 
 Kokkos::Experimental::HIP::size_type *HIPInternal::scratch_space(
-    const Kokkos::Experimental::HIP::size_type size) {
+    const std::size_t size) {
   if (verify_is_initialized("scratch_space") &&
       m_scratchSpaceCount * sizeScratchGrain < size) {
     m_scratchSpaceCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
@@ -347,7 +372,7 @@ Kokkos::Experimental::HIP::size_type *HIPInternal::scratch_space(
 }
 
 Kokkos::Experimental::HIP::size_type *HIPInternal::scratch_flags(
-    const Kokkos::Experimental::HIP::size_type size) {
+    const std::size_t size) {
   if (verify_is_initialized("scratch_flags") &&
       m_scratchFlagsCount * sizeScratchGrain < size) {
     m_scratchFlagsCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
@@ -394,13 +419,16 @@ void *HIPInternal::resize_team_scratch_space(std::int64_t bytes,
 void HIPInternal::finalize() {
   this->fence("Kokkos::HIPInternal::finalize: fence on finalization");
   was_finalized = true;
+
   if (nullptr != m_scratchSpace || nullptr != m_scratchFlags) {
+    if (this == &singleton())
+      (void)Kokkos::Impl::hip_global_unique_token_locks(true);
+
     using RecordHIP =
         Kokkos::Impl::SharedAllocationRecord<Kokkos::Experimental::HIPSpace>;
 
     RecordHIP::decrement(RecordHIP::get_record(m_scratchFlags));
     RecordHIP::decrement(RecordHIP::get_record(m_scratchSpace));
-    RecordHIP::decrement(RecordHIP::get_record(m_scratchConcurrentBitset));
 
     if (m_team_scratch_current_size > 0)
       Kokkos::kokkos_free<Kokkos::Experimental::HIPSpace>(m_team_scratch_ptr);
@@ -412,17 +440,19 @@ void HIPInternal::finalize() {
     m_hipArch                   = -1;
     m_multiProcCount            = 0;
     m_maxWarpCount              = 0;
-    m_maxBlock                  = 0;
+    m_maxBlock                  = {0, 0, 0};
     m_maxSharedWords            = 0;
     m_maxShmemPerBlock          = 0;
     m_scratchSpaceCount         = 0;
     m_scratchFlagsCount         = 0;
     m_scratchSpace              = nullptr;
     m_scratchFlags              = nullptr;
-    m_scratchConcurrentBitset   = nullptr;
     m_stream                    = nullptr;
     m_team_scratch_current_size = 0;
     m_team_scratch_ptr          = nullptr;
+
+    KOKKOS_IMPL_HIP_SAFE_CALL(hipFree(m_scratch_locks));
+    m_scratch_locks = nullptr;
   }
   if (nullptr != d_driverWorkArray) {
     KOKKOS_IMPL_HIP_SAFE_CALL(hipHostFree(d_driverWorkArray));
@@ -480,18 +510,19 @@ Kokkos::Experimental::HIP::size_type hip_internal_maximum_warp_count() {
   return HIPInternal::singleton().m_maxWarpCount;
 }
 
-Kokkos::Experimental::HIP::size_type hip_internal_maximum_grid_count() {
+std::array<Kokkos::Experimental::HIP::size_type, 3>
+hip_internal_maximum_grid_count() {
   return HIPInternal::singleton().m_maxBlock;
 }
 
 Kokkos::Experimental::HIP::size_type *hip_internal_scratch_space(
-    const Kokkos::Experimental::HIP::size_type size) {
-  return HIPInternal::singleton().scratch_space(size);
+    const HIP &instance, const std::size_t size) {
+  return instance.impl_internal_space_instance()->scratch_space(size);
 }
 
 Kokkos::Experimental::HIP::size_type *hip_internal_scratch_flags(
-    const Kokkos::Experimental::HIP::size_type size) {
-  return HIPInternal::singleton().scratch_flags(size);
+    const HIP &instance, const std::size_t size) {
+  return instance.impl_internal_space_instance()->scratch_flags(size);
 }
 
 }  // namespace Impl
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
index 967c6fdd4be63e11b00c6b7f97b8d3d0b27bbcfc..d8f265834a0e5103b322201a6a5ab8bb86568b7f 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
@@ -72,11 +72,13 @@ struct HIPTraits {
 //----------------------------------------------------------------------------
 
 HIP::size_type hip_internal_maximum_warp_count();
-HIP::size_type hip_internal_maximum_grid_count();
+std::array<HIP::size_type, 3> hip_internal_maximum_grid_count();
 HIP::size_type hip_internal_multiprocessor_count();
 
-HIP::size_type *hip_internal_scratch_space(const HIP::size_type size);
-HIP::size_type *hip_internal_scratch_flags(const HIP::size_type size);
+HIP::size_type *hip_internal_scratch_space(const HIP &instance,
+                                           const std::size_t size);
+HIP::size_type *hip_internal_scratch_flags(const HIP &instance,
+                                           const std::size_t size);
 
 //----------------------------------------------------------------------------
 
@@ -88,13 +90,13 @@ class HIPInternal {
  public:
   using size_type = ::Kokkos::Experimental::HIP::size_type;
 
-  int m_hipDev              = -1;
-  int m_hipArch             = -1;
-  unsigned m_multiProcCount = 0;
-  unsigned m_maxWarpCount   = 0;
-  unsigned m_maxBlock       = 0;
-  unsigned m_maxWavesPerCU  = 0;
-  unsigned m_maxSharedWords = 0;
+  int m_hipDev                        = -1;
+  int m_hipArch                       = -1;
+  unsigned m_multiProcCount           = 0;
+  unsigned m_maxWarpCount             = 0;
+  std::array<size_type, 3> m_maxBlock = {0, 0, 0};
+  unsigned m_maxWavesPerCU            = 0;
+  unsigned m_maxSharedWords           = 0;
   int m_regsPerSM;
   int m_shmemPerSM       = 0;
   int m_maxShmemPerBlock = 0;
@@ -115,12 +117,11 @@ class HIPInternal {
   mutable std::mutex m_mutexSharedMemory;
 
   // Scratch Spaces for Reductions
-  size_type m_scratchSpaceCount = 0;
-  size_type m_scratchFlagsCount = 0;
+  std::size_t m_scratchSpaceCount = 0;
+  std::size_t m_scratchFlagsCount = 0;
 
-  size_type *m_scratchSpace           = nullptr;
-  size_type *m_scratchFlags           = nullptr;
-  uint32_t *m_scratchConcurrentBitset = nullptr;
+  size_type *m_scratchSpace = nullptr;
+  size_type *m_scratchFlags = nullptr;
 
   hipDeviceProp_t m_deviceProp;
 
@@ -133,6 +134,7 @@ class HIPInternal {
   mutable int64_t m_team_scratch_current_size = 0;
   mutable void *m_team_scratch_ptr            = nullptr;
   mutable std::mutex m_team_scratch_mutex;
+  std::int32_t *m_scratch_locks;
 
   bool was_finalized = false;
 
@@ -140,6 +142,7 @@ class HIPInternal {
   // here will break once there are multiple devices though
   static unsigned long *constantMemHostStaging;
   static hipEvent_t constantMemReusable;
+  static std::mutex constantMemMutex;
 
   static HIPInternal &singleton();
 
@@ -164,8 +167,8 @@ class HIPInternal {
   HIPInternal() = default;
 
   // Resizing of reduction related scratch spaces
-  size_type *scratch_space(const size_type size);
-  size_type *scratch_flags(const size_type size);
+  size_type *scratch_space(const std::size_t size);
+  size_type *scratch_flags(const std::size_t size);
   uint32_t impl_get_instance_id() const noexcept;
   // Resizing of team level 1 scratch
   void *resize_team_scratch_space(std::int64_t bytes,
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp
index f209edf7c04ecc9b0001c4527e1bcebc0f24b256..384b7ffd67e488b727d47e0831da4e84e74b3fc3 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp
@@ -56,10 +56,15 @@
 
 // Must use global variable on the device with HIP-Clang
 #ifdef __HIP__
+#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
+__device__ __constant__ extern unsigned long
+    kokkos_impl_hip_constant_memory_buffer[];
+#else
 __device__ __constant__ unsigned long kokkos_impl_hip_constant_memory_buffer
     [Kokkos::Experimental::Impl::HIPTraits::ConstantMemoryUsage /
      sizeof(unsigned long)];
 #endif
+#endif
 
 namespace Kokkos {
 namespace Experimental {
@@ -430,8 +435,9 @@ struct HIPParallelLaunchKernelInvoker<DriverType, LaunchBounds,
                             dim3 const &block, int shmem,
                             HIPInternal const *hip_instance) {
     // Wait until the previous kernel that uses the constant buffer is done
+    std::lock_guard<std::mutex> lock(HIPInternal::constantMemMutex);
     KOKKOS_IMPL_HIP_SAFE_CALL(
-        hipEventSynchronize(hip_instance->constantMemReusable));
+        hipEventSynchronize(HIPInternal::constantMemReusable));
 
     // Copy functor (synchronously) to staging buffer in pinned host memory
     unsigned long *staging = hip_instance->constantMemHostStaging;
@@ -447,7 +453,7 @@ struct HIPParallelLaunchKernelInvoker<DriverType, LaunchBounds,
          get_kernel_func())<<<grid, block, shmem, hip_instance->m_stream>>>();
 
     // Record an event that says when the constant buffer can be reused
-    KOKKOS_IMPL_HIP_SAFE_CALL(hipEventRecord(hip_instance->constantMemReusable,
+    KOKKOS_IMPL_HIP_SAFE_CALL(hipEventRecord(HIPInternal::constantMemReusable,
                                              hip_instance->m_stream));
   }
 };
@@ -509,6 +515,10 @@ void hip_parallel_launch(const DriverType &driver, const dim3 &grid,
                          const dim3 &block, const int shmem,
                          const HIPInternal *hip_instance,
                          const bool prefer_shmem) {
+#ifndef KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
+  HIPParallelLaunch<DriverType, LaunchBounds, LaunchMechanism>(
+      driver, grid, block, shmem, hip_instance, prefer_shmem);
+#else
   // FIXME_HIP - could be if constexpr for c++17
   if (!HIPParallelLaunch<DriverType, LaunchBounds,
                          LaunchMechanism>::default_launchbounds()) {
@@ -532,6 +542,7 @@ void hip_parallel_launch(const DriverType &driver, const dim3 &grid,
                                          hip_instance, prefer_shmem);
     }
   }
+#endif
 }
 }  // namespace Impl
 }  // namespace Experimental
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp
index c4292d35eca793bc58d76ba20db4358f85810996..f334d934123103edb1212e5ab385aebdaf158504 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp
@@ -70,18 +70,11 @@ __global__ void init_lock_array_kernel_atomic() {
   }
 }
 
-__global__ void init_lock_array_kernel_threadid(int N) {
-  unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i < static_cast<unsigned>(N)) {
-    Kokkos::Impl::g_device_hip_lock_arrays.scratch[i] = 0;
-  }
-}
-
 }  // namespace
 
 namespace Impl {
 
-HIPLockArrays g_host_hip_lock_arrays = {nullptr, nullptr, 0};
+HIPLockArrays g_host_hip_lock_arrays = {nullptr, 0};
 
 void initialize_host_hip_lock_arrays() {
 #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
@@ -94,18 +87,12 @@ void initialize_host_hip_lock_arrays() {
   KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(
       &g_host_hip_lock_arrays.atomic,
       sizeof(std::int32_t) * (KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK + 1)));
-  KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(
-      &g_host_hip_lock_arrays.scratch,
-      sizeof(std::int32_t) * (::Kokkos::Experimental::HIP::concurrency())));
 
   g_host_hip_lock_arrays.n = ::Kokkos::Experimental::HIP::concurrency();
 
   KOKKOS_COPY_HIP_LOCK_ARRAYS_TO_DEVICE();
   init_lock_array_kernel_atomic<<<
       (KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK + 1 + 255) / 256, 256, 0, nullptr>>>();
-  init_lock_array_kernel_threadid<<<
-      (::Kokkos::Experimental::HIP::concurrency() + 255) / 256, 256, 0,
-      nullptr>>>(::Kokkos::Experimental::HIP::concurrency());
 }
 
 void finalize_host_hip_lock_arrays() {
@@ -116,9 +103,7 @@ void finalize_host_hip_lock_arrays() {
   if (g_host_hip_lock_arrays.atomic == nullptr) return;
   KOKKOS_IMPL_HIP_SAFE_CALL(hipFree(g_host_hip_lock_arrays.atomic));
   g_host_hip_lock_arrays.atomic = nullptr;
-  KOKKOS_IMPL_HIP_SAFE_CALL(hipFree(g_host_hip_lock_arrays.scratch));
-  g_host_hip_lock_arrays.scratch = nullptr;
-  g_host_hip_lock_arrays.n       = 0;
+  g_host_hip_lock_arrays.n      = 0;
 #ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
   KOKKOS_COPY_HIP_LOCK_ARRAYS_TO_DEVICE();
 #endif
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.hpp
index 71b104c2e4b65aff7ab3b3688c0901d000e8d9d8..c72616dab166ccd0e1f3744eebbc1e2c941bf367 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.hpp
@@ -60,7 +60,6 @@ namespace Impl {
 
 struct HIPLockArrays {
   std::int32_t* atomic;
-  std::int32_t* scratch;
   std::int32_t n;
 };
 
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp
index eae323dd913d7f20383e3afcf5f1264d013b7be1..24b05f293c8ea5d0422b7261befb0a4de976a83a 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp
@@ -84,17 +84,19 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
     using ClosureType =
         ParallelFor<FunctorType, Policy, Kokkos::Experimental::HIP>;
     if (m_policy.m_num_tiles == 0) return;
-    array_index_type const maxblocks = static_cast<array_index_type>(
-        m_policy.space().impl_internal_space_instance()->m_maxBlock);
+    auto const maxblocks =
+        Kokkos::Experimental::Impl::hip_internal_maximum_grid_count();
     if (Policy::rank == 2) {
       dim3 const block(m_policy.m_tile[0], m_policy.m_tile[1], 1);
       dim3 const grid(
-          std::min((m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) /
-                       block.x,
-                   maxblocks),
-          std::min((m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) /
-                       block.y,
-                   maxblocks),
+          std::min<array_index_type>(
+              (m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) /
+                  block.x,
+              maxblocks[0]),
+          std::min<array_index_type>(
+              (m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) /
+                  block.y,
+              maxblocks[1]),
           1);
       Kokkos::Experimental::Impl::hip_parallel_launch<ClosureType,
                                                       LaunchBounds>(
@@ -104,15 +106,18 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
       dim3 const block(m_policy.m_tile[0], m_policy.m_tile[1],
                        m_policy.m_tile[2]);
       dim3 const grid(
-          std::min((m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) /
-                       block.x,
-                   maxblocks),
-          std::min((m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) /
-                       block.y,
-                   maxblocks),
-          std::min((m_policy.m_upper[2] - m_policy.m_lower[2] + block.z - 1) /
-                       block.z,
-                   maxblocks));
+          std::min<array_index_type>(
+              (m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) /
+                  block.x,
+              maxblocks[0]),
+          std::min<array_index_type>(
+              (m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) /
+                  block.y,
+              maxblocks[1]),
+          std::min<array_index_type>(
+              (m_policy.m_upper[2] - m_policy.m_lower[2] + block.z - 1) /
+                  block.z,
+              maxblocks[2]));
       Kokkos::Experimental::Impl::hip_parallel_launch<ClosureType,
                                                       LaunchBounds>(
           *this, grid, block, 0,
@@ -123,15 +128,16 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
       dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1],
                        m_policy.m_tile[2], m_policy.m_tile[3]);
       dim3 const grid(
-          std::min(static_cast<uint32_t>(m_policy.m_tile_end[0] *
-                                         m_policy.m_tile_end[1]),
-                   static_cast<uint32_t>(maxblocks)),
-          std::min((m_policy.m_upper[2] - m_policy.m_lower[2] + block.y - 1) /
-                       block.y,
-                   maxblocks),
-          std::min((m_policy.m_upper[3] - m_policy.m_lower[3] + block.z - 1) /
-                       block.z,
-                   maxblocks));
+          std::min<array_index_type>(
+              m_policy.m_tile_end[0] * m_policy.m_tile_end[1], maxblocks[0]),
+          std::min<array_index_type>(
+              (m_policy.m_upper[2] - m_policy.m_lower[2] + block.y - 1) /
+                  block.y,
+              maxblocks[1]),
+          std::min<array_index_type>(
+              (m_policy.m_upper[3] - m_policy.m_lower[3] + block.z - 1) /
+                  block.z,
+              maxblocks[2]));
       Kokkos::Experimental::Impl::hip_parallel_launch<ClosureType,
                                                       LaunchBounds>(
           *this, grid, block, 0,
@@ -143,15 +149,14 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
                        m_policy.m_tile[2] * m_policy.m_tile[3],
                        m_policy.m_tile[4]);
       dim3 const grid(
-          std::min(static_cast<index_type>(m_policy.m_tile_end[0] *
-                                           m_policy.m_tile_end[1]),
-                   static_cast<index_type>(maxblocks)),
-          std::min(static_cast<index_type>(m_policy.m_tile_end[2] *
-                                           m_policy.m_tile_end[3]),
-                   static_cast<index_type>(maxblocks)),
-          std::min((m_policy.m_upper[4] - m_policy.m_lower[4] + block.z - 1) /
-                       block.z,
-                   maxblocks));
+          std::min<array_index_type>(
+              m_policy.m_tile_end[0] * m_policy.m_tile_end[1], maxblocks[0]),
+          std::min<array_index_type>(
+              m_policy.m_tile_end[2] * m_policy.m_tile_end[3], maxblocks[1]),
+          std::min<array_index_type>(
+              (m_policy.m_upper[4] - m_policy.m_lower[4] + block.z - 1) /
+                  block.z,
+              maxblocks[2]));
       Kokkos::Experimental::Impl::hip_parallel_launch<ClosureType,
                                                       LaunchBounds>(
           *this, grid, block, 0,
@@ -162,15 +167,13 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
       dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1],
                        m_policy.m_tile[2] * m_policy.m_tile[3],
                        m_policy.m_tile[4] * m_policy.m_tile[5]);
-      dim3 const grid(std::min(static_cast<index_type>(m_policy.m_tile_end[0] *
-                                                       m_policy.m_tile_end[1]),
-                               static_cast<index_type>(maxblocks)),
-                      std::min(static_cast<index_type>(m_policy.m_tile_end[2] *
-                                                       m_policy.m_tile_end[3]),
-                               static_cast<index_type>(maxblocks)),
-                      std::min(static_cast<index_type>(m_policy.m_tile_end[4] *
-                                                       m_policy.m_tile_end[5]),
-                               static_cast<index_type>(maxblocks)));
+      dim3 const grid(
+          std::min<array_index_type>(
+              m_policy.m_tile_end[0] * m_policy.m_tile_end[1], maxblocks[0]),
+          std::min<array_index_type>(
+              m_policy.m_tile_end[2] * m_policy.m_tile_end[3], maxblocks[1]),
+          std::min<array_index_type>(
+              m_policy.m_tile_end[4] * m_policy.m_tile_end[5], maxblocks[2]));
       Kokkos::Experimental::Impl::hip_parallel_launch<ClosureType,
                                                       LaunchBounds>(
           *this, grid, block, 0,
@@ -247,7 +250,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
   size_type* m_scratch_flags;
   // Only let one Parallel/Scan modify the shared memory. The
   // constructor acquires the mutex which is released in the destructor.
-  std::unique_lock<std::mutex> m_shared_memory_lock;
+  std::lock_guard<std::mutex> m_shared_memory_lock;
 
   using DeviceIteratePattern = typename Kokkos::Impl::Reduce::DeviceIterateTile<
       Policy::rank, Policy, FunctorType, WorkTag, reference_type>;
@@ -336,7 +339,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
   inline void execute() {
     using ClosureType = ParallelReduce<FunctorType, Policy, ReducerType,
                                        Kokkos::Experimental::HIP>;
-    const int nwork   = m_policy.m_num_tiles;
+    const auto nwork  = m_policy.m_num_tiles;
     if (nwork) {
       int block_size = m_policy.m_prod_tile_dims;
       // CONSTRAINT: Algorithm requires block_size >= product of tile dimensions
@@ -352,12 +355,13 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
 
       m_scratch_space =
           ::Kokkos::Experimental::Impl::hip_internal_scratch_space(
+              m_policy.space(),
               ValueTraits::value_size(
                   ReducerConditional::select(m_functor, m_reducer)) *
-              block_size /* block_size == max block_count */);
+                  block_size /* block_size == max block_count */);
       m_scratch_flags =
           ::Kokkos::Experimental::Impl::hip_internal_scratch_flags(
-              sizeof(size_type));
+              m_policy.space(), sizeof(size_type));
 
       // REQUIRED ( 1 , N , 1 )
       const dim3 block(1, block_size, 1);
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp
index e02ead1e990151a30d0a87b280bada8c774ca5c5..14a282cc31dbb5d0eaa6e3a4578aec9c0cbaa0df 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp
@@ -180,7 +180,7 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
   size_type* m_scratch_flags = nullptr;
   // Only let one ParallelReduce/Scan modify the shared memory. The
   // constructor acquires the mutex which is released in the destructor.
-  std::unique_lock<std::mutex> m_shared_memory_lock;
+  std::lock_guard<std::mutex> m_shared_memory_lock;
 
   static bool constexpr UseShflReduction =
       static_cast<bool>(ValueTraits::StaticValueSize);
@@ -357,12 +357,13 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
 
       m_scratch_space =
           ::Kokkos::Experimental::Impl::hip_internal_scratch_space(
+              m_policy.space(),
               ValueTraits::value_size(
                   ReducerConditional::select(m_functor, m_reducer)) *
-              block_size /* block_size == max block_count */);
+                  block_size /* block_size == max block_count */);
       m_scratch_flags =
           ::Kokkos::Experimental::Impl::hip_internal_scratch_flags(
-              sizeof(size_type));
+              m_policy.space(), sizeof(size_type));
 
       // REQUIRED ( 1 , N , 1 )
       dim3 block(1, block_size, 1);
@@ -484,7 +485,7 @@ class ParallelScanHIPBase {
   int m_grid_x               = 0;
   // Only let one ParallelReduce/Scan modify the shared memory. The
   // constructor acquires the mutex which is released in the destructor.
-  std::unique_lock<std::mutex> m_shared_memory_lock;
+  std::lock_guard<std::mutex> m_shared_memory_lock;
 
  private:
   template <class TagType>
@@ -657,9 +658,9 @@ class ParallelScanHIPBase {
       m_grid_x = (nwork + work_per_block - 1) / work_per_block;
 
       m_scratch_space = Kokkos::Experimental::Impl::hip_internal_scratch_space(
-          ValueTraits::value_size(m_functor) * m_grid_x);
+          m_policy.space(), ValueTraits::value_size(m_functor) * m_grid_x);
       m_scratch_flags = Kokkos::Experimental::Impl::hip_internal_scratch_flags(
-          sizeof(size_type) * 1);
+          m_policy.space(), sizeof(size_type) * 1);
 
       dim3 grid(m_grid_x, 1, 1);
       dim3 block(1, block_size, 1);  // REQUIRED DIMENSIONS ( 1 , N , 1 )
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp
index b794f5bc037111a8774ed23d1181326d3fa23b51..0ec0761f7dd40d4c805b781330df4bee5a0898ca 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp
@@ -53,6 +53,7 @@
 #include <HIP/Kokkos_HIP_Locks.hpp>
 #include <HIP/Kokkos_HIP_Team.hpp>
 #include <HIP/Kokkos_HIP_Instance.hpp>
+#include <Kokkos_MinMaxClamp.hpp>
 
 namespace Kokkos {
 namespace Impl {
@@ -248,7 +249,7 @@ class TeamPolicyInternal<Kokkos::Experimental::HIP, Properties...>
     // Make sure league size is permissible
     if (league_size_ >=
         static_cast<int>(
-            ::Kokkos::Experimental::Impl::hip_internal_maximum_grid_count()))
+            ::Kokkos::Experimental::Impl::hip_internal_maximum_grid_count()[0]))
       Impl::throw_runtime_exception(
           "Requested too large league_size for TeamPolicy on HIP execution "
           "space.");
@@ -449,6 +450,42 @@ class TeamPolicyInternal<Kokkos::Experimental::HIP, Properties...>
   }
 };
 
+__device__ inline int64_t hip_get_scratch_index(
+    Experimental::HIP::size_type league_size, int32_t* scratch_locks) {
+  int64_t threadid = 0;
+  __shared__ int64_t base_thread_id;
+  if (threadIdx.x == 0 && threadIdx.y == 0) {
+    int64_t const wraparound_len = Kokkos::Experimental::min(
+        int64_t(league_size),
+        (int64_t(Kokkos::Impl::g_device_hip_lock_arrays.n)) /
+            (blockDim.x * blockDim.y));
+    threadid = (blockIdx.x * blockDim.z + threadIdx.z) % wraparound_len;
+    threadid *= blockDim.x * blockDim.y;
+    int done = 0;
+    while (!done) {
+      done = (0 == atomicCAS(&scratch_locks[threadid], 0, 1));
+      if (!done) {
+        threadid += blockDim.x * blockDim.y;
+        if (int64_t(threadid + blockDim.x * blockDim.y) >=
+            wraparound_len * blockDim.x * blockDim.y)
+          threadid = 0;
+      }
+    }
+    base_thread_id = threadid;
+  }
+  __syncthreads();
+  threadid = base_thread_id;
+  return threadid;
+}
+
+__device__ inline void hip_release_scratch_index(int32_t* scratch_locks,
+                                                 int64_t threadid) {
+  __syncthreads();
+  if (threadIdx.x == 0 && threadIdx.y == 0) {
+    scratch_locks[threadid] = 0;
+  }
+}
+
 template <typename FunctorType, typename... Properties>
 class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
                   Kokkos::Experimental::HIP> {
@@ -477,9 +514,10 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
   int m_shmem_size;
   void* m_scratch_ptr[2];
   int m_scratch_size[2];
+  int32_t* m_scratch_locks;
   // Only let one ParallelFor/Reduce modify the team scratch memory. The
   // constructor acquires the mutex which is released in the destructor.
-  std::unique_lock<std::mutex> m_scratch_lock;
+  std::lock_guard<std::mutex> m_scratch_lock_guard;
 
   template <typename TagType>
   __device__ inline
@@ -500,29 +538,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
     // Iterate this block through the league
     int64_t threadid = 0;
     if (m_scratch_size[1] > 0) {
-      __shared__ int64_t base_thread_id;
-      if (threadIdx.x == 0 && threadIdx.y == 0) {
-        threadid = (blockIdx.x * blockDim.z + threadIdx.z) %
-                   (Kokkos::Impl::g_device_hip_lock_arrays.n /
-                    (blockDim.x * blockDim.y));
-        threadid *= blockDim.x * blockDim.y;
-        int done = 0;
-        while (!done) {
-          done = (0 ==
-                  atomicCAS(
-                      &Kokkos::Impl::g_device_hip_lock_arrays.scratch[threadid],
-                      0, 1));
-          if (!done) {
-            threadid += blockDim.x * blockDim.y;
-            if (int64_t(threadid + blockDim.x * blockDim.y) >=
-                int64_t(Kokkos::Impl::g_device_hip_lock_arrays.n))
-              threadid = 0;
-          }
-        }
-        base_thread_id = threadid;
-      }
-      __syncthreads();
-      threadid = base_thread_id;
+      threadid = hip_get_scratch_index(m_league_size, m_scratch_locks);
     }
 
     int const int_league_size = static_cast<int>(m_league_size);
@@ -537,9 +553,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
           m_scratch_size[1], league_rank, m_league_size));
     }
     if (m_scratch_size[1] > 0) {
-      __syncthreads();
-      if (threadIdx.x == 0 && threadIdx.y == 0)
-        Kokkos::Impl::g_device_hip_lock_arrays.scratch[threadid] = 0;
+      hip_release_scratch_index(m_scratch_locks, threadid);
     }
   }
 
@@ -565,9 +579,9 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
         m_league_size(arg_policy.league_size()),
         m_team_size(arg_policy.team_size()),
         m_vector_size(arg_policy.impl_vector_length()),
-        m_scratch_lock(m_policy.space()
-                           .impl_internal_space_instance()
-                           ->m_team_scratch_mutex) {
+        m_scratch_lock_guard(m_policy.space()
+                                 .impl_internal_space_instance()
+                                 ->m_team_scratch_mutex) {
     m_team_size = m_team_size >= 0 ? m_team_size
                                    : arg_policy.team_size_recommended(
                                          arg_functor, ParallelForTag());
@@ -578,6 +592,8 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
          FunctorTeamShmemSize<FunctorType>::value(m_functor, m_team_size));
     m_scratch_size[0] = m_policy.scratch_size(0, m_team_size);
     m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
+    m_scratch_locks =
+        m_policy.space().impl_internal_space_instance()->m_scratch_locks;
 
     // Functor's reduce memory, team scan memory, and team shared memory depend
     // upon team size.
@@ -588,10 +604,11 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
             : m_policy.space()
                   .impl_internal_space_instance()
                   ->resize_team_scratch_space(
-                      static_cast<ptrdiff_t>(m_scratch_size[1]) *
-                      static_cast<ptrdiff_t>(
-                          ::Kokkos::Experimental::HIP::concurrency() /
-                          (m_team_size * m_vector_size)));
+                      static_cast<std::int64_t>(m_scratch_size[1]) *
+                      (std::min(static_cast<std::int64_t>(
+                                    Kokkos::Experimental::HIP::concurrency() /
+                                    (m_team_size * m_vector_size)),
+                                static_cast<std::int64_t>(m_league_size))));
 
     int const shmem_size_total = m_shmem_begin + m_shmem_size;
     if (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock <
@@ -672,12 +689,13 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
   size_type m_shmem_size;
   void* m_scratch_ptr[2];
   int m_scratch_size[2];
+  int32_t* m_scratch_locks;
   const size_type m_league_size;
   int m_team_size;
   const size_type m_vector_size;
   // Only let one ParallelFor/Reduce modify the team scratch memory. The
   // constructor acquires the mutex which is released in the destructor.
-  std::unique_lock<std::mutex> m_scratch_lock;
+  std::lock_guard<std::mutex> m_scratch_lock_guard;
 
   template <class TagType>
   __device__ inline
@@ -716,29 +734,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
   __device__ inline void operator()() const {
     int64_t threadid = 0;
     if (m_scratch_size[1] > 0) {
-      __shared__ int64_t base_thread_id;
-      if (threadIdx.x == 0 && threadIdx.y == 0) {
-        threadid = (blockIdx.x * blockDim.z + threadIdx.z) %
-                   (Kokkos::Impl::g_device_hip_lock_arrays.n /
-                    (blockDim.x * blockDim.y));
-        threadid *= blockDim.x * blockDim.y;
-        int done = 0;
-        while (!done) {
-          done = (0 ==
-                  atomicCAS(
-                      &Kokkos::Impl::g_device_hip_lock_arrays.scratch[threadid],
-                      0, 1));
-          if (!done) {
-            threadid += blockDim.x * blockDim.y;
-            if (static_cast<int64_t>(threadid + blockDim.x * blockDim.y) >=
-                static_cast<int64_t>(Kokkos::Impl::g_device_hip_lock_arrays.n))
-              threadid = 0;
-          }
-        }
-        base_thread_id = threadid;
-      }
-      __syncthreads();
-      threadid = base_thread_id;
+      threadid = hip_get_scratch_index(m_league_size, m_scratch_locks);
     }
 
     using ReductionTag = std::conditional_t<UseShflReduction, ShflReductionTag,
@@ -746,10 +742,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
     run(ReductionTag{}, threadid);
 
     if (m_scratch_size[1] > 0) {
-      __syncthreads();
-      if (threadIdx.x == 0 && threadIdx.y == 0) {
-        Kokkos::Impl::g_device_hip_lock_arrays.scratch[threadid] = 0;
-      }
+      hip_release_scratch_index(m_scratch_locks, threadid);
     }
   }
 
@@ -839,12 +832,12 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
   }
 
   inline void execute() {
-    const int nwork            = m_league_size * m_team_size;
+    const bool is_empty_range  = m_league_size == 0 || m_team_size == 0;
     const bool need_device_set = ReduceFunctorHasInit<FunctorType>::value ||
                                  ReduceFunctorHasFinal<FunctorType>::value ||
                                  !m_result_ptr_host_accessible ||
                                  !std::is_same<ReducerType, InvalidType>::value;
-    if ((nwork > 0) || need_device_set) {
+    if (!is_empty_range || need_device_set) {
       const int block_count =
           UseShflReduction
               ? std::min(
@@ -854,15 +847,16 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
               : std::min(static_cast<int>(m_league_size), m_team_size);
 
       m_scratch_space = Kokkos::Experimental::Impl::hip_internal_scratch_space(
+          m_policy.space(),
           value_traits::value_size(
               reducer_conditional::select(m_functor, m_reducer)) *
-          block_count);
+              block_count);
       m_scratch_flags = Kokkos::Experimental::Impl::hip_internal_scratch_flags(
-          sizeof(size_type));
+          m_policy.space(), sizeof(size_type));
 
       dim3 block(m_vector_size, m_team_size, 1);
       dim3 grid(block_count, 1, 1);
-      if (nwork == 0) {
+      if (is_empty_range) {
         block = dim3(1, 1, 1);
         grid  = dim3(1, 1, 1);
       }
@@ -919,9 +913,9 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         m_league_size(arg_policy.league_size()),
         m_team_size(arg_policy.team_size()),
         m_vector_size(arg_policy.impl_vector_length()),
-        m_scratch_lock(m_policy.space()
-                           .impl_internal_space_instance()
-                           ->m_team_scratch_mutex) {
+        m_scratch_lock_guard(m_policy.space()
+                                 .impl_internal_space_instance()
+                                 ->m_team_scratch_mutex) {
     m_team_size = m_team_size >= 0 ? m_team_size
                                    : arg_policy.team_size_recommended(
                                          arg_functor, ParallelReduceTag());
@@ -938,6 +932,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size);
     m_scratch_size[0] = m_shmem_size;
     m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
+    m_scratch_locks =
+        m_policy.space().impl_internal_space_instance()->m_scratch_locks;
     m_scratch_ptr[1] =
         m_team_size <= 0
             ? nullptr
@@ -945,9 +941,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
                   .impl_internal_space_instance()
                   ->resize_team_scratch_space(
                       static_cast<std::int64_t>(m_scratch_size[1]) *
-                      (static_cast<std::int64_t>(
-                          Kokkos::Experimental::HIP::concurrency() /
-                          (m_team_size * m_vector_size))));
+                      (std::min(static_cast<std::int64_t>(
+                                    Kokkos::Experimental::HIP::concurrency() /
+                                    (m_team_size * m_vector_size)),
+                                static_cast<std::int64_t>(m_league_size))));
 
     // The global parallel_reduce does not support vector_length other than 1 at
     // the moment
@@ -1014,9 +1011,9 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         m_league_size(arg_policy.league_size()),
         m_team_size(arg_policy.team_size()),
         m_vector_size(arg_policy.impl_vector_length()),
-        m_scratch_lock(m_policy.space()
-                           .impl_internal_space_instance()
-                           ->m_team_scratch_mutex) {
+        m_scratch_lock_guard(m_policy.space()
+                                 .impl_internal_space_instance()
+                                 ->m_team_scratch_mutex) {
     m_team_size = m_team_size >= 0
                       ? m_team_size
                       : arg_policy.team_size_recommended(arg_functor, reducer,
@@ -1033,16 +1030,19 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size);
     m_scratch_size[0] = m_shmem_size;
     m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
+    m_scratch_locks =
+        m_policy.space().impl_internal_space_instance()->m_scratch_locks;
     m_scratch_ptr[1] =
         m_team_size <= 0
             ? nullptr
             : m_policy.space()
                   .impl_internal_space_instance()
                   ->resize_team_scratch_space(
-                      static_cast<ptrdiff_t>(m_scratch_size[1]) *
-                      static_cast<ptrdiff_t>(
-                          Kokkos::Experimental::HIP::concurrency() /
-                          (m_team_size * m_vector_size)));
+                      static_cast<std::int64_t>(m_scratch_size[1]) *
+                      (std::min(static_cast<std::int64_t>(
+                                    Kokkos::Experimental::HIP::concurrency() /
+                                    (m_team_size * m_vector_size)),
+                                static_cast<std::int64_t>(m_league_size))));
 
     // The global parallel_reduce does not support vector_length other than 1 at
     // the moment
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp
index 98dab9a0fbca41de38234fab4173cd4d4f763699..7929e6df7445f8c461c733cfd81202d76e34c8c5 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp
@@ -147,14 +147,12 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, true> {
     scalar_intra_block_reduction(functor, value, true,
                                  my_global_team_buffer_element, shared_elements,
                                  shared_team_buffer_elements);
-    __threadfence();
     __syncthreads();
 
     // Use the last block that is done to do the do the reduction across the
     // block
     __shared__ unsigned int num_teams_done;
     if (threadIdx.x + threadIdx.y == 0) {
-      __threadfence();
       num_teams_done = Kokkos::atomic_fetch_add(global_flags, 1) + 1;
     }
     bool is_last_block = false;
@@ -263,7 +261,6 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, false> {
     // block
     __shared__ unsigned int num_teams_done;
     if (threadIdx.x + threadIdx.y == 0) {
-      __threadfence();
       num_teams_done = Kokkos::atomic_fetch_add(global_flags, 1) + 1;
     }
     bool is_last_block = false;
@@ -450,7 +447,6 @@ __device__ bool hip_single_inter_block_reduce_scan_impl(
   n_done = 0;
   __syncthreads();
   if (threadIdx.y == 0) {
-    __threadfence();
     n_done = 1 + atomicInc(global_flags, block_count - 1);
   }
   __syncthreads();
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp
index fe7c34bb80973a224d1d2ff6d092c4e9bc3e1571..5a27e2e0ddd2a02a03866b2d4334770518da94c2 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp
@@ -163,7 +163,6 @@ __device__ inline bool hip_inter_block_shuffle_reduction(
   // One warp of last block performs inter block reduction through loading the
   // block values from global scratch_memory
   bool last_block = false;
-  __threadfence();
   __syncthreads();
   int constexpr warp_size = Kokkos::Experimental::Impl::HIPTraits::WarpSize;
   if (id < warp_size) {
@@ -180,7 +179,7 @@ __device__ inline bool hip_inter_block_shuffle_reduction(
       last_block = true;
       value      = neutral;
 
-      pointer_type const volatile global =
+      pointer_type const global =
           reinterpret_cast<pointer_type>(m_scratch_space);
 
       // Reduce all global values with splitting work over threads in one warp
@@ -286,7 +285,6 @@ __device__ inline bool hip_inter_block_shuffle_reduction(
   // block values from global scratch_memory
   bool last_block = false;
 
-  __threadfence();
   __syncthreads();
   int constexpr warp_size = Kokkos::Experimental::Impl::HIPTraits::WarpSize;
   if (id < warp_size) {
@@ -303,7 +301,7 @@ __device__ inline bool hip_inter_block_shuffle_reduction(
       last_block = true;
       reducer.init(value);
 
-      pointer_type const volatile global =
+      pointer_type const global =
           reinterpret_cast<pointer_type>(m_scratch_space);
 
       // Reduce all global values with splitting work over threads in one warp
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp
index e25ebe2ab355e626273aeff34615db45aa3465c7..6ade677fa89f9e778eb5965d3ae5ae8b13cce711 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp
@@ -74,7 +74,7 @@ hipStream_t get_deep_copy_stream() {
 }  // namespace
 
 void DeepCopyHIP(void* dst, void const* src, size_t n) {
-  KOKKOS_IMPL_HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault));
+  KOKKOS_IMPL_HIP_SAFE_CALL(hipMemcpyAsync(dst, src, n, hipMemcpyDefault));
 }
 
 void DeepCopyAsyncHIP(const Kokkos::Experimental::HIP& instance, void* dst,
@@ -261,21 +261,15 @@ SharedAllocationRecord<void, void> SharedAllocationRecord<
 
 SharedAllocationRecord<Kokkos::Experimental::HIPSpace,
                        void>::~SharedAllocationRecord() {
-  const char* label = nullptr;
-  if (Kokkos::Profiling::profileLibraryLoaded()) {
-    SharedAllocationHeader header;
-    Kokkos::Impl::DeepCopy<Kokkos::Experimental::HIPSpace, HostSpace>(
-        &header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader));
-    label = header.label();
-  }
   auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size;
-  m_space.deallocate(label, SharedAllocationRecord<void, void>::m_alloc_ptr,
+  m_space.deallocate(m_label.c_str(),
+                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                      alloc_size, (alloc_size - sizeof(SharedAllocationHeader)));
 }
 
 SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace,
                        void>::~SharedAllocationRecord() {
-  m_space.deallocate(RecordBase::m_alloc_ptr->m_label,
+  m_space.deallocate(m_label.c_str(),
                      SharedAllocationRecord<void, void>::m_alloc_ptr,
                      SharedAllocationRecord<void, void>::m_alloc_size);
 }
@@ -294,7 +288,8 @@ SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::
 #endif
           Kokkos::Impl::checked_allocation_with_header(arg_space, arg_label,
                                                        arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
 
   SharedAllocationHeader header;
@@ -302,8 +297,13 @@ SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::
   this->base_t::_fill_host_accessible_header_info(header, arg_label);
 
   // Copy to device memory
+  Kokkos::Experimental::HIP exec;
   Kokkos::Impl::DeepCopy<Kokkos::Experimental::HIPSpace, HostSpace>(
-      RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
+      exec, RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
+  exec.fence(
+      "SharedAllocationRecord<Kokkos::Experimental::HIPSpace, "
+      "void>::SharedAllocationRecord(): fence after copying header from "
+      "HostSpace");
 }
 
 SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>::
@@ -320,7 +320,8 @@ SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>::
 #endif
           Kokkos::Impl::checked_allocation_with_header(arg_space, arg_label,
                                                        arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
   // Fill in the Header information, directly accessible via host pinned memory
   this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr,
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp
index f7e38a508b1696fe09701bc5b01de4cecd2d1344..99f61ed36f9ceece136595157d36abffaab82555 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp
@@ -45,28 +45,47 @@
 #ifndef KOKKOS_HIP_UNIQUE_TOKEN_HPP
 #define KOKKOS_HIP_UNIQUE_TOKEN_HPP
 
-#include <impl/Kokkos_ConcurrentBitset.hpp>
 #include <Kokkos_HIP_Space.hpp>
 #include <Kokkos_UniqueToken.hpp>
+#include <impl/Kokkos_SharedAlloc.hpp>
 
 namespace Kokkos {
+
+namespace Impl {
+Kokkos::View<uint32_t*, Kokkos::Experimental::HIPSpace>
+hip_global_unique_token_locks(bool deallocate = false);
+}
+
 namespace Experimental {
 
 // both global and instance Unique Tokens are implemented in the same way
+// the global version has one shared static lock array underneath
+// but it can't be a static member variable since we need to acces it on device
+// and we share the implementation with the instance version
 template <>
 class UniqueToken<HIP, UniqueTokenScope::Global> {
  protected:
-  uint32_t volatile* m_buffer;
-  uint32_t m_count;
+  View<uint32_t*, HIPSpace> m_locks;
 
  public:
   using execution_space = HIP;
   using size_type       = int32_t;
 
-  explicit UniqueToken(execution_space const& = execution_space())
-      : m_buffer(Impl::HIPInternal::singleton().m_scratchConcurrentBitset),
-        m_count(HIP::concurrency()) {}
+  explicit UniqueToken(execution_space const& = HIP())
+      : m_locks(Kokkos::Impl::hip_global_unique_token_locks()) {}
+
+ protected:
+  // These are constructors for the Instance version
+  UniqueToken(size_type max_size) {
+    m_locks = Kokkos::View<uint32_t*, HIPSpace>("Kokkos::UniqueToken::m_locks",
+                                                max_size);
+  }
+  UniqueToken(size_type max_size, execution_space const& exec) {
+    m_locks = Kokkos::View<uint32_t*, HIPSpace>(
+        Kokkos::view_alloc(exec, "Kokkos::UniqueToken::m_locks"), max_size);
+  }
 
+ public:
   KOKKOS_DEFAULTED_FUNCTION
   UniqueToken(const UniqueToken&) = default;
 
@@ -81,49 +100,83 @@ class UniqueToken<HIP, UniqueTokenScope::Global> {
 
   /// \brief upper bound for acquired values, i.e. 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
-  size_type size() const noexcept { return m_count; }
+  size_type size() const noexcept { return m_locks.extent(0); }
+
+ private:
+  // FIXME_HIP
+  KOKKOS_INLINE_FUNCTION size_type impl_acquire() const {
+    int idx = blockIdx.x * (blockDim.x * blockDim.y) +
+              threadIdx.y * blockDim.x + threadIdx.x;
+    idx                            = idx % size();
+    unsigned long long active      = __ballot(1);
+    unsigned long long done_active = 0;
+    bool done                      = false;
+    while (active != done_active) {
+      if (!done) {
+        // Using m_locks(idx) fails self containment test of Kokkos_HIP.hpp
+        // That failure stems from the space access verification because the
+        // Host execution space is not defined
+        if (Kokkos::atomic_compare_exchange(m_locks.data() + idx, 0, 1) == 0) {
+          done = true;
+        } else {
+          idx += blockDim.y * blockDim.x + 1;
+          idx = idx % size();
+        }
+      }
+      done_active = __ballot(done ? 1 : 0);
+    }
 
+// Make sure that all writes in the previous lock owner are visible to me
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_thread_fence(desul::MemoryOrderAcquire(),
+                               desul::MemoryScopeDevice());
+#else
+    Kokkos::memory_fence();
+#endif
+    return idx;
+  }
+
+ public:
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   size_type acquire() const {
-    const Kokkos::pair<int, int> result =
-        Kokkos::Impl::concurrent_bitset::acquire_bounded(
-            m_buffer, m_count, Kokkos::Impl::clock_tic() % m_count);
-
-    if (result.first < 0) {
-      Kokkos::abort(
-          "UniqueToken<HIP> failure to acquire tokens, no tokens available");
-    }
-
-    return result.first;
+    KOKKOS_IF_ON_DEVICE(return impl_acquire();)
+    KOKKOS_IF_ON_HOST(return 0;)
   }
 
   /// \brief release an acquired value
   KOKKOS_INLINE_FUNCTION
-  void release(size_type i) const noexcept {
-    Kokkos::Impl::concurrent_bitset::release(m_buffer, i);
+  void release(size_type idx) const noexcept {
+// Make sure my writes are visible to the next lock owner
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_thread_fence(desul::MemoryOrderRelease(),
+                               desul::MemoryScopeDevice());
+#else
+    Kokkos::memory_fence();
+#endif
+    (void)Kokkos::atomic_exchange(m_locks.data() + idx, 0);
   }
 };
 
 template <>
 class UniqueToken<HIP, UniqueTokenScope::Instance>
     : public UniqueToken<HIP, UniqueTokenScope::Global> {
-  View<uint32_t*, HIPSpace> m_buffer_view;
-
  public:
-  explicit UniqueToken(execution_space const& arg = execution_space())
-      : UniqueToken<HIP, UniqueTokenScope::Global>(arg) {}
-
-  UniqueToken(size_type max_size, execution_space const& = execution_space())
-      : m_buffer_view(
-            "UniqueToken::m_buffer_view",
-            ::Kokkos::Impl::concurrent_bitset::buffer_bound(max_size)) {
-    m_buffer = m_buffer_view.data();
-    m_count  = max_size;
-  }
+  // The instance version will forward to protected constructor which creates
+  // a lock array per instance
+  UniqueToken()
+      : UniqueToken<HIP, UniqueTokenScope::Global>(
+            Kokkos::Experimental::HIP().concurrency()) {}
+  explicit UniqueToken(execution_space const& arg)
+      : UniqueToken<HIP, UniqueTokenScope::Global>(
+            Kokkos::Experimental::HIP().concurrency(), arg) {}
+  explicit UniqueToken(size_type max_size)
+      : UniqueToken<HIP, UniqueTokenScope::Global>(max_size) {}
+  UniqueToken(size_type max_size, execution_space const& arg)
+      : UniqueToken<HIP, UniqueTokenScope::Global>(max_size, arg) {}
 };
 
 }  // namespace Experimental
 }  // namespace Kokkos
 
-#endif
+#endif  // KOKKOS_HIP_UNIQUE_TOKEN_HPP
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp
index c5ca89a9fdeb61bd7965df1b706443b96b294180..a6c65ee5eb08c8fe308adc7bbe70158e440dfcc9 100644
--- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp
+++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp
@@ -65,7 +65,6 @@ struct in_place_shfl_op {
     return *static_cast<Derived const*>(this);
   }
 
-  // FIXME_HIP depends on UB
   // sizeof(Scalar) < sizeof(int) case
   template <class Scalar>
   // requires _assignable_from_bits<Scalar>
@@ -76,17 +75,19 @@ struct in_place_shfl_op {
     union conv_type {
       Scalar orig;
       shfl_type conv;
+      // This should be fine, members get explicitly reset, which changes the
+      // active member
+      KOKKOS_FUNCTION conv_type() { conv = 0; }
     };
     conv_type tmp_in;
     tmp_in.orig = in;
-    conv_type tmp_out;
-    tmp_out.conv = tmp_in.conv;
+    shfl_type tmp_out;
+    tmp_out = reinterpret_cast<shfl_type&>(tmp_in.orig);
     conv_type res;
     //------------------------------------------------
-    res.conv = self().do_shfl_op(
-        reinterpret_cast<shfl_type const&>(tmp_out.conv), lane_or_delta, width);
+    res.conv = self().do_shfl_op(tmp_out, lane_or_delta, width);
     //------------------------------------------------
-    out = res.orig;
+    out = reinterpret_cast<Scalar&>(res.conv);
   }
 
   // sizeof(Scalar) == sizeof(int) case
@@ -121,6 +122,9 @@ struct in_place_shfl_op {
       reinterpret_cast<shuffle_as_t*>(&out)[i] = self().do_shfl_op(
           reinterpret_cast<shuffle_as_t const*>(&val)[i], lane_or_delta, width);
     }
+    // FIXME_HIP - this fence should be removed once the hip-clang compiler
+    // properly supports fence semanics for shuffles
+    __atomic_signal_fence(__ATOMIC_SEQ_CST);
   }
 };
 
@@ -128,12 +132,7 @@ struct in_place_shfl_fn : in_place_shfl_op<in_place_shfl_fn> {
   template <class T>
   __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(T& val, int lane,
                                                   int width) const noexcept {
-    // FIXME_HIP Not sure why there is a race condition here. Note that the
-    // problem was also found in the CUDA backend with CUDA clang
-    // (https://github.com/kokkos/kokkos/issues/941) but it seems more limited
-    // in CUDA clang.
     auto return_val = __shfl(val, lane, width);
-    __threadfence();
     return return_val;
   }
 };
@@ -147,12 +146,7 @@ struct in_place_shfl_up_fn : in_place_shfl_op<in_place_shfl_up_fn> {
   template <class T>
   __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(T& val, int lane,
                                                   int width) const noexcept {
-    // FIXME_HIP Not sure why there is a race condition here. Note that the
-    // problem was also found in the CUDA backend with CUDA clang
-    // (https://github.com/kokkos/kokkos/issues/941) but it seems more limited
-    // in CUDA clang.
     auto return_val = __shfl_up(val, lane, width);
-    __threadfence();
     return return_val;
   }
 };
@@ -167,12 +161,7 @@ struct in_place_shfl_down_fn : in_place_shfl_op<in_place_shfl_down_fn> {
   template <class T>
   __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(T& val, int lane,
                                                   int width) const noexcept {
-    // FIXME_HIP Not sure why there is a race condition here. Note that the
-    // problem was also found in the CUDA backend with CUDA clang
-    // (https://github.com/kokkos/kokkos/issues/941) but it seems more limited
-    // in CUDA clang.
     auto return_val = __shfl_down(val, lane, width);
-    __threadfence();
     return return_val;
   }
 };
diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp
index d9cb66e11f4638c8635d0e9e33d7cbda67e1cada..acf2224f027de3abff1ab0cc84787c0b0ccfc255 100644
--- a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp
+++ b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp
@@ -47,18 +47,31 @@
 #ifdef KOKKOS_ENABLE_HPX
 #include <Kokkos_HPX.hpp>
 
-#include <hpx/util/yield_while.hpp>
+#include <hpx/local/condition_variable.hpp>
+#include <hpx/local/init.hpp>
+#include <hpx/local/thread.hpp>
+#include <hpx/local/mutex.hpp>
+
+#include <atomic>
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <type_traits>
 
 namespace Kokkos {
 namespace Experimental {
 
 bool HPX::m_hpx_initialized = false;
-std::atomic<uint32_t> HPX::m_next_instance_id{1};
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
-std::atomic<uint32_t> HPX::m_active_parallel_region_count{0};
-HPX::instance_data HPX::m_global_instance_data;
+std::atomic<uint32_t> HPX::m_next_instance_id{HPX::impl_default_instance_id() +
+                                              1};
+uint32_t HPX::m_active_parallel_region_count{0};
+hpx::spinlock HPX::m_active_parallel_region_count_mutex;
+hpx::condition_variable_any HPX::m_active_parallel_region_count_cond;
+HPX::instance_data HPX::m_default_instance_data;
 #else
-Kokkos::Impl::thread_buffer HPX::m_global_buffer;
+Kokkos::Impl::thread_buffer HPX::m_default_buffer;
 #endif
 
 int HPX::concurrency() {
@@ -77,7 +90,8 @@ int HPX::concurrency() {
 void HPX::impl_initialize(int thread_count) {
   hpx::runtime *rt = hpx::get_runtime_ptr();
   if (rt == nullptr) {
-    std::vector<std::string> config = {
+    hpx::local::init_params i;
+    i.cfg = {
         "hpx.os_threads=" + std::to_string(thread_count),
 #ifdef KOKKOS_ENABLE_DEBUG
         "--hpx:attach-debugger=exception",
@@ -86,21 +100,7 @@ void HPX::impl_initialize(int thread_count) {
     int argc_hpx     = 1;
     char name[]      = "kokkos_hpx";
     char *argv_hpx[] = {name, nullptr};
-    hpx::start(nullptr, argc_hpx, argv_hpx, config);
-
-#if HPX_VERSION_FULL < 0x010400
-    // This has been fixed in HPX 1.4.0.
-    //
-    // NOTE: Wait for runtime to start. hpx::start returns as soon as
-    // possible, meaning some operations are not allowed immediately
-    // after hpx::start. Notably, hpx::stop needs state_running. This
-    // needs to be fixed in HPX itself.
-
-    // Get runtime pointer again after it has been started.
-    rt = hpx::get_runtime_ptr();
-    hpx::util::yield_while(
-        [rt]() { return rt->get_state() < hpx::state_running; });
-#endif
+    hpx::local::start(nullptr, argc_hpx, argv_hpx, i);
 
     m_hpx_initialized = true;
   }
@@ -109,7 +109,8 @@ void HPX::impl_initialize(int thread_count) {
 void HPX::impl_initialize() {
   hpx::runtime *rt = hpx::get_runtime_ptr();
   if (rt == nullptr) {
-    std::vector<std::string> config = {
+    hpx::local::init_params i;
+    i.cfg = {
 #ifdef KOKKOS_ENABLE_DEBUG
         "--hpx:attach-debugger=exception",
 #endif
@@ -117,17 +118,7 @@ void HPX::impl_initialize() {
     int argc_hpx     = 1;
     char name[]      = "kokkos_hpx";
     char *argv_hpx[] = {name, nullptr};
-    hpx::start(nullptr, argc_hpx, argv_hpx, config);
-
-    // NOTE: Wait for runtime to start. hpx::start returns as soon as
-    // possible, meaning some operations are not allowed immediately
-    // after hpx::start. Notably, hpx::stop needs state_running. This
-    // needs to be fixed in HPX itself.
-
-    // Get runtime pointer again after it has been started.
-    rt = hpx::get_runtime_ptr();
-    hpx::util::yield_while(
-        [rt]() { return rt->get_state() < hpx::state_running; });
+    hpx::local::start(nullptr, argc_hpx, argv_hpx, i);
 
     m_hpx_initialized = true;
   }
@@ -142,8 +133,8 @@ void HPX::impl_finalize() {
   if (m_hpx_initialized) {
     hpx::runtime *rt = hpx::get_runtime_ptr();
     if (rt != nullptr) {
-      hpx::apply([]() { hpx::finalize(); });
-      hpx::stop();
+      hpx::apply([]() { hpx::local::finalize(); });
+      hpx::local::stop();
     } else {
       Kokkos::abort(
           "Kokkos::Experimental::HPX::impl_finalize: Kokkos started "
@@ -190,9 +181,11 @@ void HPXSpaceInitializer::finalize(const bool all_spaces) {
   }
 }
 
-void HPXSpaceInitializer::fence() { Kokkos::Experimental::HPX().fence(); }
 void HPXSpaceInitializer::fence(const std::string &name) {
-  Kokkos::Experimental::HPX().fence(name);
+  Kokkos::Experimental::HPX::impl_fence_global(name);
+}
+void HPXSpaceInitializer::fence() {
+  Kokkos::Experimental::HPX::impl_fence_global();
 }
 
 void HPXSpaceInitializer::print_configuration(std::ostream &msg,
diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX_ChunkedRoundRobinExecutor.hpp b/packages/kokkos/core/src/HPX/Kokkos_HPX_ChunkedRoundRobinExecutor.hpp
deleted file mode 100644
index b364b4a6eb2f93a424bd8b4904d743456174425f..0000000000000000000000000000000000000000
--- a/packages/kokkos/core/src/HPX/Kokkos_HPX_ChunkedRoundRobinExecutor.hpp
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 3.0
-//       Copyright (2020) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
-//
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef KOKKOS_HPX_CHUNKEDROUNDROBINEXECUTOR_HPP
-#define KOKKOS_HPX_CHUNKEDROUNDROBINEXECUTOR_HPP
-
-#include <hpx/config.hpp>
-#include <hpx/async_launch_policy_dispatch.hpp>
-#include <hpx/lcos/local/latch.hpp>
-#include <hpx/parallel/executors/execution.hpp>
-#include <hpx/parallel/executors/post_policy_dispatch.hpp>
-#include <hpx/runtime/get_os_thread_count.hpp>
-#include <hpx/runtime/threads/thread_helpers.hpp>
-#include <hpx/traits/is_executor.hpp>
-#include <hpx/traits/is_launch_policy.hpp>
-#include <hpx/util/deferred_call.hpp>
-
-#include <cstddef>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-namespace Kokkos {
-namespace Impl {
-
-///////////////////////////////////////////////////////////////////////////
-/// A \a ChunkedRoundRobinExecutor creates groups of parallel execution
-/// agents which execute in threads implicitly created by the executor. This
-/// executor uses the scheduling hint to spawn threads with the first grouped on
-/// the first core, the second group getting the next consecutive threads, etc.
-/// For example, if 10 tasks are spawned (num_tasks is set to 10) and num_cores
-/// is set to 2 the executor will schedule the tasks in the following order:
-///
-/// worker thread | 1 | 2
-/// --------------+---+---
-/// tasks         | 1 | 6
-///               | 2 | 7
-///               | 3 | 8
-///               | 4 | 9
-///               | 5 | 10
-///
-/// rather than the typical round robin:
-///
-/// worker thread | 1 | 2
-/// --------------+---+---
-/// tasks         | 1 | 2
-///               | 3 | 4
-///               | 5 | 6
-///               | 7 | 8
-///               | 9 | 10
-struct ChunkedRoundRobinExecutor {
-  using execution_category = hpx::parallel::execution::parallel_execution_tag;
-
-  HPX_CONSTEXPR explicit ChunkedRoundRobinExecutor(
-      std::size_t num_tasks = std::size_t(-1), std::size_t core_offset = 0,
-      std::size_t num_cores = hpx::get_os_thread_count())
-      : num_tasks_(num_tasks),
-        core_offset_(core_offset),
-        num_cores_(num_cores),
-        num_tasks_per_core_(double(num_tasks_) / num_cores_),
-        num_tasks_spawned_(0) {}
-
-  bool operator==(ChunkedRoundRobinExecutor const &rhs) const noexcept {
-    return num_cores_ == rhs.num_cores_ && num_tasks_ == rhs.num_tasks_;
-  }
-
-  bool operator!=(ChunkedRoundRobinExecutor const &rhs) const noexcept {
-    return !(*this == rhs);
-  }
-
-  ChunkedRoundRobinExecutor const &context() const noexcept { return *this; }
-
-  template <typename F, typename... Ts>
-  hpx::future<
-      typename hpx::util::detail::invoke_deferred_result<F, Ts...>::type>
-  async_execute(F &&f, Ts &&... ts) const {
-    return hpx::detail::async_launch_policy_dispatch<hpx::launch>::call(
-        hpx::launch::async_policy{}, std::forward<F>(f),
-        std::forward<Ts>(ts)...);
-  }
-
-  template <typename F, typename... Ts>
-  void post(F &&f, Ts &&... ts) const {
-    hpx::util::thread_description const desc(
-        f, "Kokkos::Impl::ChunkedRoundRobinExecutor::async_execute");
-    hpx::threads::thread_schedule_hint const hint(
-        hpx::threads::thread_schedule_hint_mode_thread,
-        core_offset_ + std::floor(double(num_tasks_spawned_ % num_tasks_) /
-                                  num_tasks_per_core_));
-
-    hpx::threads::register_thread_nullary(
-        hpx::util::deferred_call(std::forward<F>(f), std::forward<Ts>(ts)...),
-        desc, hpx::threads::pending, false,
-        hpx::threads::thread_priority_normal, hint,
-        hpx::threads::thread_stacksize_default);
-
-    ++num_tasks_spawned_;
-  }
-
-  template <typename F, typename Shape, typename... Ts>
-  std::vector<hpx::future<typename hpx::parallel::execution::detail::
-                              bulk_function_result<F, Shape, Ts...>::type>>
-  bulk_async_execute(F &&f, Shape const &shape, Ts &&... ts) {
-    hpx::util::thread_description desc(
-        f, "Kokkos::Impl::ChunkedRoundRobinExecutor::bulk_sync_execute");
-
-    hpx::lcos::local::latch l(hpx::util::size(shape));
-    // Keep a separate counter for bulk launch
-    std::size_t num_tasks_spawned = 0;
-
-    for (auto const &s : shape) {
-      hpx::threads::thread_schedule_hint const hint(
-          hpx::threads::thread_schedule_hint_mode_thread,
-          core_offset_ + std::floor(double(num_tasks_spawned % num_tasks_) /
-                                    num_tasks_per_core_));
-
-      hpx::threads::register_thread_nullary(
-          [&, s]() {
-            hpx::util::invoke(f, s, ts...);
-            l.count_down(1);
-          },
-          desc, hpx::threads::pending, false,
-          hpx::threads::thread_priority_normal, hint,
-          hpx::threads::thread_stacksize_default);
-
-      ++num_tasks_spawned;
-    }
-
-    // NOTE: We block here to avoid extra synchronization. Since this executor
-    // is only used in the HPX backend we get away with this.
-    l.wait();
-
-    return {};
-  }
-
- private:
-  std::size_t num_tasks_;
-  std::size_t core_offset_;
-  std::size_t num_cores_;
-  double num_tasks_per_core_;
-  mutable std::size_t num_tasks_spawned_;
-};
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-namespace hpx {
-namespace parallel {
-namespace execution {
-
-template <>
-struct is_one_way_executor<Kokkos::Impl::ChunkedRoundRobinExecutor>
-    : std::true_type {};
-
-template <>
-struct is_two_way_executor<Kokkos::Impl::ChunkedRoundRobinExecutor>
-    : std::true_type {};
-
-template <>
-struct is_bulk_two_way_executor<Kokkos::Impl::ChunkedRoundRobinExecutor>
-    : std::true_type {};
-
-}  // namespace execution
-}  // namespace parallel
-}  // namespace hpx
-
-#endif
diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp b/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp
index 7bb3ca5d007023d99314c8d45de748da7836136d..d77b1c2c748a272a288ff5059a6fecd89312a965 100644
--- a/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp
+++ b/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp
@@ -50,11 +50,10 @@
 
 #include <Kokkos_TaskScheduler_fwd.hpp>
 
-#include <HPX/Kokkos_HPX_ChunkedRoundRobinExecutor.hpp>
 #include <Kokkos_HPX.hpp>
 
-#include <hpx/apply.hpp>
-#include <hpx/lcos/local/latch.hpp>
+#include <hpx/local/execution.hpp>
+#include <hpx/local/future.hpp>
 
 #include <type_traits>
 
@@ -89,8 +88,14 @@ class TaskQueueSpecialization<
 
   // Must provide task queue execution function
   void execute_task() const {
-    using hpx::apply;
-    using hpx::lcos::local::latch;
+    // See [note 1] in Kokkos_HPX.hpp for an explanation. The work graph policy
+    // does not store an execution space instance, so we only need to reset the
+    // parallel region count here.
+    Kokkos::Experimental::HPX::reset_count_on_exit_parallel reset_count_on_exit;
+
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
     using task_base_type = typename scheduler_type::task_base_type;
 
     const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
@@ -100,51 +105,39 @@ class TaskQueueSpecialization<
 
     auto &queue = scheduler->queue();
 
-    latch num_tasks_remaining(num_worker_threads);
-    ChunkedRoundRobinExecutor exec(num_worker_threads);
-
-    for (int thread = 0; thread < num_worker_threads; ++thread) {
-      apply(exec, [this, &num_tasks_remaining, &queue, &buffer,
-                   num_worker_threads]() {
-        // NOTE: This implementation has been simplified based on the
-        // assumption that team_size = 1. The HPX backend currently only
-        // supports a team size of 1.
-        std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id();
-
-        buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id());
-        HPXTeamMember member(
-            TeamPolicyInternal<Kokkos::Experimental::HPX>(
-                Kokkos::Experimental::HPX(), num_worker_threads, 1),
-            0, t, buffer.get(t), 512);
-
-        member_type single_exec(*scheduler, member);
-        member_type &team_exec = single_exec;
-
-        auto &team_scheduler = team_exec.scheduler();
-        auto current_task    = OptionalRef<task_base_type>(nullptr);
-
-        while (!queue.is_done()) {
-          current_task =
-              queue.pop_ready_task(team_scheduler.team_scheduler_info());
-
-          if (current_task) {
-            KOKKOS_ASSERT(current_task->is_single_runnable() ||
-                          current_task->is_team_runnable());
-            current_task->as_runnable_task().run(single_exec);
-            queue.complete((*std::move(current_task)).as_runnable_task(),
-                           team_scheduler.team_scheduler_info());
-          }
-        }
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
-
-    num_tasks_remaining.wait();
-
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
-    Kokkos::Experimental::HPX::impl_decrement_active_parallel_region_count();
-#endif
+    for_loop(par.with(static_chunk_size(1)), 0, num_worker_threads,
+             [this, &queue, &buffer, num_worker_threads](int) {
+               // NOTE: This implementation has been simplified based on the
+               // assumption that team_size = 1. The HPX backend currently only
+               // supports a team size of 1.
+               std::size_t t =
+                   Kokkos::Experimental::HPX::impl_hardware_thread_id();
+
+               buffer.get(t);
+               HPXTeamMember member(
+                   TeamPolicyInternal<Kokkos::Experimental::HPX>(
+                       Kokkos::Experimental::HPX(), num_worker_threads, 1),
+                   0, t, buffer.get(t), 512);
+
+               member_type single_exec(*scheduler, member);
+               member_type &team_exec = single_exec;
+
+               auto &team_scheduler = team_exec.scheduler();
+               auto current_task    = OptionalRef<task_base_type>(nullptr);
+
+               while (!queue.is_done()) {
+                 current_task =
+                     queue.pop_ready_task(team_scheduler.team_scheduler_info());
+
+                 if (current_task) {
+                   KOKKOS_ASSERT(current_task->is_single_runnable() ||
+                                 current_task->is_team_runnable());
+                   current_task->as_runnable_task().run(single_exec);
+                   queue.complete((*std::move(current_task)).as_runnable_task(),
+                                  team_scheduler.team_scheduler_info());
+                 }
+               }
+             });
   }
 
   static uint32_t get_max_team_count(execution_space const &espace) {
@@ -216,13 +209,22 @@ class TaskQueueSpecializationConstrained<
     task_queue.scheduler = &scheduler;
     Kokkos::Impl::dispatch_execute_task(&task_queue,
                                         Kokkos::Experimental::HPX());
-    Kokkos::Experimental::HPX().fence()"Kokkos::Impl::TaskQueueSpecializationConstrained::execute: fence after task execution";
+    Kokkos::Experimental::HPX().fence(
+        "Kokkos::Impl::TaskQueueSpecialization<SimpleTask>::execute: fence "
+        "after task execution");
   }
 
   // Must provide task queue execution function
   void execute_task() const {
-    using hpx::apply;
-    using hpx::lcos::local::latch;
+    // See [note 1] in Kokkos_HPX.hpp for an explanation. The work graph policy
+    // does not store an execution space instance, so we only need to reset the
+    // parallel region count here.
+    Kokkos::Experimental::HPX::reset_count_on_exit_parallel reset_count_on_exit;
+
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
+
     using task_base_type = typename scheduler_type::task_base;
     using queue_type     = typename scheduler_type::queue_type;
 
@@ -236,58 +238,47 @@ class TaskQueueSpecializationConstrained<
     auto &queue = scheduler->queue();
     queue.initialize_team_queues(num_worker_threads);
 
-    latch num_tasks_remaining(num_worker_threads);
-    ChunkedRoundRobinExecutor exec(num_worker_threads);
-
-    for (int thread = 0; thread < num_worker_threads; ++thread) {
-      apply(exec, [this, &num_tasks_remaining, &buffer, num_worker_threads]() {
-        // NOTE: This implementation has been simplified based on the assumption
-        // that team_size = 1. The HPX backend currently only supports a team
-        // size of 1.
-        std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id();
-
-        buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id());
-        HPXTeamMember member(
-            TeamPolicyInternal<Kokkos::Experimental::HPX>(
-                Kokkos::Experimental::HPX(), num_worker_threads, 1),
-            0, t, buffer.get(t), 512);
-
-        member_type single_exec(*scheduler, member);
-        member_type &team_exec = single_exec;
-
-        auto &team_queue     = team_exec.scheduler().queue();
-        task_base_type *task = no_more_tasks_sentinel;
-
-        do {
-          if (task != no_more_tasks_sentinel && task != end) {
-            team_queue.complete(task);
-          }
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
+
+    for_loop(
+        par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+        [this, &buffer, num_worker_threads](int t) {
+          // NOTE: This implementation has been simplified based on the
+          // assumption that team_size = 1. The HPX backend currently only
+          // supports a team size of 1.
+          buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id());
+          HPXTeamMember member(
+              TeamPolicyInternal<Kokkos::Experimental::HPX>(
+                  Kokkos::Experimental::HPX(), num_worker_threads, 1),
+              0, t, buffer.get(t), 512);
+
+          member_type single_exec(*scheduler, member);
+          member_type &team_exec = single_exec;
+
+          auto &team_queue     = team_exec.scheduler().queue();
+          task_base_type *task = no_more_tasks_sentinel;
+
+          do {
+            if (task != no_more_tasks_sentinel && task != end) {
+              team_queue.complete(task);
+            }
 
-          if (*((volatile int *)&team_queue.m_ready_count) > 0) {
-            task = end;
-            for (int i = 0; i < queue_type::NumQueue && end == task; ++i) {
-              for (int j = 0; j < 2 && end == task; ++j) {
-                task = queue_type::pop_ready_task(&team_queue.m_ready[i][j]);
+            if (*((volatile int *)&team_queue.m_ready_count) > 0) {
+              task = end;
+              for (int i = 0; i < queue_type::NumQueue && end == task; ++i) {
+                for (int j = 0; j < 2 && end == task; ++j) {
+                  task = queue_type::pop_ready_task(&team_queue.m_ready[i][j]);
+                }
               }
+            } else {
+              task = team_queue.attempt_to_steal_task();
             }
-          } else {
-            task = team_queue.attempt_to_steal_task();
-          }
 
-          if (task != no_more_tasks_sentinel && task != end) {
-            (*task->m_apply)(task, &single_exec);
-          }
-        } while (task != no_more_tasks_sentinel);
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
-
-    num_tasks_remaining.wait();
-
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
-    Kokkos::Experimental::HPX::impl_decrement_active_parallel_region_count();
-#endif
+            if (task != no_more_tasks_sentinel && task != end) {
+              (*task->m_apply)(task, &single_exec);
+            }
+          } while (task != no_more_tasks_sentinel);
+        });
   }
 
   template <typename TaskType>
diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp b/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp
index d7e13e28f054569926382933232b7119ca96a192..a3d4a6a60441827f82fff27f49daf63d463fa556 100644
--- a/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp
+++ b/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp
@@ -47,8 +47,8 @@
 
 #include <Kokkos_HPX.hpp>
 
-#include <hpx/apply.hpp>
-#include <hpx/lcos/local/latch.hpp>
+#include <hpx/local/algorithm.hpp>
+#include <hpx/local/execution.hpp>
 
 namespace Kokkos {
 namespace Impl {
@@ -85,31 +85,31 @@ class ParallelFor<FunctorType, Kokkos::WorkGraphPolicy<Traits...>,
   }
 
   void execute_task() const {
-    const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
-
-    using hpx::apply;
-    using hpx::lcos::local::latch;
+    // See [note 1] in Kokkos_HPX.hpp for an explanation. The work graph policy
+    // does not store an execution space instance, so we only need to reset the
+    // parallel region count here.
+    Kokkos::Experimental::HPX::reset_count_on_exit_parallel reset_count_on_exit;
 
-    latch num_tasks_remaining(num_worker_threads);
-    ChunkedRoundRobinExecutor exec(num_worker_threads);
+    const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
 
-    for (int thread = 0; thread < num_worker_threads; ++thread) {
-      apply(exec, [this, &num_tasks_remaining]() {
-        std::int32_t w = m_policy.pop_work();
-        while (w != Policy::COMPLETED_TOKEN) {
-          if (w != Policy::END_TOKEN) {
-            execute_functor<WorkTag>(w);
-            m_policy.completed_work(w);
-          }
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
 
-          w = m_policy.pop_work();
-        }
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
 
-        num_tasks_remaining.count_down(1);
-      });
-    }
+    for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+             [this](int) {
+               std::int32_t w = m_policy.pop_work();
+               while (w != Policy::COMPLETED_TOKEN) {
+                 if (w != Policy::END_TOKEN) {
+                   execute_functor<WorkTag>(w);
+                   m_policy.completed_work(w);
+                 }
 
-    num_tasks_remaining.wait();
+                 w = m_policy.pop_work();
+               }
+             });
   }
 
   inline ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy)
diff --git a/packages/kokkos/core/src/Kokkos_Array.hpp b/packages/kokkos/core/src/Kokkos_Array.hpp
index 0d1408df1d2486f00a947255fae54497020d2fa6..d2098d0b1a8c17eb492e812cc040b0e858eea9c4 100644
--- a/packages/kokkos/core/src/Kokkos_Array.hpp
+++ b/packages/kokkos/core/src/Kokkos_Array.hpp
@@ -66,14 +66,12 @@ struct ArrayBoundsCheck<Integral, true> {
   KOKKOS_INLINE_FUNCTION
   ArrayBoundsCheck(Integral i, size_t N) {
     if (i < 0) {
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-      std::string s = "Kokkos::Array: index ";
-      s += std::to_string(i);
-      s += " < 0";
-      Kokkos::Impl::throw_runtime_exception(s);
-#else
-      Kokkos::abort("Kokkos::Array: negative index in device code");
-#endif
+      KOKKOS_IF_ON_HOST((std::string s = "Kokkos::Array: index ";
+                         s += std::to_string(i); s += " < 0";
+                         Kokkos::Impl::throw_runtime_exception(s);))
+
+      KOKKOS_IF_ON_DEVICE(
+          (Kokkos::abort("Kokkos::Array: negative index in device code");))
     }
     ArrayBoundsCheck<Integral, false>(i, N);
   }
@@ -84,15 +82,12 @@ struct ArrayBoundsCheck<Integral, false> {
   KOKKOS_INLINE_FUNCTION
   ArrayBoundsCheck(Integral i, size_t N) {
     if (size_t(i) >= N) {
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-      std::string s = "Kokkos::Array: index ";
-      s += std::to_string(i);
-      s += " >= ";
-      s += std::to_string(N);
-      Kokkos::Impl::throw_runtime_exception(s);
-#else
-      Kokkos::abort("Kokkos::Array: index >= size");
-#endif
+      KOKKOS_IF_ON_HOST((std::string s = "Kokkos::Array: index ";
+                         s += std::to_string(i); s += " >= ";
+                         s += std::to_string(N);
+                         Kokkos::Impl::throw_runtime_exception(s);))
+
+      KOKKOS_IF_ON_DEVICE((Kokkos::abort("Kokkos::Array: index >= size");))
     }
   }
 };
diff --git a/packages/kokkos/core/src/Kokkos_Atomic.hpp b/packages/kokkos/core/src/Kokkos_Atomic.hpp
index a47208e97782ec424a2a96b5ec4de0c58fe2fef2..b07b5f2f60796aa3ad10bf395e5a2658a9aaff7e 100644
--- a/packages/kokkos/core/src/Kokkos_Atomic.hpp
+++ b/packages/kokkos/core/src/Kokkos_Atomic.hpp
@@ -71,9 +71,6 @@
 #include <Kokkos_Macros.hpp>
 
 #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
-#ifdef KOKKOS_ENABLE_OPENMPTARGET
-#define DESUL_HAVE_OPENMP_ATOMICS
-#endif
 #include <Kokkos_Atomics_Desul_Wrapper.hpp>
 #include <Kokkos_Atomics_Desul_Volatile_Wrapper.hpp>
 #include <impl/Kokkos_Utilities.hpp>
@@ -146,8 +143,7 @@ KOKKOS_INLINE_FUNCTION T desul_atomic_compare_exchange(
 
 #define KOKKOS_ENABLE_CUDA_ATOMICS
 
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU) || \
-    defined(KOKKOS_IMPL_ENABLE_OVERLOAD_HOST_DEVICE)
+#elif defined(KOKKOS_ENABLE_HIP)
 
 #define KOKKOS_ENABLE_HIP_ATOMICS
 
diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..81ae34b9e03d3843c9c657ce3b2fccec60b70c9b
--- /dev/null
+++ b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp
@@ -0,0 +1,58 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_ATOMICS_DESUL_CONFIG_HPP
+#define KOKKOS_ATOMICS_DESUL_CONFIG_HPP
+
+#include <Kokkos_Macros.hpp>
+
+#ifdef KOKKOS_ENABLE_OPENMPTARGET
+#define DESUL_HAVE_OPENMP_ATOMICS
+#endif
+
+#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \
+    defined(KOKKOS_ARCH_PASCAL)
+#define DESUL_CUDA_ARCH_IS_PRE_VOLTA
+#endif
+
+#endif  // KOKKOS_ATOMICS_DESUL_CONFIG_HPP
diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp
index 0bcb3ea388beeaf72c862f9519572e5d9e13a530..d2bc9df89282989469cfe408d86e087decd0f44e 100644
--- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp
+++ b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp
@@ -2,20 +2,27 @@
 #define KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_
 #include <Kokkos_Macros.hpp>
 #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+#include <Kokkos_Atomics_Desul_Config.hpp>
 #include <desul/atomics.hpp>
 
+#ifdef KOKKOS_INTERNAL_NOT_PARALLEL
+#define KOKKOS_DESUL_MEM_SCOPE desul::MemoryScopeCaller()
+#else
+#define KOKKOS_DESUL_MEM_SCOPE desul::MemoryScopeDevice()
+#endif
+
 // clang-format off
-namespace Kokkos { 
+namespace Kokkos {
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_load(volatile T* const dest) { return desul::atomic_load(const_cast<T*>(dest), desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_load(volatile T* const dest) { return desul::atomic_load(const_cast<T*>(dest), desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_store(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_store(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_store(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_store(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // atomic_fetch_op
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_add (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_add (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_add (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_add (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 #ifdef DESUL_IMPL_ATOMIC_CUDA_USE_DOUBLE_ATOMICADD
 KOKKOS_INLINE_FUNCTION
@@ -23,7 +30,7 @@ double atomic_fetch_add(volatile double* const dest, double val) {
   #ifdef __CUDA_ARCH__
   return atomicAdd(const_cast<double*>(dest),val);
   #else
-  return desul::atomic_fetch_add (const_cast<double*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+  return desul::atomic_fetch_add (const_cast<double*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
   #endif
 };
 
@@ -32,158 +39,160 @@ double atomic_fetch_sub(volatile double* const dest, double val) {
   #ifdef __CUDA_ARCH__
   return atomicAdd(const_cast<double*>(dest),-val);
   #else
-  return desul::atomic_fetch_sub (const_cast<double*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+  return desul::atomic_fetch_sub (const_cast<double*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
   #endif
 };
 #endif
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_sub (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_sub (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_sub (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_sub (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_max (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_max (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_max (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_max (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_min (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_min (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_min (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_min (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_mul (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mul (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_mul (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mul (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_div (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_div (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_div (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_div (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_mod (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mod (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_mod (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mod (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_and (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_and (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_and (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_and (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_or  (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_or  (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_or  (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_or  (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_xor (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_xor (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_xor (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_xor (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_nand(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_nand(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_nand(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_nand(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_lshift(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_lshift(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_lshift(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_lshift(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_rshift(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_rshift(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_rshift(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_rshift(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_inc(volatile T* const dest) { return desul::atomic_fetch_inc(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_inc(volatile T* const dest) { return desul::atomic_fetch_inc(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_dec(volatile T* const dest) { return desul::atomic_fetch_dec(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_dec(volatile T* const dest) { return desul::atomic_fetch_dec(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 
 // atomic_op_fetch
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_add_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_add_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_sub_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_sub_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_max_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_max_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_min_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_min_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_mul_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_mul_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_div_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_div_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_mod_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mod_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_mod_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mod_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_and_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_and_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_and_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_and_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_or_fetch  (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_or_fetch  (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_or_fetch  (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_or_fetch  (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_xor_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_xor_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_xor_fetch (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_xor_fetch (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_nand_fetch(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_nand_fetch(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_nand_fetch(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_nand_fetch(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_lshift_fetch(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_lshift_fetch(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_lshift_fetch(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_lshift_fetch(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_rshift_fetch(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_rshift_fetch(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_rshift_fetch(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_rshift_fetch(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_inc_fetch(volatile T* const dest) { return desul::atomic_inc_fetch(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_inc_fetch(volatile T* const dest) { return desul::atomic_inc_fetch(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_dec_fetch(volatile T* const dest) { return desul::atomic_dec_fetch(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_dec_fetch(volatile T* const dest) { return desul::atomic_dec_fetch(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 
 // atomic_op
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_add(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_add(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_sub(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_sub(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_mul(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_mul(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_div(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_div(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_min(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_min(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_max(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_max(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // FIXME: Desul doesn't have atomic_and yet so call fetch_and
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_and(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { (void) desul::atomic_fetch_and (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_and(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { (void) desul::atomic_fetch_and (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // FIXME: Desul doesn't have atomic_or yet so call fetch_or
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_or (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { (void) desul::atomic_fetch_or  (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_or (volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { (void) desul::atomic_fetch_or  (const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_inc(volatile T* const dest) { return desul::atomic_inc(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_inc(volatile T* const dest) { return desul::atomic_inc(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_dec(volatile T* const dest) { return desul::atomic_dec(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_dec(volatile T* const dest) { return desul::atomic_dec(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_increment(volatile T* const dest) { return desul::atomic_inc(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_increment(volatile T* const dest) { return desul::atomic_inc(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_decrement(volatile T* const dest) { return desul::atomic_dec(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_decrement(volatile T* const dest) { return desul::atomic_dec(const_cast<T*>(dest),desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // Exchange
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_exchange(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_exchange(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_exchange(volatile T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_exchange(const_cast<T*>(dest), val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
 bool atomic_compare_exchange_strong(volatile T* const dest, T& expected, const T desired) {
   return desul::atomic_compare_exchange_strong(const_cast<T*>(dest),expected, desired,
-                  desul::MemoryOrderRelaxed(), desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+                  desul::MemoryOrderRelaxed(), desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
 }
 
 template<class T> KOKKOS_INLINE_FUNCTION
 T atomic_compare_exchange(volatile T* const dest, const T compare, const T desired) {
   return desul::atomic_compare_exchange(const_cast<T*>(dest),compare, desired,
-                  desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+                  desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
 }
 
 }
+#undef KOKKOS_DESUL_MEM_SCOPE
+
 // clang-format on
 #endif  // KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
 #endif
diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp
index 3a182a6a22b56ca424d13e3d9f0835070f1cb2f6..939cf950b57932909a2cc2b42473429acc7c3d3c 100644
--- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp
+++ b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp
@@ -3,6 +3,7 @@
 #include <Kokkos_Macros.hpp>
 
 #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+#include <Kokkos_Atomics_Desul_Config.hpp>
 #include <desul/atomics.hpp>
 
 #include <impl/Kokkos_Atomic_Memory_Order.hpp>
@@ -29,29 +30,35 @@ inline const char* atomic_query_version() { return "KOKKOS_DESUL_ATOMICS"; }
 #endif
 // ============================================================
 
+#ifdef KOKKOS_INTERNAL_NOT_PARALLEL
+#define KOKKOS_DESUL_MEM_SCOPE desul::MemoryScopeCaller()
+#else
+#define KOKKOS_DESUL_MEM_SCOPE desul::MemoryScopeDevice()
+#endif
+
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_load(T* const dest) { return desul::atomic_load(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_load(T* const dest) { return desul::atomic_load(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_store(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_store(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_store(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_store(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
 void atomic_assign(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { atomic_store(dest,val); }
 
 KOKKOS_INLINE_FUNCTION
 void memory_fence() {
-  desul::atomic_thread_fence(desul::MemoryOrderSeqCst(), desul::MemoryScopeDevice());
+  desul::atomic_thread_fence(desul::MemoryOrderSeqCst(), KOKKOS_DESUL_MEM_SCOPE);
 }
 
 KOKKOS_INLINE_FUNCTION
-void load_fence() { return desul::atomic_thread_fence(desul::MemoryOrderAcquire(), desul::MemoryScopeDevice()); }
+void load_fence() { return desul::atomic_thread_fence(desul::MemoryOrderAcquire(), KOKKOS_DESUL_MEM_SCOPE); }
 
 KOKKOS_INLINE_FUNCTION
-void store_fence() { return desul::atomic_thread_fence(desul::MemoryOrderRelease(), desul::MemoryScopeDevice()); }
+void store_fence() { return desul::atomic_thread_fence(desul::MemoryOrderRelease(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // atomic_fetch_op
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_add (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_add (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_add (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_add (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 #ifdef DESUL_IMPL_ATOMIC_CUDA_USE_DOUBLE_ATOMICADD
 KOKKOS_INLINE_FUNCTION
@@ -59,7 +66,7 @@ double atomic_fetch_add(double* const dest, double val) {
   #ifdef __CUDA_ARCH__
   return atomicAdd(dest,val);
   #else
-  return desul::atomic_fetch_add (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+  return desul::atomic_fetch_add (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
   #endif
 };
 
@@ -68,156 +75,156 @@ double atomic_fetch_sub(double* const dest, double val) {
   #ifdef __CUDA_ARCH__
   return atomicAdd(dest,-val);
   #else
-  return desul::atomic_fetch_sub (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+  return desul::atomic_fetch_sub (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
   #endif
 };
 #endif
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_sub (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_sub (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_sub (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_sub (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_max (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_max (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_max (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_max (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_min (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_min (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_min (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_min (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_mul (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mul (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_mul (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mul (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_div (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_div (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_div (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_div (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_mod (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mod (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_mod (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_mod (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_and (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_and (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_and (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_and (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_or  (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_or  (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_or  (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_or  (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_xor (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_xor (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_xor (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_xor (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_nand(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_nand(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_nand(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_nand(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_lshift(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_lshift(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_lshift(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_lshift(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_rshift(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_rshift(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_rshift(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_fetch_rshift(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_inc(T* const dest) { return desul::atomic_fetch_inc(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_inc(T* const dest) { return desul::atomic_fetch_inc(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_fetch_dec(T* const dest) { return desul::atomic_fetch_dec(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_fetch_dec(T* const dest) { return desul::atomic_fetch_dec(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 
 // atomic_op_fetch
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_add_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_add_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_sub_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_sub_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_max_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_max_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_min_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_min_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_mul_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_mul_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_div_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_div_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_mod_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mod_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_mod_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mod_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_and_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_and_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_and_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_and_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_or_fetch  (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_or_fetch  (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_or_fetch  (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_or_fetch  (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_xor_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_xor_fetch (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_xor_fetch (T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_xor_fetch (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_nand_fetch(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_nand_fetch(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_nand_fetch(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_nand_fetch(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_lshift_fetch(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_lshift_fetch(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_lshift_fetch(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_lshift_fetch(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_rshift_fetch(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_rshift_fetch(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_rshift_fetch(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_rshift_fetch(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_inc_fetch(T* const dest) { return desul::atomic_inc_fetch(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_inc_fetch(T* const dest) { return desul::atomic_inc_fetch(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_dec_fetch(T* const dest) { return desul::atomic_dec_fetch(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_dec_fetch(T* const dest) { return desul::atomic_dec_fetch(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 
 // atomic_op
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_add(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_add(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_add (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_sub(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_sub(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_sub (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_mul(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_mul(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_mul (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_div(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_div(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_div (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_min(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_min(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_min (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_max(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_max(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_max (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // FIXME: Desul doesn't have atomic_and yet so call fetch_and
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_and(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { (void) desul::atomic_fetch_and (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_and(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { (void) desul::atomic_fetch_and (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // FIXME: Desul doesn't have atomic_or yet so call fetch_or
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_or(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val)  { (void) desul::atomic_fetch_or (dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_or(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val)  { (void) desul::atomic_fetch_or (dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_inc(T* const dest) { return desul::atomic_inc(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_inc(T* const dest) { return desul::atomic_inc(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_dec(T* const dest) { return desul::atomic_dec(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_dec(T* const dest) { return desul::atomic_dec(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_increment(T* const dest) { return desul::atomic_inc(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_increment(T* const dest) { return desul::atomic_inc(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
-void atomic_decrement(T* const dest) { return desul::atomic_dec(dest, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+void atomic_decrement(T* const dest) { return desul::atomic_dec(dest, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 // Exchange
 
 template<class T> KOKKOS_INLINE_FUNCTION
-T atomic_exchange(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_exchange(dest, val, desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice()); }
+T atomic_exchange(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> val) { return desul::atomic_exchange(dest, val, desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE); }
 
 template<class T> KOKKOS_INLINE_FUNCTION
 bool atomic_compare_exchange_strong(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> expected, desul::Impl::dont_deduce_this_parameter_t<const T> desired) {
   T expected_ref = expected;
   return desul::atomic_compare_exchange_strong(dest, expected_ref, desired,
-                  desul::MemoryOrderRelaxed(), desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+                  desul::MemoryOrderRelaxed(), desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
 }
 
 template<class T> KOKKOS_INLINE_FUNCTION
 T atomic_compare_exchange(T* const dest, desul::Impl::dont_deduce_this_parameter_t<const T> compare, desul::Impl::dont_deduce_this_parameter_t<const T> desired) {
   return desul::atomic_compare_exchange(dest, compare, desired,
-                  desul::MemoryOrderRelaxed(), desul::MemoryScopeDevice());
+                  desul::MemoryOrderRelaxed(), KOKKOS_DESUL_MEM_SCOPE);
 }
 
 namespace Impl {
@@ -250,22 +257,25 @@ namespace Impl {
     return desul::atomic_compare_exchange_strong(dest, expected, desired,
                   typename KokkosToDesulMemoryOrder<MemOrderSuccess>::type(),
                   typename KokkosToDesulMemoryOrder<MemOrderFailure>::type(),
-                  desul::MemoryScopeDevice());
+                  KOKKOS_DESUL_MEM_SCOPE);
 
   }
   template<class T, class MemoryOrder>
   KOKKOS_INLINE_FUNCTION
   T atomic_load(const T* const src, MemoryOrder) {
-    return desul::atomic_load(src, typename KokkosToDesulMemoryOrder<MemoryOrder>::type(), desul::MemoryScopeDevice());
+    return desul::atomic_load(src, typename KokkosToDesulMemoryOrder<MemoryOrder>::type(), KOKKOS_DESUL_MEM_SCOPE);
   }
   template<class T, class MemoryOrder>
   KOKKOS_INLINE_FUNCTION
   void atomic_store(T* const src, const T val, MemoryOrder) {
-    return desul::atomic_store(src, val, typename KokkosToDesulMemoryOrder<MemoryOrder>::type(), desul::MemoryScopeDevice());
+    return desul::atomic_store(src, val, typename KokkosToDesulMemoryOrder<MemoryOrder>::type(), KOKKOS_DESUL_MEM_SCOPE);
   }
 }
 
 }
+
+#undef KOKKOS_DESUL_MEM_SCOPE
+
 // clang-format on
 #endif  // KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
 #endif
diff --git a/packages/kokkos/core/src/Kokkos_Concepts.hpp b/packages/kokkos/core/src/Kokkos_Concepts.hpp
index 97137387f264a869dacb6b3b5abdd7fa5a9ba5ff..5a1a571e43e74ce51aed4db7d9bbada584083f06 100644
--- a/packages/kokkos/core/src/Kokkos_Concepts.hpp
+++ b/packages/kokkos/core/src/Kokkos_Concepts.hpp
@@ -50,6 +50,8 @@
 // Needed for 'is_space<S>::host_mirror_space
 #include <Kokkos_Core_fwd.hpp>
 
+#include <Kokkos_DetectionIdiom.hpp>
+
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
@@ -146,24 +148,20 @@ struct LaunchBounds {
 
 namespace Kokkos {
 
-#define KOKKOS_IMPL_IS_CONCEPT(CONCEPT)                                        \
-  template <typename T>                                                        \
-  struct is_##CONCEPT {                                                        \
-   private:                                                                    \
-    template <typename, typename = std::true_type>                             \
-    struct have : std::false_type {};                                          \
-    template <typename U>                                                      \
-    struct have<U, typename std::is_base_of<typename U::CONCEPT, U>::type>     \
-        : std::true_type {};                                                   \
-    template <typename U>                                                      \
-    struct have<U,                                                             \
-                typename std::is_base_of<typename U::CONCEPT##_type, U>::type> \
-        : std::true_type {};                                                   \
-                                                                               \
-   public:                                                                     \
-    static constexpr bool value =                                              \
-        is_##CONCEPT::template have<typename std::remove_cv<T>::type>::value;  \
-    constexpr operator bool() const noexcept { return value; }                 \
+#define KOKKOS_IMPL_IS_CONCEPT(CONCEPT)                        \
+  template <typename T>                                        \
+  struct is_##CONCEPT {                                        \
+   private:                                                    \
+    template <typename U>                                      \
+    using have_t = typename U::CONCEPT;                        \
+    template <typename U>                                      \
+    using have_type_t = typename U::CONCEPT##_type;            \
+                                                               \
+   public:                                                     \
+    static constexpr bool value =                              \
+        std::is_base_of<detected_t<have_t, T>, T>::value ||    \
+        std::is_base_of<detected_t<have_type_t, T>, T>::value; \
+    constexpr operator bool() const noexcept { return value; } \
   };
 
 // Public concept:
diff --git a/packages/kokkos/core/src/Kokkos_CopyViews.hpp b/packages/kokkos/core/src/Kokkos_CopyViews.hpp
index 16946dd602b536793f8746165ebd4bb82631742b..ac516e31ea66672a043b87d325020204af96ad88 100644
--- a/packages/kokkos/core/src/Kokkos_CopyViews.hpp
+++ b/packages/kokkos/core/src/Kokkos_CopyViews.hpp
@@ -1470,14 +1470,13 @@ inline void deep_copy(
 
   if (src.data() == nullptr) {
     Kokkos::fence("Kokkos::deep_copy: copy into scalar, src is null");
-    if (Kokkos::Tools::Experimental::get_callbacks().end_deep_copy != nullptr) {
-      Kokkos::Profiling::endDeepCopy();
-    }
-    return;
+  } else {
+    Kokkos::fence("Kokkos::deep_copy: copy into scalar, pre copy fence");
+    Kokkos::Impl::DeepCopy<HostSpace, src_memory_space>(&dst, src.data(),
+                                                        sizeof(ST));
+    Kokkos::fence("Kokkos::deep_copy: copy into scalar, post copy fence");
   }
 
-  Kokkos::Impl::DeepCopy<HostSpace, src_memory_space>(&dst, src.data(),
-                                                      sizeof(ST));
   if (Kokkos::Tools::Experimental::get_callbacks().end_deep_copy != nullptr) {
     Kokkos::Profiling::endDeepCopy();
   }
@@ -1733,7 +1732,7 @@ template <class TeamType, class DT, class... DP, class ST, class... SP>
 void KOKKOS_INLINE_FUNCTION
 local_deep_copy_contiguous(const TeamType& team, const View<DT, DP...>& dst,
                            const View<ST, SP...>& src) {
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, src.span()),
+  Kokkos::parallel_for(Kokkos::TeamVectorRange(team, src.span()),
                        [&](const int& i) { dst.data()[i] = src.data()[i]; });
 }
 //----------------------------------------------------------------------------
@@ -1759,7 +1758,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
   const size_t N = dst.extent(0);
 
   team.team_barrier();
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N),
+  Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N),
                        [&](const int& i) { dst(i) = src(i); });
   team.team_barrier();
 }
@@ -1783,7 +1782,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0      = i % dst.extent(0);
       int i1      = i / dst.extent(0);
       dst(i0, i1) = src(i0, i1);
@@ -1811,7 +1810,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0          = i % dst.extent(0);
       int itmp        = i / dst.extent(0);
       int i1          = itmp % dst.extent(1);
@@ -1842,7 +1841,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0              = i % dst.extent(0);
       int itmp            = i / dst.extent(0);
       int i1              = itmp % dst.extent(1);
@@ -1875,7 +1874,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0                  = i % dst.extent(0);
       int itmp                = i / dst.extent(0);
       int i1                  = itmp % dst.extent(1);
@@ -1910,7 +1909,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0                      = i % dst.extent(0);
       int itmp                    = i / dst.extent(0);
       int i1                      = itmp % dst.extent(1);
@@ -1948,7 +1947,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0                          = i % dst.extent(0);
       int itmp                        = i / dst.extent(0);
       int i1                          = itmp % dst.extent(1);
@@ -2118,15 +2117,21 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
 template <class TeamType, class DT, class... DP>
 void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(
     const TeamType& team, const View<DT, DP...>& dst,
-    typename ViewTraits<DT, DP...>::const_value_type& value) {
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, dst.span()),
+    typename ViewTraits<DT, DP...>::const_value_type& value,
+    typename std::enable_if<std::is_same<
+        typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
+        nullptr) {
+  Kokkos::parallel_for(Kokkos::TeamVectorRange(team, dst.span()),
                        [&](const int& i) { dst.data()[i] = value; });
 }
 //----------------------------------------------------------------------------
 template <class DT, class... DP>
 void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(
     const View<DT, DP...>& dst,
-    typename ViewTraits<DT, DP...>::const_value_type& value) {
+    typename ViewTraits<DT, DP...>::const_value_type& value,
+    typename std::enable_if<std::is_same<
+        typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
+        nullptr) {
   for (size_t i = 0; i < dst.span(); ++i) {
     dst.data()[i] = value;
   }
@@ -2145,7 +2150,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
   const size_t N = dst.extent(0);
 
   team.team_barrier();
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N),
+  Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N),
                        [&](const int& i) { dst(i) = value; });
   team.team_barrier();
 }
@@ -2168,7 +2173,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0      = i % dst.extent(0);
       int i1      = i / dst.extent(0);
       dst(i0, i1) = value;
@@ -2195,7 +2200,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0          = i % dst.extent(0);
       int itmp        = i / dst.extent(0);
       int i1          = itmp % dst.extent(1);
@@ -2225,7 +2230,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0              = i % dst.extent(0);
       int itmp            = i / dst.extent(0);
       int i1              = itmp % dst.extent(1);
@@ -2257,7 +2262,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0                  = i % dst.extent(0);
       int itmp                = i / dst.extent(0);
       int i1                  = itmp % dst.extent(1);
@@ -2291,7 +2296,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0                      = i % dst.extent(0);
       int itmp                    = i / dst.extent(0);
       int i1                      = itmp % dst.extent(1);
@@ -2328,7 +2333,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy(
     team.team_barrier();
   } else {
     team.team_barrier();
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) {
+    Kokkos::parallel_for(Kokkos::TeamVectorRange(team, N), [&](const int& i) {
       int i0                          = i % dst.extent(0);
       int itmp                        = i / dst.extent(0);
       int i1                          = itmp % dst.extent(1);
@@ -2884,6 +2889,10 @@ bool size_mismatch(const ViewType& view, unsigned int max_extent,
     if (new_extents[dim] != view.extent(dim)) {
       return true;
     }
+  for (unsigned int dim = max_extent; dim < 8; ++dim)
+    if (new_extents[dim] != KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+      return true;
+    }
   return false;
 }
 
@@ -2891,20 +2900,15 @@ bool size_mismatch(const ViewType& view, unsigned int max_extent,
 
 /** \brief  Resize a view with copying old data to new data at the corresponding
  * indices. */
-template <class T, class... P>
+template <class... I, class T, class... P>
 inline typename std::enable_if<
     std::is_same<typename Kokkos::View<T, P...>::array_layout,
                  Kokkos::LayoutLeft>::value ||
     std::is_same<typename Kokkos::View<T, P...>::array_layout,
                  Kokkos::LayoutRight>::value>::type
-resize(Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-       const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-       const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-       const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-       const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-       const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-       const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
-       const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+impl_resize(Kokkos::View<T, P...>& v, const size_t n0, const size_t n1,
+            const size_t n2, const size_t n3, const size_t n4, const size_t n5,
+            const size_t n6, const size_t n7, const I&... arg_prop) {
   using view_type = Kokkos::View<T, P...>;
 
   static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
@@ -2921,7 +2925,8 @@ resize(Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
   const bool sizeMismatch = Impl::size_mismatch(v, v.rank_dynamic, new_extents);
 
   if (sizeMismatch) {
-    view_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6, n7);
+    view_type v_resized(view_alloc(v.label(), arg_prop...), n0, n1, n2, n3, n4,
+                        n5, n6, n7);
 
     Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v);
     Kokkos::fence("Kokkos::resize(View)");
@@ -2930,14 +2935,32 @@ resize(Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
   }
 }
 
-/** \brief  Resize a view with copying old data to new data at the corresponding
- * indices. */
-template <class I, class T, class... P>
+template <class T, class... P>
 inline typename std::enable_if<
     std::is_same<typename Kokkos::View<T, P...>::array_layout,
                  Kokkos::LayoutLeft>::value ||
     std::is_same<typename Kokkos::View<T, P...>::array_layout,
                  Kokkos::LayoutRight>::value>::type
+resize(Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+       const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+       const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+       const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+       const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+       const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+       const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+       const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+  impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7);
+}
+
+/** \brief  Resize a view with copying old data to new data at the corresponding
+ * indices. */
+template <class I, class T, class... P>
+inline typename std::enable_if<
+    Impl::is_view_ctor_property<I>::value &&
+    (std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                  Kokkos::LayoutLeft>::value ||
+     std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                  Kokkos::LayoutRight>::value)>::type
 resize(const I& arg_prop, Kokkos::View<T, P...>& v,
        const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
        const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
@@ -2947,37 +2970,12 @@ resize(const I& arg_prop, Kokkos::View<T, P...>& v,
        const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
        const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
        const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
-  using view_type = Kokkos::View<T, P...>;
-
-  static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
-                "Can only resize managed views");
-
-  // TODO (mfh 27 Jun 2017) If the old View has enough space but just
-  // different dimensions (e.g., if the product of the dimensions,
-  // including extra space for alignment, will not change), then
-  // consider just reusing storage.  For now, Kokkos always
-  // reallocates if any of the dimensions change, even if the old View
-  // has enough space.
-
-  const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
-  const bool sizeMismatch = Impl::size_mismatch(v, v.rank_dynamic, new_extents);
-
-  if (sizeMismatch) {
-    view_type v_resized(view_alloc(v.label(), std::forward<const I>(arg_prop)),
-                        n0, n1, n2, n3, n4, n5, n6, n7);
-
-    Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v);
-    // This fence really ought to look for an execution space in
-    // arg_prop, and just fence that if there is one
-    Kokkos::fence("Kokkos::resize(View)");
-
-    v = v_resized;
-  }
+  impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop);
 }
 
 /** \brief  Resize a view with copying old data to new data at the corresponding
  * indices. */
-template <class T, class... P>
+template <class... I, class T, class... P>
 inline std::enable_if_t<
     std::is_same<typename Kokkos::View<T, P...>::array_layout,
                  Kokkos::LayoutLeft>::value ||
@@ -2986,15 +2984,16 @@ inline std::enable_if_t<
     std::is_same<typename Kokkos::View<T, P...>::array_layout,
                  Kokkos::LayoutStride>::value ||
     is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value>
-resize(Kokkos::View<T, P...>& v,
-       const typename Kokkos::View<T, P...>::array_layout& layout) {
+impl_resize(Kokkos::View<T, P...>& v,
+            const typename Kokkos::View<T, P...>::array_layout& layout,
+            const I&... arg_prop) {
   using view_type = Kokkos::View<T, P...>;
 
   static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
                 "Can only resize managed views");
 
   if (v.layout() != layout) {
-    view_type v_resized(v.label(), layout);
+    view_type v_resized(view_alloc(v.label(), arg_prop...), layout);
 
     Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v);
     Kokkos::fence("Kokkos::resize(View)");
@@ -3006,7 +3005,7 @@ resize(Kokkos::View<T, P...>& v,
 // FIXME User-provided (custom) layouts are not required to have a comparison
 // operator. Hence, there is no way to check if the requested layout is actually
 // the same as the existing one.
-template <class T, class... P>
+template <class... I, class T, class... P>
 inline std::enable_if_t<
     !(std::is_same<typename Kokkos::View<T, P...>::array_layout,
                    Kokkos::LayoutLeft>::value ||
@@ -3015,21 +3014,62 @@ inline std::enable_if_t<
       std::is_same<typename Kokkos::View<T, P...>::array_layout,
                    Kokkos::LayoutStride>::value ||
       is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value)>
-resize(Kokkos::View<T, P...>& v,
-       const typename Kokkos::View<T, P...>::array_layout& layout) {
+impl_resize(Kokkos::View<T, P...>& v,
+            const typename Kokkos::View<T, P...>::array_layout& layout,
+            const I&... arg_prop) {
   using view_type = Kokkos::View<T, P...>;
 
   static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
                 "Can only resize managed views");
 
-  view_type v_resized(v.label(), layout);
+  view_type v_resized(view_alloc(v.label(), arg_prop...), layout);
 
   Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v);
 
   v = v_resized;
 }
 
+template <class I, class T, class... P>
+inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize(
+    const I& arg_prop, Kokkos::View<T, P...>& v,
+    const typename Kokkos::View<T, P...>::array_layout& layout) {
+  impl_resize(v, layout, arg_prop);
+}
+
+template <class T, class... P>
+inline void resize(Kokkos::View<T, P...>& v,
+                   const typename Kokkos::View<T, P...>::array_layout& layout) {
+  impl_resize(v, layout);
+}
+
 /** \brief  Resize a view with discarding old data. */
+template <class... I, class T, class... P>
+inline typename std::enable_if<
+    std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                 Kokkos::LayoutLeft>::value ||
+    std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                 Kokkos::LayoutRight>::value>::type
+impl_realloc(Kokkos::View<T, P...>& v, const size_t n0, const size_t n1,
+             const size_t n2, const size_t n3, const size_t n4, const size_t n5,
+             const size_t n6, const size_t n7, const I&... arg_prop) {
+  using view_type = Kokkos::View<T, P...>;
+
+  static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
+                "Can only realloc managed views");
+
+  const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
+  const bool sizeMismatch = Impl::size_mismatch(v, v.rank_dynamic, new_extents);
+
+  if (sizeMismatch) {
+    const std::string label = v.label();
+
+    v = view_type();  // Deallocate first, if the only view to allocation
+    v = view_type(view_alloc(label, arg_prop...), n0, n1, n2, n3, n4, n5, n6,
+                  n7);
+  } else if (!Kokkos::Impl::has_type<Impl::WithoutInitializing_t, I...>::value)
+    Kokkos::deep_copy(v, typename view_type::value_type{});
+}
+
 template <class T, class... P>
 inline typename std::enable_if<
     std::is_same<typename Kokkos::View<T, P...>::array_layout,
@@ -3045,28 +3085,68 @@ realloc(Kokkos::View<T, P...>& v,
         const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
         const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
         const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+  impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7);
+}
+
+template <class I, class T, class... P>
+inline typename std::enable_if<
+    Impl::is_view_ctor_property<I>::value &&
+    (std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                  Kokkos::LayoutLeft>::value ||
+     std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                  Kokkos::LayoutRight>::value)>::type
+realloc(const I& arg_prop, Kokkos::View<T, P...>& v,
+        const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+        const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+        const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+        const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+        const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+        const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+        const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
+        const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
+  impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop);
+}
+
+template <class... I, class T, class... P>
+inline std::enable_if_t<
+    std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                 Kokkos::LayoutLeft>::value ||
+    std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                 Kokkos::LayoutRight>::value ||
+    std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                 Kokkos::LayoutStride>::value ||
+    is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value>
+impl_realloc(Kokkos::View<T, P...>& v,
+             const typename Kokkos::View<T, P...>::array_layout& layout,
+             const I&... arg_prop) {
   using view_type = Kokkos::View<T, P...>;
 
   static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
                 "Can only realloc managed views");
 
-  const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
-  const bool sizeMismatch = Impl::size_mismatch(v, v.rank_dynamic, new_extents);
-
-  if (sizeMismatch) {
+  if (v.layout() != layout) {
     const std::string label = v.label();
 
     v = view_type();  // Deallocate first, if the only view to allocation
-    v = view_type(label, n0, n1, n2, n3, n4, n5, n6, n7);
-  } else
-    Kokkos::deep_copy(v, typename view_type::value_type{});
+    v = view_type(view_alloc(label, arg_prop...), layout);
+  }
 }
 
-/** \brief  Resize a view with discarding old data. */
-template <class T, class... P>
-inline void realloc(
-    Kokkos::View<T, P...>& v,
-    const typename Kokkos::View<T, P...>::array_layout& layout) {
+// FIXME User-provided (custom) layouts are not required to have a comparison
+// operator. Hence, there is no way to check if the requested layout is actually
+// the same as the existing one.
+template <class... I, class T, class... P>
+inline std::enable_if_t<
+    !(std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                   Kokkos::LayoutLeft>::value ||
+      std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                   Kokkos::LayoutRight>::value ||
+      std::is_same<typename Kokkos::View<T, P...>::array_layout,
+                   Kokkos::LayoutStride>::value ||
+      is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value)>
+impl_realloc(Kokkos::View<T, P...>& v,
+             const typename Kokkos::View<T, P...>::array_layout& layout,
+             const I&... arg_prop) {
   using view_type = Kokkos::View<T, P...>;
 
   static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
@@ -3075,8 +3155,23 @@ inline void realloc(
   const std::string label = v.label();
 
   v = view_type();  // Deallocate first, if the only view to allocation
-  v = view_type(label, layout);
+  v = view_type(view_alloc(label, arg_prop...), layout);
+}
+
+template <class I, class T, class... P>
+inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> realloc(
+    const I& arg_prop, Kokkos::View<T, P...>& v,
+    const typename Kokkos::View<T, P...>::array_layout& layout) {
+  impl_realloc(v, layout, arg_prop);
 }
+
+template <class T, class... P>
+inline void realloc(
+    Kokkos::View<T, P...>& v,
+    const typename Kokkos::View<T, P...>::array_layout& layout) {
+  impl_realloc(v, layout);
+}
+
 } /* namespace Kokkos */
 
 //----------------------------------------------------------------------------
@@ -3130,20 +3225,18 @@ struct MirrorType {
   using view_type = Kokkos::View<data_type, array_layout, Space>;
 };
 
-}  // namespace Impl
-
-template <class T, class... P>
-inline typename Kokkos::View<T, P...>::HostMirror create_mirror(
-    const Kokkos::View<T, P...>& src,
-    typename std::enable_if<
-        std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
-        !std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
-                      Kokkos::LayoutStride>::value>::type* = nullptr) {
+template <class T, class... P, class... I>
+inline typename std::enable_if<
+    !std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
+                  Kokkos::LayoutStride>::value,
+    typename Kokkos::View<T, P...>::HostMirror>::type
+create_mirror(const Kokkos::View<T, P...>& src, const I&... arg_prop) {
   using src_type = View<T, P...>;
   using dst_type = typename src_type::HostMirror;
 
   return dst_type(
-      std::string(src.label()).append("_mirror"),
+      Kokkos::view_alloc(std::string(src.label()).append("_mirror"),
+                         arg_prop...),
       src.rank_dynamic > 0 ? src.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
       src.rank_dynamic > 1 ? src.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
       src.rank_dynamic > 2 ? src.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
@@ -3154,13 +3247,12 @@ inline typename Kokkos::View<T, P...>::HostMirror create_mirror(
       src.rank_dynamic > 7 ? src.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG);
 }
 
-template <class T, class... P>
-inline typename Kokkos::View<T, P...>::HostMirror create_mirror(
-    const Kokkos::View<T, P...>& src,
-    typename std::enable_if<
-        std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
-        std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
-                     Kokkos::LayoutStride>::value>::type* = nullptr) {
+template <class T, class... P, class... I>
+inline typename std::enable_if<
+    std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
+                 Kokkos::LayoutStride>::value,
+    typename Kokkos::View<T, P...>::HostMirror>::type
+create_mirror(const Kokkos::View<T, P...>& src, const I&... arg_prop) {
   using src_type = View<T, P...>;
   using dst_type = typename src_type::HostMirror;
 
@@ -3184,65 +3276,131 @@ inline typename Kokkos::View<T, P...>::HostMirror create_mirror(
   layout.stride[6] = src.stride_6();
   layout.stride[7] = src.stride_7();
 
-  return dst_type(std::string(src.label()).append("_mirror"), layout);
+  return dst_type(Kokkos::view_alloc(std::string(src.label()).append("_mirror"),
+                                     arg_prop...),
+                  layout);
 }
 
 // Create a mirror in a new space (specialization for different space)
-template <class Space, class T, class... P>
+template <class Space, class T, class... P, class... I>
 typename Impl::MirrorType<Space, T, P...>::view_type create_mirror(
-    const Space&, const Kokkos::View<T, P...>& src,
-    typename std::enable_if<std::is_same<
-        typename ViewTraits<T, P...>::specialize, void>::value>::type* =
-        nullptr) {
-  return typename Impl::MirrorType<Space, T, P...>::view_type(src.label(),
-                                                              src.layout());
+    const Space&, const Kokkos::View<T, P...>& src, const I&... arg_prop) {
+  return typename Impl::MirrorType<Space, T, P...>::view_type(
+      Kokkos::view_alloc(src.label(), arg_prop...), src.layout());
 }
+}  // namespace Impl
 
 template <class T, class... P>
-inline typename Kokkos::View<T, P...>::HostMirror create_mirror_view(
-    const Kokkos::View<T, P...>& src,
-    typename std::enable_if<
-        (std::is_same<
-             typename Kokkos::View<T, P...>::memory_space,
-             typename Kokkos::View<T, P...>::HostMirror::memory_space>::value &&
-         std::is_same<typename Kokkos::View<T, P...>::data_type,
-                      typename Kokkos::View<T, P...>::HostMirror::data_type>::
-             value)>::type* = nullptr) {
-  return src;
+std::enable_if_t<
+    std::is_same<typename ViewTraits<T, P...>::specialize, void>::value,
+    typename Kokkos::View<T, P...>::HostMirror>
+create_mirror(Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror(v);
 }
 
 template <class T, class... P>
-inline typename Kokkos::View<T, P...>::HostMirror create_mirror_view(
-    const Kokkos::View<T, P...>& src,
-    typename std::enable_if<!(
-        std::is_same<
-            typename Kokkos::View<T, P...>::memory_space,
-            typename Kokkos::View<T, P...>::HostMirror::memory_space>::value &&
-        std::is_same<typename Kokkos::View<T, P...>::data_type,
-                     typename Kokkos::View<T, P...>::HostMirror::data_type>::
-            value)>::type* = nullptr) {
-  return Kokkos::create_mirror(src);
+std::enable_if_t<
+    std::is_same<typename ViewTraits<T, P...>::specialize, void>::value,
+    typename Kokkos::View<T, P...>::HostMirror>
+create_mirror(Kokkos::Impl::WithoutInitializing_t wi,
+              Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror(v, wi);
+}
+
+template <class Space, class T, class... P,
+          typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
+std::enable_if_t<
+    std::is_same<typename ViewTraits<T, P...>::specialize, void>::value,
+    typename Impl::MirrorType<Space, T, P...>::view_type>
+create_mirror(Space const& space, Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror(space, v);
+}
+
+template <class Space, class T, class... P,
+          typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
+std::enable_if_t<
+    std::is_same<typename ViewTraits<T, P...>::specialize, void>::value,
+    typename Impl::MirrorType<Space, T, P...>::view_type>
+create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const& space,
+              Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror(space, v, wi);
+}
+
+namespace Impl {
+
+template <class T, class... P, class... I>
+inline typename std::enable_if<
+    (std::is_same<
+         typename Kokkos::View<T, P...>::memory_space,
+         typename Kokkos::View<T, P...>::HostMirror::memory_space>::value &&
+     std::is_same<
+         typename Kokkos::View<T, P...>::data_type,
+         typename Kokkos::View<T, P...>::HostMirror::data_type>::value),
+    typename Kokkos::View<T, P...>::HostMirror>::type
+create_mirror_view(const Kokkos::View<T, P...>& src, const I&...) {
+  return src;
+}
+
+template <class T, class... P, class... I>
+inline typename std::enable_if<
+    !(std::is_same<
+          typename Kokkos::View<T, P...>::memory_space,
+          typename Kokkos::View<T, P...>::HostMirror::memory_space>::value &&
+      std::is_same<
+          typename Kokkos::View<T, P...>::data_type,
+          typename Kokkos::View<T, P...>::HostMirror::data_type>::value),
+    typename Kokkos::View<T, P...>::HostMirror>::type
+create_mirror_view(const Kokkos::View<T, P...>& src, const I&... arg_prop) {
+  return Kokkos::create_mirror(arg_prop..., src);
 }
 
 // Create a mirror view in a new space (specialization for same space)
-template <class Space, class T, class... P>
-typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
-    const Space&, const Kokkos::View<T, P...>& src,
-    typename std::enable_if<
-        Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* =
-        nullptr) {
+template <class Space, class T, class... P, class... I>
+typename std::enable_if<
+    Impl::MirrorViewType<Space, T, P...>::is_same_memspace,
+    typename Impl::MirrorViewType<Space, T, P...>::view_type>::type
+create_mirror_view(const Space&, const Kokkos::View<T, P...>& src,
+                   const I&...) {
   return src;
 }
 
 // Create a mirror view in a new space (specialization for different space)
-template <class Space, class T, class... P>
+template <class Space, class T, class... P, class... I>
+typename std::enable_if<
+    !Impl::MirrorViewType<Space, T, P...>::is_same_memspace,
+    typename Impl::MirrorViewType<Space, T, P...>::view_type>::type
+create_mirror_view(const Space&, const Kokkos::View<T, P...>& src,
+                   const I&... arg_prop) {
+  return typename Impl::MirrorViewType<Space, T, P...>::view_type(
+      Kokkos::view_alloc(src.label(), arg_prop...), src.layout());
+}
+}  // namespace Impl
+
+template <class T, class... P>
+typename Kokkos::View<T, P...>::HostMirror create_mirror_view(
+    Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror_view(v);
+}
+
+template <class T, class... P>
+typename Kokkos::View<T, P...>::HostMirror create_mirror_view(
+    Kokkos::Impl::WithoutInitializing_t wi, Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror_view(v, wi);
+}
+
+template <class Space, class T, class... P,
+          typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
 typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
-    const Space&, const Kokkos::View<T, P...>& src,
-    typename std::enable_if<
-        !Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* =
-        nullptr) {
-  return typename Impl::MirrorViewType<Space, T, P...>::view_type(src.label(),
-                                                                  src.layout());
+    Space const& space, Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror_view(space, v);
+}
+
+template <class Space, class T, class... P,
+          typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
+typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
+    Kokkos::Impl::WithoutInitializing_t wi, Space const& space,
+    Kokkos::View<T, P...> const& v) {
+  return Impl::create_mirror_view(space, v, wi);
 }
 
 // Create a mirror view and deep_copy in a new space (specialization for same
@@ -3253,6 +3411,7 @@ create_mirror_view_and_copy(
     const Space&, const Kokkos::View<T, P...>& src,
     std::string const& name = "",
     typename std::enable_if<
+        std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
         Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* =
         nullptr) {
   (void)name;
@@ -3269,6 +3428,7 @@ create_mirror_view_and_copy(
     const Space&, const Kokkos::View<T, P...>& src,
     std::string const& name = "",
     typename std::enable_if<
+        std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
         !Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* =
         nullptr) {
   using Mirror      = typename Impl::MirrorViewType<Space, T, P...>::view_type;
@@ -3279,9 +3439,12 @@ create_mirror_view_and_copy(
   return mirror;
 }
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
 // Create a mirror view in a new space without initializing (specialization for
 // same space)
 template <class Space, class T, class... P>
+KOKKOS_DEPRECATED_WITH_COMMENT(
+    "Use the version taking WithoutInitializing as first argument")
 typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
     const Space&, const Kokkos::View<T, P...>& src,
     Kokkos::Impl::WithoutInitializing_t,
@@ -3294,6 +3457,8 @@ typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
 // Create a mirror view in a new space without initializing (specialization for
 // different space)
 template <class Space, class T, class... P>
+KOKKOS_DEPRECATED_WITH_COMMENT(
+    "Use the version taking WithoutInitializing as first argument")
 typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
     const Space&, const Kokkos::View<T, P...>& src,
     Kokkos::Impl::WithoutInitializing_t,
@@ -3303,6 +3468,7 @@ typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
   using Mirror = typename Impl::MirrorViewType<Space, T, P...>::view_type;
   return Mirror(view_alloc(WithoutInitializing, src.label()), src.layout());
 }
+#endif
 
 } /* namespace Kokkos */
 
diff --git a/packages/kokkos/core/src/Kokkos_Core.hpp b/packages/kokkos/core/src/Kokkos_Core.hpp
index 60e748589df593dbb9e549f6433daea77b5bc6b0..232873d3f780777c1e25f2ba1430d8f4f3d7661d 100644
--- a/packages/kokkos/core/src/Kokkos_Core.hpp
+++ b/packages/kokkos/core/src/Kokkos_Core.hpp
@@ -50,14 +50,14 @@
 
 #include <Kokkos_Core_fwd.hpp>
 
-// Fundamental type description for half precision
-// Should not rely on other backend infrastructure
-#include <Kokkos_Half.hpp>
 #include <KokkosCore_Config_DeclareBackend.hpp>
 
+#include <Kokkos_Half.hpp>
 #include <Kokkos_AnonymousSpace.hpp>
 #include <Kokkos_LogicalSpaces.hpp>
 #include <Kokkos_Pair.hpp>
+#include <Kokkos_MinMaxClamp.hpp>
+#include <Kokkos_MathematicalConstants.hpp>
 #include <Kokkos_MathematicalFunctions.hpp>
 #include <Kokkos_MathematicalSpecialFunctions.hpp>
 #include <Kokkos_MemoryPool.hpp>
@@ -102,6 +102,22 @@ struct InitArguments {
         skip_device{9999},
         disable_warnings{dw},
         tune_internals{ti} {}
+  Tools::InitArguments impl_get_tools_init_arguments() const {
+    Tools::InitArguments init_tools;
+    init_tools.tune_internals =
+        tune_internals ? Tools::InitArguments::PossiblyUnsetOption::on
+                       : Tools::InitArguments::PossiblyUnsetOption::unset;
+    init_tools.help = tool_help
+                          ? Tools::InitArguments::PossiblyUnsetOption::on
+                          : Tools::InitArguments::PossiblyUnsetOption::unset;
+    init_tools.lib = tool_lib.empty()
+                         ? Kokkos::Tools::InitArguments::unset_string_option
+                         : tool_lib;
+    init_tools.args = tool_args.empty()
+                          ? Kokkos::Tools::InitArguments::unset_string_option
+                          : tool_args;
+    return init_tools;
+  }
 };
 
 namespace Impl {
diff --git a/packages/kokkos/core/src/Kokkos_Core_fwd.hpp b/packages/kokkos/core/src/Kokkos_Core_fwd.hpp
index a610ee76dffb6fd23fadae40437b893eaab5cc87..d04e6a75c7240e5d8351ca7aa74d544fa10a5422 100644
--- a/packages/kokkos/core/src/Kokkos_Core_fwd.hpp
+++ b/packages/kokkos/core/src/Kokkos_Core_fwd.hpp
@@ -83,7 +83,7 @@ struct InvalidType {};
 }  // namespace Kokkos
 
 //----------------------------------------------------------------------------
-// Forward declarations for class inter-relationships
+// Forward declarations for class interrelationships
 
 namespace Kokkos {
 
@@ -183,19 +183,69 @@ using DefaultHostExecutionSpace KOKKOS_IMPL_DEFAULT_HOST_EXEC_SPACE_ANNOTATION =
 // a given memory space.
 
 namespace Kokkos {
+
+template <class AccessSpace, class MemorySpace>
+struct SpaceAccessibility;
+
 namespace Impl {
 
+// primary template: memory space is accessible, do nothing.
+template <class MemorySpace, class AccessSpace,
+          bool = SpaceAccessibility<AccessSpace, MemorySpace>::accessible>
+struct RuntimeCheckMemoryAccessViolation {
+  KOKKOS_FUNCTION RuntimeCheckMemoryAccessViolation(char const *const) {}
+};
+
+// explicit specialization: memory access violation will occur, call abort with
+// the specified error message.
+template <class MemorySpace, class AccessSpace>
+struct RuntimeCheckMemoryAccessViolation<AccessSpace, MemorySpace, false> {
+  KOKKOS_FUNCTION RuntimeCheckMemoryAccessViolation(char const *const msg) {
+    Kokkos::abort(msg);
+  }
+};
+
+// calls abort with default error message at runtime if memory access violation
+// will occur
+template <class MemorySpace>
+KOKKOS_FUNCTION void runtime_check_memory_access_violation() {
+  KOKKOS_IF_ON_HOST((
+      RuntimeCheckMemoryAccessViolation<MemorySpace, DefaultHostExecutionSpace>(
+          "ERROR: attempt to access inaccessible memory space");))
+  KOKKOS_IF_ON_DEVICE(
+      (RuntimeCheckMemoryAccessViolation<MemorySpace, DefaultExecutionSpace>(
+           "ERROR: attempt to access inaccessible memory space");))
+}
+
+// calls abort with specified error message at runtime if memory access
+// violation will occur
+template <class MemorySpace>
+KOKKOS_FUNCTION void runtime_check_memory_access_violation(
+    char const *const msg) {
+  KOKKOS_IF_ON_HOST((
+      (void)RuntimeCheckMemoryAccessViolation<MemorySpace,
+                                              DefaultHostExecutionSpace>(msg);))
+  KOKKOS_IF_ON_DEVICE((
+      (void)
+          RuntimeCheckMemoryAccessViolation<MemorySpace, DefaultExecutionSpace>(
+              msg);))
+}
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+
 #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) && \
     defined(KOKKOS_ENABLE_CUDA)
-using ActiveExecutionMemorySpace = Kokkos::CudaSpace;
+using ActiveExecutionMemorySpace KOKKOS_DEPRECATED = Kokkos::CudaSpace;
 #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL)
-using ActiveExecutionMemorySpace = Kokkos::Experimental::SYCLDeviceUSMSpace;
+using ActiveExecutionMemorySpace KOKKOS_DEPRECATED =
+    Kokkos::Experimental::SYCLDeviceUSMSpace;
 #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU)
-using ActiveExecutionMemorySpace = Kokkos::Experimental::HIPSpace;
+using ActiveExecutionMemorySpace KOKKOS_DEPRECATED =
+    Kokkos::Experimental::HIPSpace;
 #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-using ActiveExecutionMemorySpace = Kokkos::HostSpace;
+using ActiveExecutionMemorySpace KOKKOS_DEPRECATED = Kokkos::HostSpace;
 #else
-using ActiveExecutionMemorySpace = void;
+using ActiveExecutionMemorySpace KOKKOS_DEPRECATED = void;
 #endif
 
 template <typename DstMemorySpace, typename SrcMemorySpace>
@@ -205,16 +255,17 @@ template <typename DstMemorySpace, typename SrcMemorySpace,
           bool = Kokkos::Impl::MemorySpaceAccess<DstMemorySpace,
                                                  SrcMemorySpace>::accessible>
 struct verify_space {
-  KOKKOS_FUNCTION static void check() {}
+  KOKKOS_DEPRECATED KOKKOS_FUNCTION static void check() {}
 };
 
 template <typename DstMemorySpace, typename SrcMemorySpace>
 struct verify_space<DstMemorySpace, SrcMemorySpace, false> {
-  KOKKOS_FUNCTION static void check() {
+  KOKKOS_DEPRECATED KOKKOS_FUNCTION static void check() {
     Kokkos::abort(
         "Kokkos::View ERROR: attempt to access inaccessible memory space");
   };
 };
+#endif
 
 // Base class for exec space initializer factories
 class ExecSpaceInitializerBase;
@@ -228,6 +279,7 @@ class LogicalMemorySpace;
 
 }  // namespace Kokkos
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
 #define KOKKOS_RESTRICT_EXECUTION_TO_DATA(DATA_SPACE, DATA_PTR)        \
   Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
                              DATA_SPACE>::check();
@@ -235,6 +287,7 @@ class LogicalMemorySpace;
 #define KOKKOS_RESTRICT_EXECUTION_TO_(DATA_SPACE)                      \
   Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
                              DATA_SPACE>::check();
+#endif
 
 //----------------------------------------------------------------------------
 
@@ -337,6 +390,32 @@ struct LAnd;
 template <class ScalarType, class Space = HostSpace>
 struct LOr;
 
+template <class Scalar, class Index, class Space = HostSpace>
+struct MaxFirstLoc;
+template <class Scalar, class Index, class ComparatorType,
+          class Space = HostSpace>
+struct MaxFirstLocCustomComparator;
+
+template <class Scalar, class Index, class Space = HostSpace>
+struct MinFirstLoc;
+template <class Scalar, class Index, class ComparatorType,
+          class Space = HostSpace>
+struct MinFirstLocCustomComparator;
+
+template <class Scalar, class Index, class Space = HostSpace>
+struct MinMaxFirstLastLoc;
+template <class Scalar, class Index, class ComparatorType,
+          class Space = HostSpace>
+struct MinMaxFirstLastLocCustomComparator;
+
+template <class Index, class Space = HostSpace>
+struct FirstLoc;
+template <class Index, class Space = HostSpace>
+struct LastLoc;
+template <class Index, class Space = HostSpace>
+struct StdIsPartitioned;
+template <class Index, class Space = HostSpace>
+struct StdPartitionPoint;
 }  // namespace Kokkos
 
 #endif /* #ifndef KOKKOS_CORE_FWD_HPP */
diff --git a/packages/kokkos/core/src/Kokkos_Crs.hpp b/packages/kokkos/core/src/Kokkos_Crs.hpp
index 897402d37643bf8876360b3e828c685c6251fe19..0657146bbd058aa1f80a49f7e33b1b78957f0d33 100644
--- a/packages/kokkos/core/src/Kokkos_Crs.hpp
+++ b/packages/kokkos/core/src/Kokkos_Crs.hpp
@@ -191,7 +191,8 @@ class CrsRowMapFromCounts {
   using execution_space = typename InCounts::execution_space;
   using value_type      = typename OutRowMap::value_type;
   using index_type      = typename InCounts::size_type;
-  using last_value_type = Kokkos::View<value_type, execution_space>;
+  using last_value_type =
+      Kokkos::View<value_type, typename InCounts::device_type>;
 
  private:
   InCounts m_in;
@@ -224,8 +225,8 @@ class CrsRowMapFromCounts {
     using closure_type = Kokkos::Impl::ParallelScan<self_type, policy_type>;
     closure_type closure(*this, policy_type(0, m_in.size() + 1));
     closure.execute();
-    auto last_value = Kokkos::create_mirror_view(m_last_value);
-    Kokkos::deep_copy(last_value, m_last_value);
+    auto last_value =
+        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, m_last_value);
     return last_value();
   }
 };
diff --git a/packages/kokkos/core/src/Kokkos_Cuda.hpp b/packages/kokkos/core/src/Kokkos_Cuda.hpp
index c5a6b0f7d7d579e2ccad3c05f97d042d4ed63471..6305a1fa5d2f21ea9ae9e996a7415a7ee6b6a239 100644
--- a/packages/kokkos/core/src/Kokkos_Cuda.hpp
+++ b/packages/kokkos/core/src/Kokkos_Cuda.hpp
@@ -109,7 +109,7 @@ struct CudaDispatchProperties {
 ///
 /// An "execution space" represents a parallel execution model.  It tells Kokkos
 /// how to parallelize the execution of kernels in a parallel_for or
-/// parallel_reduce.  For example, the Threads execution space uses Pthreads or
+/// parallel_reduce.  For example, the Threads execution space uses
 /// C++11 threads on a CPU, the OpenMP execution space uses the OpenMP language
 /// extensions, and the Serial execution space executes "parallel" kernels
 /// sequentially.  The Cuda execution space uses NVIDIA's CUDA programming
diff --git a/packages/kokkos/core/src/Kokkos_GraphNode.hpp b/packages/kokkos/core/src/Kokkos_GraphNode.hpp
index 56e7d706f6a641467864ec9459660be21493de37..e34d1353e7cff7046301d238719800f63bc13230 100644
--- a/packages/kokkos/core/src/Kokkos_GraphNode.hpp
+++ b/packages/kokkos/core/src/Kokkos_GraphNode.hpp
@@ -397,9 +397,6 @@ class GraphNodeRef {
     using return_value_adapter =
         Kokkos::Impl::ParallelReduceReturnValue<void, return_type,
                                                 functor_type>;
-    using functor_adaptor = Kokkos::Impl::ParallelReduceFunctorType<
-        functor_type, Policy, typename return_value_adapter::value_type,
-        execution_space>;
     // End of Kokkos reducer disaster
     //----------------------------------------
 
@@ -408,8 +405,8 @@ class GraphNodeRef {
 
     using next_policy_t = decltype(policy);
     using next_kernel_t = Kokkos::Impl::GraphNodeKernelImpl<
-        ExecutionSpace, next_policy_t, typename functor_adaptor::functor_type,
-        Kokkos::ParallelReduceTag, typename return_value_adapter::reducer_type>;
+        ExecutionSpace, next_policy_t, functor_type, Kokkos::ParallelReduceTag,
+        typename return_value_adapter::reducer_type>;
 
     return this->_then_kernel(next_kernel_t{
         std::move(arg_name), graph_impl_ptr->get_execution_space(),
diff --git a/packages/kokkos/core/src/Kokkos_HBWSpace.hpp b/packages/kokkos/core/src/Kokkos_HBWSpace.hpp
index f6cdb2ec46cd4b5329a987e947ec356ff4efb0e9..47810f17aa77d87b34c5c30481334c9993bb3403 100644
--- a/packages/kokkos/core/src/Kokkos_HBWSpace.hpp
+++ b/packages/kokkos/core/src/Kokkos_HBWSpace.hpp
@@ -216,11 +216,10 @@ class SharedAllocationRecord<Kokkos::Experimental::HBWSpace, void>
   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
       const Kokkos::Experimental::HBWSpace& arg_space,
       const std::string& arg_label, const size_t arg_alloc_size) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size);
-#else
-    return (SharedAllocationRecord*)0;
-#endif
+    KOKKOS_IF_ON_HOST((return new SharedAllocationRecord(arg_space, arg_label,
+                                                         arg_alloc_size);))
+    KOKKOS_IF_ON_DEVICE(((void)arg_space; (void)arg_label; (void)arg_alloc_size;
+                         return nullptr;))
   }
 
   /**\brief  Allocate tracked memory in the space */
@@ -281,41 +280,86 @@ namespace Kokkos {
 
 namespace Impl {
 
+template <>
+struct DeepCopy<Kokkos::Experimental::HBWSpace, Kokkos::Experimental::HBWSpace,
+                DefaultHostExecutionSpace> {
+  DeepCopy(void* dst, const void* src, size_t n) {
+    hostspace_parallel_deepcopy(dst, src, n);
+  }
+
+  DeepCopy(const DefaultHostExecutionSpace& exec, void* dst, const void* src,
+           size_t n) {
+    hostspace_parallel_deepcopy(exec, dst, src, n);
+  }
+};
+
 template <class ExecutionSpace>
 struct DeepCopy<Kokkos::Experimental::HBWSpace, Kokkos::Experimental::HBWSpace,
                 ExecutionSpace> {
-  DeepCopy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); }
+  DeepCopy(void* dst, const void* src, size_t n) {
+    hostspace_parallel_deepcopy(dst, src, n);
+  }
 
   DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) {
     exec.fence(
         "Kokkos::Impl::DeepCopy<Kokkos::Experimental::HBWSpace, "
-        "Kokkos::Experimental::HBWSpace,ExecutionSpace::DeepCopy: fence before "
-        "copy");
-    memcpy(dst, src, n);
+        "Kokkos::Experimental::HBWSpace,ExecutionSpace::DeepCopy: fence "
+        "before copy");
+    hostspace_parallel_deepcopy_async(dst, src, n);
+  }
+};
+
+template <>
+struct DeepCopy<HostSpace, Kokkos::Experimental::HBWSpace,
+                DefaultHostExecutionSpace> {
+  DeepCopy(void* dst, const void* src, size_t n) {
+    hostspace_parallel_deepcopy(dst, src, n);
+  }
+
+  DeepCopy(const DefaultHostExecutionSpace& exec, void* dst, const void* src,
+           size_t n) {
+    hostspace_parallel_deepcopy(exec, dst, src, n);
   }
 };
 
 template <class ExecutionSpace>
 struct DeepCopy<HostSpace, Kokkos::Experimental::HBWSpace, ExecutionSpace> {
-  DeepCopy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); }
+  DeepCopy(void* dst, const void* src, size_t n) {
+    hostspace_parallel_deepcopy(dst, src, n);
+  }
 
   DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) {
     exec.fence(
         "Kokkos::Impl::DeepCopy<HostSpace, Kokkos::Experimental::HBWSpace, "
         "ExecutionSpace>::DeepCopy: fence before copy");
-    memcpy(dst, src, n);
+    hostspace_parallel_deepcopy_async(copy_space, dst, src, n);
+  }
+};
+
+template <>
+struct DeepCopy<Kokkos::Experimental::HBWSpace, HostSpace,
+                DefaultHostExecutionSpace> {
+  DeepCopy(void* dst, const void* src, size_t n) {
+    hostspace_parallel_deepcopy(dst, src, n);
+  }
+
+  DeepCopy(const DefaultHostExecutionSpace& exec, void* dst, const void* src,
+           size_t n) {
+    hostspace_parallel_deepcopy(exec, dst, src, n);
   }
 };
 
 template <class ExecutionSpace>
 struct DeepCopy<Kokkos::Experimental::HBWSpace, HostSpace, ExecutionSpace> {
-  DeepCopy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); }
+  DeepCopy(void* dst, const void* src, size_t n) {
+    hostspace_parallel_deepcopy(dst, src, n);
+  }
 
   DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) {
     exec.fence(
         "Kokkos::Impl::DeepCopy<Kokkos::Experimental::HBWSpace, HostSpace, "
         "ExecutionSpace>::DeepCopy: fence before copy");
-    memcpy(dst, src, n);
+    hostspace_parallel_deepcopy_async(dst, src, n);
   }
 };
 
diff --git a/packages/kokkos/core/src/Kokkos_HIP.hpp b/packages/kokkos/core/src/Kokkos_HIP.hpp
index 09df4f2fed4d8c5499ec339391de0730474b1f80..7241bb6c359045468a4db1aef582dbb5aeb33d1c 100644
--- a/packages/kokkos/core/src/Kokkos_HIP.hpp
+++ b/packages/kokkos/core/src/Kokkos_HIP.hpp
@@ -55,6 +55,8 @@
 #include <Kokkos_HIP_Space.hpp>
 #include <Kokkos_Parallel.hpp>
 
+#include <HIP/Kokkos_HIP_Half_Impl_Type.hpp>
+#include <HIP/Kokkos_HIP_Half_Conversion.hpp>
 #include <HIP/Kokkos_HIP_Instance.hpp>
 #include <HIP/Kokkos_HIP_MDRangePolicy.hpp>
 #include <HIP/Kokkos_HIP_Parallel_Range.hpp>
diff --git a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp
index d20d533645b2f6bfc721820cb7a43adf4434f8e8..1371d21d388b681a1327dc6330121686c95d165d 100644
--- a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp
+++ b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp
@@ -506,7 +506,7 @@ class HIP {
   //@{
 
   KOKKOS_INLINE_FUNCTION static int in_parallel() {
-#if defined(__HIP_ARCH__)
+#if defined(__HIP_DEVICE_COMPILE__)
     return true;
 #else
     return false;
diff --git a/packages/kokkos/core/src/Kokkos_HPX.hpp b/packages/kokkos/core/src/Kokkos_HPX.hpp
index 236211864ee8f00f2bce884dc6a16666174bdadf..d2ae9c0ec2b3f755f21cc15b3614df8e1f23ed88 100644
--- a/packages/kokkos/core/src/Kokkos_HPX.hpp
+++ b/packages/kokkos/core/src/Kokkos_HPX.hpp
@@ -58,7 +58,6 @@
 #include <Kokkos_HBWSpace.hpp>
 #endif
 
-#include <HPX/Kokkos_HPX_ChunkedRoundRobinExecutor.hpp>
 #include <Kokkos_HostSpace.hpp>
 #include <Kokkos_Layout.hpp>
 #include <Kokkos_MemoryTraits.hpp>
@@ -74,18 +73,15 @@
 
 #include <KokkosExp_MDRangePolicy.hpp>
 
-#include <hpx/apply.hpp>
-#include <hpx/hpx_start.hpp>
-#include <hpx/include/util.hpp>
-#include <hpx/lcos/local/barrier.hpp>
-#include <hpx/lcos/local/latch.hpp>
-#include <hpx/parallel/algorithms/for_loop.hpp>
-#include <hpx/parallel/algorithms/reduce.hpp>
-#include <hpx/parallel/executors/static_chunk_size.hpp>
-#include <hpx/runtime.hpp>
-#include <hpx/runtime/threads/run_as_hpx_thread.hpp>
-#include <hpx/runtime/threads/threadmanager.hpp>
-#include <hpx/runtime/thread_pool_helpers.hpp>
+#include <hpx/local/algorithm.hpp>
+#include <hpx/local/barrier.hpp>
+#include <hpx/local/condition_variable.hpp>
+#include <hpx/local/execution.hpp>
+#include <hpx/local/future.hpp>
+#include <hpx/local/init.hpp>
+#include <hpx/local/mutex.hpp>
+#include <hpx/local/runtime.hpp>
+#include <hpx/local/thread.hpp>
 
 #include <Kokkos_UniqueToken.hpp>
 
@@ -102,10 +98,8 @@
 //
 // - 0: The HPX way. Unfortunately, this comes with unnecessary
 //      overheads at the moment, so there is
-// - 1: The manual way. This way is more verbose and does not take advantage of
-//      e.g. parallel::for_loop in HPX but it is significantly faster in many
-//      benchmarks.
-// - 2: Like 1, but spawn tasks using for_loop and a custom executor.
+// - 1: The manual way. This uses for_loop, but only spawns one task per worker
+//      thread. This is significantly faster in most cases.
 //
 // In the long run 0 should be the preferred implementation, but until HPX is
 // improved 1 will be the default.
@@ -113,7 +107,7 @@
 #define KOKKOS_HPX_IMPLEMENTATION 1
 #endif
 
-#if (KOKKOS_HPX_IMPLEMENTATION < 0) || (KOKKOS_HPX_IMPLEMENTATION > 2)
+#if (KOKKOS_HPX_IMPLEMENTATION < 0) || (KOKKOS_HPX_IMPLEMENTATION > 1)
 #error "You have chosen an invalid value for KOKKOS_HPX_IMPLEMENTATION"
 #endif
 
@@ -202,33 +196,42 @@ class thread_buffer {
 
 namespace Experimental {
 class HPX {
+ public:
+  static constexpr uint32_t impl_default_instance_id() { return 1; }
+
  private:
   static bool m_hpx_initialized;
-  static std::atomic<uint32_t> m_next_instance_id;
-  uint32_t m_instance_id = 0;
+  uint32_t m_instance_id = impl_default_instance_id();
 
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
+  static std::atomic<uint32_t> m_next_instance_id;
+
  public:
-  enum class instance_mode { global, independent };
-  instance_mode m_mode;
+  enum class instance_mode { default_, independent };
 
  private:
-  static std::atomic<uint32_t> m_active_parallel_region_count;
+  instance_mode m_mode;
+
+  static uint32_t m_active_parallel_region_count;
+  static hpx::spinlock m_active_parallel_region_count_mutex;
+  static hpx::condition_variable_any m_active_parallel_region_count_cond;
 
   struct instance_data {
     instance_data() = default;
     instance_data(hpx::shared_future<void> future) : m_future(future) {}
     Kokkos::Impl::thread_buffer m_buffer;
     hpx::shared_future<void> m_future = hpx::make_ready_future<void>();
+    hpx::spinlock m_future_mutex;
   };
 
   mutable std::shared_ptr<instance_data> m_independent_instance_data;
-  static instance_data m_global_instance_data;
+  static instance_data m_default_instance_data;
 
   std::reference_wrapper<Kokkos::Impl::thread_buffer> m_buffer;
   std::reference_wrapper<hpx::shared_future<void>> m_future;
+  std::reference_wrapper<hpx::spinlock> m_future_mutex;
 #else
-  static Kokkos::Impl::thread_buffer m_global_buffer;
+  static Kokkos::Impl::thread_buffer m_default_buffer;
 #endif
 
  public:
@@ -242,24 +245,29 @@ class HPX {
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
   HPX()
   noexcept
-      : m_instance_id(0),
-        m_mode(instance_mode::global),
-        m_buffer(m_global_instance_data.m_buffer),
-        m_future(m_global_instance_data.m_future) {}
+      : m_instance_id(impl_default_instance_id()),
+        m_mode(instance_mode::default_),
+        m_buffer(m_default_instance_data.m_buffer),
+        m_future(m_default_instance_data.m_future),
+        m_future_mutex(m_default_instance_data.m_future_mutex) {}
 
   HPX(instance_mode mode)
-      : m_instance_id(mode == instance_mode::independent ? m_next_instance_id++
-                                                         : 0),
+      : m_instance_id(mode == instance_mode::independent
+                          ? m_next_instance_id++
+                          : impl_default_instance_id()),
         m_mode(mode),
         m_independent_instance_data(mode == instance_mode::independent
                                         ? (new instance_data())
                                         : nullptr),
         m_buffer(mode == instance_mode::independent
                      ? m_independent_instance_data->m_buffer
-                     : m_global_instance_data.m_buffer),
+                     : m_default_instance_data.m_buffer),
         m_future(mode == instance_mode::independent
                      ? m_independent_instance_data->m_future
-                     : m_global_instance_data.m_future) {}
+                     : m_default_instance_data.m_future),
+        m_future_mutex(mode == instance_mode::independent
+                           ? m_independent_instance_data->m_future_mutex
+                           : m_default_instance_data.m_future_mutex) {}
 
   HPX(hpx::shared_future<void> future)
       : m_instance_id(m_next_instance_id++),
@@ -267,28 +275,13 @@ class HPX {
 
         m_independent_instance_data(new instance_data(future)),
         m_buffer(m_independent_instance_data->m_buffer),
-        m_future(m_independent_instance_data->m_future) {}
-
-  HPX(const HPX &other)
-      : m_instance_id(other.m_instance_id),
-        m_mode(other.m_mode),
-        m_independent_instance_data(other.m_independent_instance_data),
-        m_buffer(other.m_buffer),
-        m_future(other.m_future) {}
-
-  HPX &operator=(const HPX &other) {
-    m_instance_id =
-        other.m_mode == instance_mode::independent ? m_next_instance_id++ : 0;
-    m_mode                      = other.m_mode;
-    m_independent_instance_data = other.m_independent_instance_data;
-    m_buffer                    = m_mode == instance_mode::independent
-                   ? m_independent_instance_data->m_buffer
-                   : m_global_instance_data.m_buffer;
-    m_future = m_mode == instance_mode::independent
-                   ? m_independent_instance_data->m_future
-                   : m_global_instance_data.m_future;
-    return *this;
-  }
+        m_future(m_independent_instance_data->m_future),
+        m_future_mutex(m_independent_instance_data->m_future_mutex) {}
+
+  HPX(HPX &&other) = default;
+  HPX &operator=(HPX &&other) = default;
+  HPX(const HPX &other)       = default;
+  HPX &operator=(const HPX &other) = default;
 #else
   HPX() noexcept {}
 #endif
@@ -309,62 +302,67 @@ class HPX {
 
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
   static void impl_decrement_active_parallel_region_count() {
-    --m_active_parallel_region_count;
+    std::unique_lock<hpx::spinlock> l(m_active_parallel_region_count_mutex);
+    if (--m_active_parallel_region_count == 0) {
+      l.unlock();
+      m_active_parallel_region_count_cond.notify_all();
+    };
   }
 
   static void impl_increment_active_parallel_region_count() {
+    std::unique_lock<hpx::spinlock> l(m_active_parallel_region_count_mutex);
     ++m_active_parallel_region_count;
   }
-
-  void impl_fence_instance() const {
-    impl_fence_instance(
-        "Kokkos::Experimental::HPX::impl_fence_instance: Unnamed Instance "
-        "Fence");
-  }
-  void impl_fence_instance(const std::string &name) const {
-    Kokkos::Tools::Experimental::Impl::profile_fence_event(name, *this, [&]() {
-      if (hpx::threads::get_self_ptr() == nullptr) {
-        hpx::threads::run_as_hpx_thread([this]() { impl_get_future().wait(); });
-      } else {
-        impl_get_future().wait();
-      }
-    });
-  }
-
-  void impl_fence_all_instances() const {
-    impl_fence_instance(
-        "Kokkos::Experimental::HPX::impl_fence_all_instances: Unnamed Global "
-        "HPX Fence");
-  }
-  void impl_fence_all_instances(const std::string &namename) const {
-    Kokkos::Tools::Experimental::Impl::profile_fence_event(name, *this, [&]() {
-      hpx::util::yield_while(
-          []() { return m_active_parallel_region_count.load() != 0; });
-    });
-  }
 #endif
 
-  void fence() const {
+  void impl_fence_instance(const std::string &name =
+                               "Kokkos::Experimental::HPX::impl_fence_instance:"
+                               " Unnamed Instance Fence") const {
+    Kokkos::Tools::Experimental::Impl::profile_fence_event<
+        Kokkos::Experimental::HPX>(
+        name,
+        Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{
+            impl_instance_id()},
+        [&]() {
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
-    if (m_mode == instance_mode::global) {
-      impl_fence_all_instances(
-          "Kokkos::Experimental::HPX::fence: Unnamed Global HPX Fence");
-    } else {
-      impl_fence_instance(
-          "Kokkos::Experimental::HPX::fence: Unnamed HPX Instance Fence");
-    }
+          impl_get_future().wait();
+          // Reset the future to free variables that may have been captured in
+          // parallel regions.
+          impl_get_future() = hpx::make_ready_future<void>();
 #endif
+        });
   }
-  void fence(const std::string &name) const {
+
+  static void impl_fence_global(const std::string &name =
+                                    "Kokkos::Experimental::HPX::impl_fence_"
+                                    "global: Unnamed Global Fence") {
+    Kokkos::Tools::Experimental::Impl::profile_fence_event<
+        Kokkos::Experimental::HPX>(
+        name,
+        Kokkos::Tools::Experimental::SpecialSynchronizationCases::
+            GlobalDeviceSynchronization,
+        [&]() {
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
-    if (m_mode == instance_mode::global) {
-      impl_fence_all_instances(name);
-    } else {
-      impl_fence_instance(name);
-    }
+          std::unique_lock<hpx::spinlock> l(
+              m_active_parallel_region_count_mutex);
+          m_active_parallel_region_count_cond.wait(
+              l, [&]() { return m_active_parallel_region_count == 0; });
+          // Reset the future to free variables that may have been captured in
+          // parallel regions (however, we don't have access to futures from
+          // instances other than the default instances, they will only be
+          // released by impl_fence_instance).
+          HPX().impl_get_future() = hpx::make_ready_future<void>();
 #endif
+        });
+  }
+
+  static hpx::execution::parallel_executor impl_get_executor() {
+    return hpx::execution::parallel_executor();
   }
 
+  void fence() const { impl_fence_instance(); }
+  void fence(const std::string &name) const { impl_fence_instance(name); }
+
   static bool is_asynchronous(HPX const & = HPX()) noexcept {
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     return true;
@@ -380,15 +378,17 @@ class HPX {
     return std::vector<HPX>();
   }
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
   template <typename F>
-  static void partition_master(F const &, int requested_num_partitions = 0,
-                               int = 0) {
+  KOKKOS_DEPRECATED static void partition_master(
+      F const &, int requested_num_partitions = 0, int = 0) {
     if (requested_num_partitions > 1) {
       Kokkos::abort(
           "Kokkos::Experimental::HPX::partition_master: can't partition an "
           "HPX instance\n");
     }
   }
+#endif
 
   static int concurrency();
   static void impl_initialize(int thread_count);
@@ -442,7 +442,7 @@ class HPX {
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     return m_buffer.get();
 #else
-    return m_global_buffer;
+    return m_default_buffer;
 #endif
   }
 
@@ -450,6 +450,10 @@ class HPX {
   hpx::shared_future<void> &impl_get_future() const noexcept {
     return m_future;
   }
+
+  hpx::spinlock &impl_get_future_mutex() const noexcept {
+    return m_future_mutex;
+  }
 #endif
 
 #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
@@ -464,6 +468,27 @@ class HPX {
       HPX::impl_decrement_active_parallel_region_count();
     }
   };
+
+  // This struct is identical to the above except it does not reset the shared
+  // data. It does, however, still decrement the parallel region count. It is
+  // meant for use in parallel regions which do not capture the execution space
+  // instance.
+  struct KOKKOS_ATTRIBUTE_NODISCARD reset_count_on_exit_parallel {
+    reset_count_on_exit_parallel() {}
+    ~reset_count_on_exit_parallel() {
+      HPX::impl_decrement_active_parallel_region_count();
+    }
+  };
+#else
+  struct KOKKOS_ATTRIBUTE_NODISCARD reset_on_exit_parallel {
+    reset_on_exit_parallel(HPX const &) {}
+    ~reset_on_exit_parallel() {}
+  };
+
+  struct KOKKOS_ATTRIBUTE_NODISCARD reset_count_on_exit_parallel {
+    reset_count_on_exit_parallel() {}
+    ~reset_count_on_exit_parallel() {}
+  };
 #endif
 
   static constexpr const char *name() noexcept { return "HPX"; }
@@ -499,20 +524,18 @@ inline void dispatch_execute_task(Closure *closure,
                                   bool force_synchronous = false) {
   Kokkos::Experimental::HPX::impl_increment_active_parallel_region_count();
 
-  if (hpx::threads::get_self_ptr() == nullptr) {
-    hpx::threads::run_as_hpx_thread([closure, &instance]() {
-      hpx::shared_future<void> &fut = instance.impl_get_future();
-      Closure closure_copy          = *closure;
-      fut = fut.then([closure_copy](hpx::shared_future<void> &&) {
-        closure_copy.execute_task();
-      });
-    });
-  } else {
+  Closure closure_copy = *closure;
+
+  {
+    std::unique_lock<hpx::spinlock> l(instance.impl_get_future_mutex());
+    hpx::util::ignore_lock(&instance.impl_get_future_mutex());
     hpx::shared_future<void> &fut = instance.impl_get_future();
-    Closure closure_copy          = *closure;
-    fut = fut.then([closure_copy](hpx::shared_future<void> &&) {
-      closure_copy.execute_task();
-    });
+
+    fut = fut.then(hpx::execution::parallel_executor(
+                       hpx::threads::thread_schedule_hint(0)),
+                   [closure_copy](hpx::shared_future<void> &&) {
+                     return closure_copy.execute_task();
+                   });
   }
 
   if (force_synchronous) {
@@ -526,15 +549,7 @@ template <typename Closure>
 inline void dispatch_execute_task(Closure *closure,
                                   Kokkos::Experimental::HPX const &,
                                   bool = false) {
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
-  Kokkos::Experimental::HPX::impl_increment_active_parallel_region_count();
-#endif
-
-  if (hpx::threads::get_self_ptr() == nullptr) {
-    hpx::threads::run_as_hpx_thread([closure]() { closure->execute_task(); });
-  } else {
-    closure->execute_task();
-  }
+  closure->execute_task();
 }
 #endif
 }  // namespace Impl
@@ -594,36 +609,33 @@ class UniqueToken<HPX, UniqueTokenScope::Instance> {
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int acquire() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (m_buffer == nullptr) {
-      return execution_space::impl_hardware_thread_id();
-    } else {
-      const ::Kokkos::pair<int, int> result =
-          ::Kokkos::Impl::concurrent_bitset::acquire_bounded(
-              m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count);
-
-      if (result.first < 0) {
-        ::Kokkos::abort(
-            "UniqueToken<HPX> failure to acquire tokens, no tokens "
-            "available");
-      }
-      return result.first;
-    }
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((
+        if (m_buffer == nullptr) {
+          return execution_space::impl_hardware_thread_id();
+        } else {
+          const ::Kokkos::pair<int, int> result =
+              ::Kokkos::Impl::concurrent_bitset::acquire_bounded(
+                  m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count);
+
+          if (result.first < 0) {
+            ::Kokkos::abort(
+                "UniqueToken<HPX> failure to acquire tokens, no tokens "
+                "available");
+          }
+          return result.first;
+        }))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief release a value acquired by generate
   KOKKOS_INLINE_FUNCTION
   void release(int i) const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (m_buffer != nullptr) {
+    KOKKOS_IF_ON_HOST((if (m_buffer != nullptr) {
       ::Kokkos::Impl::concurrent_bitset::release(m_buffer, i);
-    }
-#else
-    (void)i;
-#endif
+    }))
+
+    KOKKOS_IF_ON_DEVICE(((void)i;))
   }
 };
 
@@ -704,16 +716,13 @@ struct HPXTeamMember {
   void team_barrier() const {}
 
   template <class ValueType>
-  KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType &, const int &) const {
-    static_assert(std::is_trivially_default_constructible<ValueType>(),
-                  "Only trivial constructible types can be broadcasted");
-  }
+  KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType &, const int &) const {}
 
   template <class Closure, class ValueType>
-  KOKKOS_INLINE_FUNCTION void team_broadcast(const Closure &, ValueType &,
+  KOKKOS_INLINE_FUNCTION void team_broadcast(const Closure &closure,
+                                             ValueType &value,
                                              const int &) const {
-    static_assert(std::is_trivially_default_constructible<ValueType>(),
-                  "Only trivial constructible types can be broadcasted");
+    closure(value);
   }
 
   template <class ValueType, class JoinOp>
@@ -741,8 +750,6 @@ struct HPXTeamMember {
 template <class... Properties>
 class TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...>
     : public PolicyTraits<Properties...> {
-  using traits = PolicyTraits<Properties...>;
-
   int m_league_size;
   int m_team_size;
   std::size_t m_team_scratch_size[2];
@@ -750,6 +757,8 @@ class TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...>
   int m_chunk_size;
 
  public:
+  using traits = PolicyTraits<Properties...>;
+
   //! Tag this class as a kokkos execution policy
   using execution_policy = TeamPolicyInternal;
 
@@ -902,7 +911,7 @@ class TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...>
     init(league_size_request, 1);
   }
 
-  TeamPolicyInternal(const typename traits::execution_space &space,
+  TeamPolicyInternal(const typename traits::execution_space &,
                      int league_size_request,
                      const Kokkos::AUTO_t &, /* team_size_request */
                      const Kokkos::AUTO_t & /* vector_length_request */)
@@ -912,7 +921,7 @@ class TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...>
     init(league_size_request, 1);
   }
 
-  TeamPolicyInternal(const typename traits::execution_space &space,
+  TeamPolicyInternal(const typename traits::execution_space &,
                      int league_size_request, int team_size_request,
                      const Kokkos::AUTO_t & /* vector_length_request */
                      )
@@ -990,6 +999,19 @@ class TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...>
 namespace Kokkos {
 namespace Impl {
 
+template <typename Policy>
+typename Policy::member_type get_hpx_adjusted_chunk_size(Policy const &policy) {
+  const int concurrency = Kokkos::Experimental::HPX::concurrency();
+  const typename Policy::member_type n        = policy.end() - policy.begin();
+  typename Policy::member_type new_chunk_size = policy.chunk_size();
+
+  while (n >= 4 * concurrency * new_chunk_size) {
+    new_chunk_size *= 2;
+  }
+
+  return new_chunk_size;
+}
+
 template <class FunctorType, class... Traits>
 class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>,
                   Kokkos::Experimental::HPX> {
@@ -1041,59 +1063,31 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>,
 
   void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_policy.space());
-#endif
+
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
+
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
 
 #if KOKKOS_HPX_IMPLEMENTATION == 0
-    using hpx::parallel::for_loop;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
+    using hpx::for_loop;
 
-    for_loop(par.with(static_chunk_size(m_policy.chunk_size())),
+    for_loop(par.on(exec).with(static_chunk_size(m_policy.chunk_size())),
              m_policy.begin(), m_policy.end(), [this](const Member i) {
                execute_functor<WorkTag>(m_functor, i);
              });
 
 #elif KOKKOS_HPX_IMPLEMENTATION == 1
-    using hpx::apply;
-    using hpx::lcos::local::latch;
-
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    latch num_tasks_remaining(num_tasks);
-    ChunkedRoundRobinExecutor exec(num_tasks);
-
-    for (Member i_begin = m_policy.begin(); i_begin < m_policy.end();
-         i_begin += m_policy.chunk_size()) {
-      apply(exec, [this, &num_tasks_remaining, i_begin]() {
-        const Member i_end =
-            (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
-        execute_functor_range<WorkTag>(m_functor, i_begin, i_end);
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
-
-    num_tasks_remaining.wait();
+    using hpx::for_loop_strided;
 
-#elif KOKKOS_HPX_IMPLEMENTATION == 2
-    using hpx::parallel::for_loop_strided;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
-
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    ChunkedRoundRobinExecutor exec(num_tasks);
+    const Member chunk_size = get_hpx_adjusted_chunk_size(m_policy);
 
     for_loop_strided(
-        par.on(exec).with(static_chunk_size(1)), m_policy.begin(),
-        m_policy.end(), m_policy.chunk_size(), [this](const Member i_begin) {
-          const Member i_end =
-              (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
+        par.on(exec), m_policy.begin(), m_policy.end(), chunk_size,
+        [this, chunk_size](const Member i_begin) {
+          const Member i_end = (std::min)(i_begin + chunk_size, m_policy.end());
           execute_functor_range<WorkTag>(m_functor, i_begin, i_end);
         });
 #endif
@@ -1125,65 +1119,36 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
 
   inline void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_mdr_policy.space());
-#endif
+
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
+
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
 
 #if KOKKOS_HPX_IMPLEMENTATION == 0
-    using hpx::parallel::for_loop;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
+    using hpx::for_loop;
 
-    for_loop(par.with(static_chunk_size(m_policy.chunk_size())),
+    for_loop(par.on(exec).with(
+                 static_chunk_size(get_hpx_adjusted_chunk_size(m_policy))),
              m_policy.begin(), m_policy.end(), [this](const Member i) {
                iterate_type(m_mdr_policy, m_functor)(i);
              });
 
 #elif KOKKOS_HPX_IMPLEMENTATION == 1
-    using hpx::apply;
-    using hpx::lcos::local::latch;
-
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    latch num_tasks_remaining(num_tasks);
-    ChunkedRoundRobinExecutor exec(num_tasks);
-
-    for (Member i_begin = m_policy.begin(); i_begin < m_policy.end();
-         i_begin += m_policy.chunk_size()) {
-      apply(exec, [this, &num_tasks_remaining, i_begin]() {
-        const Member i_end =
-            (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
-        for (Member i = i_begin; i < i_end; ++i) {
-          iterate_type(m_mdr_policy, m_functor)(i);
-        }
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
-
-    num_tasks_remaining.wait();
-
-#elif KOKKOS_HPX_IMPLEMENTATION == 2
-    using hpx::parallel::for_loop_strided;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
-
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    ChunkedRoundRobinExecutor exec(num_tasks);
-
-    for_loop_strided(
-        par.on(exec).with(static_chunk_size(1)), m_policy.begin(),
-        m_policy.end(), m_policy.chunk_size(), [this](const Member i_begin) {
-          const Member i_end =
-              (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
-          for (Member i = i_begin; i < i_end; ++i) {
-            iterate_type(m_mdr_policy, m_functor)(i);
-          }
-        });
+    using hpx::for_loop_strided;
+
+    const Member chunk_size = get_hpx_adjusted_chunk_size(m_policy);
+
+    for_loop_strided(par.on(exec), m_policy.begin(), m_policy.end(), chunk_size,
+                     [this, chunk_size](const Member i_begin) {
+                       const Member i_end =
+                           (std::min)(i_begin + chunk_size, m_policy.end());
+                       for (Member i = i_begin; i < i_end; ++i) {
+                         iterate_type(m_mdr_policy, m_functor)(i);
+                       }
+                     });
 #endif
   }
 
@@ -1362,23 +1327,24 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
 
   inline void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_policy.space());
-#endif
 
     const std::size_t value_size =
         Analysis::value_size(ReducerConditional::select(m_functor, m_reducer));
 
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
+
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
+
 #if KOKKOS_HPX_IMPLEMENTATION == 0
     // NOTE: This version makes the most use of HPX functionality, but
     // requires the struct value_type_wrapper to handle different
     // reference_types. It is also significantly slower than the version
     // below due to not reusing the buffer used by other functions.
-    using hpx::parallel::for_loop;
     using hpx::parallel::reduction;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
 
     value_type_wrapper final_value(value_size);
     value_type_wrapper identity(value_size);
@@ -1388,7 +1354,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
     ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
                     identity.pointer());
 
-    for_loop(par.with(static_chunk_size(m_policy.chunk_size())),
+    for_loop(par.on(exec).with(
+                 static_chunk_size(get_hpx_adjusted_chunk_size(m_policy))),
              m_policy.begin(), m_policy.end(),
              reduction(final_value, identity,
                        [this](value_type_wrapper &a,
@@ -1405,97 +1372,29 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
     pointer_type final_value_ptr = final_value.pointer();
 
 #elif KOKKOS_HPX_IMPLEMENTATION == 1
+    using hpx::for_loop_strided;
+
     const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
 
     thread_buffer &buffer = m_policy.space().impl_get_buffer();
     buffer.resize(num_worker_threads, value_size);
 
-    using hpx::apply;
-    using hpx::lcos::local::latch;
-
-    {
-      latch num_tasks_remaining(num_worker_threads);
-      ChunkedRoundRobinExecutor exec(num_worker_threads);
-
-      for (int t = 0; t < num_worker_threads; ++t) {
-        apply(exec, [this, &num_tasks_remaining, &buffer, t]() {
+    for_loop(
+        par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+        [ this, &buffer ](const int t) noexcept {
           ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
                           reinterpret_cast<pointer_type>(buffer.get(t)));
-
-          num_tasks_remaining.count_down(1);
         });
-      }
-
-      num_tasks_remaining.wait();
-    }
-
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    latch num_tasks_remaining(num_tasks);
-    ChunkedRoundRobinExecutor exec(num_tasks);
-
-    for (Member i_begin = m_policy.begin(); i_begin < m_policy.end();
-         i_begin += m_policy.chunk_size()) {
-      apply(exec, [this, &num_tasks_remaining, &buffer, i_begin]() {
-        reference_type update =
-            ValueOps::reference(reinterpret_cast<pointer_type>(buffer.get(
-                Kokkos::Experimental::HPX::impl_hardware_thread_id())));
-        const Member i_end =
-            (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
-        execute_functor_range<WorkTag>(update, i_begin, i_end);
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
-
-    num_tasks_remaining.wait();
-
-    for (int i = 1; i < num_worker_threads; ++i) {
-      ValueJoin::join(ReducerConditional::select(m_functor, m_reducer),
-                      reinterpret_cast<pointer_type>(buffer.get(0)),
-                      reinterpret_cast<pointer_type>(buffer.get(i)));
-    }
-
-    pointer_type final_value_ptr =
-        reinterpret_cast<pointer_type>(buffer.get(0));
-
-#elif KOKKOS_HPX_IMPLEMENTATION == 2
-    const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
-
-    thread_buffer &buffer = m_policy.space().impl_get_buffer();
-    buffer.resize(num_worker_threads, value_size);
-
-    using hpx::parallel::for_loop;
-    using hpx::parallel::for_loop_strided;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
-
-    {
-      ChunkedRoundRobinExecutor exec(num_worker_threads);
-
-      for_loop(par.on(exec).with(static_chunk_size(1)), std::size_t(0),
-               num_worker_threads, [this, &buffer](const std::size_t t) {
-                 ValueInit::init(
-                     ReducerConditional::select(m_functor, m_reducer),
-                     reinterpret_cast<pointer_type>(buffer.get(t)));
-               });
-    }
 
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    ChunkedRoundRobinExecutor exec(num_tasks);
+    const Member chunk_size = get_hpx_adjusted_chunk_size(m_policy);
 
     for_loop_strided(
-        par.on(exec).with(static_chunk_size(1)), m_policy.begin(),
-        m_policy.end(), m_policy.chunk_size(),
-        [this, &buffer](const Member i_begin) {
+        par.on(exec), m_policy.begin(), m_policy.end(), chunk_size,
+        [this, &buffer, chunk_size](const Member i_begin) {
           reference_type update =
               ValueOps::reference(reinterpret_cast<pointer_type>(buffer.get(
                   Kokkos::Experimental::HPX::impl_hardware_thread_id())));
-          const Member i_end =
-              (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
+          const Member i_end = (std::min)(i_begin + chunk_size, m_policy.end());
           execute_functor_range<WorkTag>(update, i_begin, i_end);
         });
 
@@ -1588,10 +1487,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
 
   inline void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_mdr_policy.space());
-#endif
 
     const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
     const std::size_t value_size =
@@ -1600,17 +1497,22 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
     thread_buffer &buffer = m_mdr_policy.space().impl_get_buffer();
     buffer.resize(num_worker_threads, value_size);
 
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
+
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
+
 #if KOKKOS_HPX_IMPLEMENTATION == 0
-    using hpx::parallel::for_loop;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
 
-    for_loop(par, 0, num_worker_threads, [this, &buffer](std::size_t t) {
-      ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
-                      reinterpret_cast<pointer_type>(buffer.get(t)));
-    });
+    for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+             [this, &buffer](std::size_t t) {
+               ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
+                               reinterpret_cast<pointer_type>(buffer.get(t)));
+             });
 
-    for_loop(par.with(static_chunk_size(m_policy.chunk_size())),
+    for_loop(par.on(exec).with(
+                 static_chunk_size(get_hpx_adjusted_chunk_size(m_policy))),
              m_policy.begin(), m_policy.end(), [this, &buffer](const Member i) {
                reference_type update = ValueOps::reference(
                    reinterpret_cast<pointer_type>(buffer.get(
@@ -1619,81 +1521,23 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
              });
 
 #elif KOKKOS_HPX_IMPLEMENTATION == 1
-    using hpx::apply;
-    using hpx::lcos::local::latch;
+    using hpx::for_loop_strided;
 
-    {
-      latch num_tasks_remaining(num_worker_threads);
-      ChunkedRoundRobinExecutor exec(num_worker_threads);
-
-      for (int t = 0; t < num_worker_threads; ++t) {
-        apply(exec, [this, &buffer, &num_tasks_remaining, t]() {
-          ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
-                          reinterpret_cast<pointer_type>(buffer.get(t)));
-
-          num_tasks_remaining.count_down(1);
-        });
-      }
-
-      num_tasks_remaining.wait();
-    }
-
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    latch num_tasks_remaining(num_tasks);
-    ChunkedRoundRobinExecutor exec(num_tasks);
-
-    for (Member i_begin = m_policy.begin(); i_begin < m_policy.end();
-         i_begin += m_policy.chunk_size()) {
-      apply(exec, [this, &num_tasks_remaining, &buffer, i_begin]() {
-        reference_type update =
-            ValueOps::reference(reinterpret_cast<pointer_type>(buffer.get(
-                Kokkos::Experimental::HPX::impl_hardware_thread_id())));
-        const Member i_end =
-            (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
-
-        for (Member i = i_begin; i < i_end; ++i) {
-          iterate_type(m_mdr_policy, m_functor, update)(i);
-        }
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
-
-    num_tasks_remaining.wait();
-
-#elif KOKKOS_HPX_IMPLEMENTATION == 2
-    using hpx::parallel::for_loop;
-    using hpx::parallel::for_loop_strided;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
-
-    {
-      ChunkedRoundRobinExecutor exec(num_worker_threads);
-
-      for_loop(par.on(exec).with(static_chunk_size(1)), std::size_t(0),
-               num_worker_threads, [this, &buffer](const std::size_t t) {
-                 ValueInit::init(
-                     ReducerConditional::select(m_functor, m_reducer),
-                     reinterpret_cast<pointer_type>(buffer.get(t)));
-               });
-    }
+    for_loop(par.on(exec).with(static_chunk_size(1)), std::size_t(0),
+             num_worker_threads, [this, &buffer](const std::size_t t) {
+               ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
+                               reinterpret_cast<pointer_type>(buffer.get(t)));
+             });
 
-    const int num_tasks =
-        (m_policy.end() - m_policy.begin() + m_policy.chunk_size() - 1) /
-        m_policy.chunk_size();
-    ChunkedRoundRobinExecutor exec(num_tasks);
+    const Member chunk_size = get_hpx_adjusted_chunk_size(m_policy);
 
     for_loop_strided(
-        par.on(exec).with(static_chunk_size(1)), m_policy.begin(),
-        m_policy.end(), m_policy.chunk_size(),
-        [this, &buffer](const Member i_begin) {
+        par.on(exec), m_policy.begin(), m_policy.end(), chunk_size,
+        [this, &buffer, chunk_size](const Member i_begin) {
           reference_type update =
               ValueOps::reference(reinterpret_cast<pointer_type>(buffer.get(
                   Kokkos::Experimental::HPX::impl_hardware_thread_id())));
-          const Member i_end =
-              (std::min)(i_begin + m_policy.chunk_size(), m_policy.end());
+          const Member i_end = (std::min)(i_begin + chunk_size, m_policy.end());
 
           for (Member i = i_begin; i < i_end; ++i) {
             iterate_type(m_mdr_policy, m_functor, update)(i);
@@ -1807,10 +1651,8 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
 
   inline void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_policy.space());
-#endif
 
     const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
     const int value_count        = Analysis::value_count(m_functor);
@@ -1819,59 +1661,54 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
     thread_buffer &buffer = m_policy.space().impl_get_buffer();
     buffer.resize(num_worker_threads, 2 * value_size);
 
-    using hpx::apply;
-    using hpx::lcos::local::barrier;
-    using hpx::lcos::local::latch;
+    using hpx::barrier;
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
 
-    barrier bar(num_worker_threads);
-    latch num_tasks_remaining(num_worker_threads);
-    ChunkedRoundRobinExecutor exec(num_worker_threads);
+    barrier<> bar(num_worker_threads);
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
 
-    for (int t = 0; t < num_worker_threads; ++t) {
-      apply(exec, [this, &bar, &buffer, &num_tasks_remaining,
-                   num_worker_threads, value_count, value_size, t]() {
-        reference_type update_sum = ValueInit::init(
-            m_functor, reinterpret_cast<pointer_type>(buffer.get(t)));
+    for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+             [this, &bar, &buffer, num_worker_threads, value_count,
+              value_size](int t) {
+               reference_type update_sum = ValueInit::init(
+                   m_functor, reinterpret_cast<pointer_type>(buffer.get(t)));
 
-        const WorkRange range(m_policy, t, num_worker_threads);
-        execute_functor_range<WorkTag>(m_functor, range.begin(), range.end(),
-                                       update_sum, false);
+               const WorkRange range(m_policy, t, num_worker_threads);
+               execute_functor_range<WorkTag>(m_functor, range.begin(),
+                                              range.end(), update_sum, false);
 
-        bar.wait();
+               bar.arrive_and_wait();
 
-        if (t == 0) {
-          ValueInit::init(m_functor, reinterpret_cast<pointer_type>(
-                                         buffer.get(0) + value_size));
+               if (t == 0) {
+                 ValueInit::init(m_functor, reinterpret_cast<pointer_type>(
+                                                buffer.get(0) + value_size));
 
-          for (int i = 1; i < num_worker_threads; ++i) {
-            pointer_type ptr_1_prev =
-                reinterpret_cast<pointer_type>(buffer.get(i - 1));
-            pointer_type ptr_2_prev =
-                reinterpret_cast<pointer_type>(buffer.get(i - 1) + value_size);
-            pointer_type ptr_2 =
-                reinterpret_cast<pointer_type>(buffer.get(i) + value_size);
+                 for (int i = 1; i < num_worker_threads; ++i) {
+                   pointer_type ptr_1_prev =
+                       reinterpret_cast<pointer_type>(buffer.get(i - 1));
+                   pointer_type ptr_2_prev = reinterpret_cast<pointer_type>(
+                       buffer.get(i - 1) + value_size);
+                   pointer_type ptr_2 = reinterpret_cast<pointer_type>(
+                       buffer.get(i) + value_size);
 
-            for (int j = 0; j < value_count; ++j) {
-              ptr_2[j] = ptr_2_prev[j];
-            }
+                   for (int j = 0; j < value_count; ++j) {
+                     ptr_2[j] = ptr_2_prev[j];
+                   }
 
-            ValueJoin::join(m_functor, ptr_2, ptr_1_prev);
-          }
-        }
-
-        bar.wait();
+                   ValueJoin::join(m_functor, ptr_2, ptr_1_prev);
+                 }
+               }
 
-        reference_type update_base = ValueOps::reference(
-            reinterpret_cast<pointer_type>(buffer.get(t) + value_size));
+               bar.arrive_and_wait();
 
-        execute_functor_range<WorkTag>(m_functor, range.begin(), range.end(),
-                                       update_base, true);
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
+               reference_type update_base = ValueOps::reference(
+                   reinterpret_cast<pointer_type>(buffer.get(t) + value_size));
 
-    num_tasks_remaining.wait();
+               execute_functor_range<WorkTag>(m_functor, range.begin(),
+                                              range.end(), update_base, true);
+             });
   }
 
   inline ParallelScan(const FunctorType &arg_functor, const Policy &arg_policy)
@@ -1927,10 +1764,8 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>,
 
   inline void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_policy.space());
-#endif
 
     const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
     const int value_count        = Analysis::value_count(m_functor);
@@ -1939,63 +1774,58 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>,
     thread_buffer &buffer = m_policy.space().impl_get_buffer();
     buffer.resize(num_worker_threads, 2 * value_size);
 
-    using hpx::apply;
-    using hpx::lcos::local::barrier;
-    using hpx::lcos::local::latch;
+    using hpx::barrier;
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
 
-    barrier bar(num_worker_threads);
-    latch num_tasks_remaining(num_worker_threads);
-    ChunkedRoundRobinExecutor exec(num_worker_threads);
+    barrier<> bar(num_worker_threads);
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
 
-    for (int t = 0; t < num_worker_threads; ++t) {
-      apply(exec, [this, &bar, &buffer, &num_tasks_remaining,
-                   num_worker_threads, value_count, value_size, t]() {
-        reference_type update_sum = ValueInit::init(
-            m_functor, reinterpret_cast<pointer_type>(buffer.get(t)));
+    for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+             [this, &bar, &buffer, num_worker_threads, value_count,
+              value_size](int t) {
+               reference_type update_sum = ValueInit::init(
+                   m_functor, reinterpret_cast<pointer_type>(buffer.get(t)));
 
-        const WorkRange range(m_policy, t, num_worker_threads);
-        execute_functor_range<WorkTag>(m_functor, range.begin(), range.end(),
-                                       update_sum, false);
+               const WorkRange range(m_policy, t, num_worker_threads);
+               execute_functor_range<WorkTag>(m_functor, range.begin(),
+                                              range.end(), update_sum, false);
 
-        bar.wait();
+               bar.arrive_and_wait();
 
-        if (t == 0) {
-          ValueInit::init(m_functor, reinterpret_cast<pointer_type>(
-                                         buffer.get(0) + value_size));
+               if (t == 0) {
+                 ValueInit::init(m_functor, reinterpret_cast<pointer_type>(
+                                                buffer.get(0) + value_size));
 
-          for (int i = 1; i < num_worker_threads; ++i) {
-            pointer_type ptr_1_prev =
-                reinterpret_cast<pointer_type>(buffer.get(i - 1));
-            pointer_type ptr_2_prev =
-                reinterpret_cast<pointer_type>(buffer.get(i - 1) + value_size);
-            pointer_type ptr_2 =
-                reinterpret_cast<pointer_type>(buffer.get(i) + value_size);
+                 for (int i = 1; i < num_worker_threads; ++i) {
+                   pointer_type ptr_1_prev =
+                       reinterpret_cast<pointer_type>(buffer.get(i - 1));
+                   pointer_type ptr_2_prev = reinterpret_cast<pointer_type>(
+                       buffer.get(i - 1) + value_size);
+                   pointer_type ptr_2 = reinterpret_cast<pointer_type>(
+                       buffer.get(i) + value_size);
 
-            for (int j = 0; j < value_count; ++j) {
-              ptr_2[j] = ptr_2_prev[j];
-            }
+                   for (int j = 0; j < value_count; ++j) {
+                     ptr_2[j] = ptr_2_prev[j];
+                   }
 
-            ValueJoin::join(m_functor, ptr_2, ptr_1_prev);
-          }
-        }
-
-        bar.wait();
-
-        reference_type update_base = ValueOps::reference(
-            reinterpret_cast<pointer_type>(buffer.get(t) + value_size));
+                   ValueJoin::join(m_functor, ptr_2, ptr_1_prev);
+                 }
+               }
 
-        execute_functor_range<WorkTag>(m_functor, range.begin(), range.end(),
-                                       update_base, true);
+               bar.arrive_and_wait();
 
-        if (t == num_worker_threads - 1) {
-          m_returnvalue = update_base;
-        }
+               reference_type update_base = ValueOps::reference(
+                   reinterpret_cast<pointer_type>(buffer.get(t) + value_size));
 
-        num_tasks_remaining.count_down(1);
-      });
-    }
+               execute_functor_range<WorkTag>(m_functor, range.begin(),
+                                              range.end(), update_base, true);
 
-    num_tasks_remaining.wait();
+               if (t == num_worker_threads - 1) {
+                 m_returnvalue = update_base;
+               }
+             });
   }
 
   inline ParallelScanWithTotal(const FunctorType &arg_functor,
@@ -2076,23 +1906,24 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
 
   inline void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_policy.space());
-#endif
 
     const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
 
     thread_buffer &buffer = m_policy.space().impl_get_buffer();
     buffer.resize(num_worker_threads, m_shared);
 
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
+
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
+
 #if KOKKOS_HPX_IMPLEMENTATION == 0
-    using hpx::parallel::for_loop;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
+    using hpx::for_loop;
 
     for_loop(
-        par.with(static_chunk_size(m_policy.chunk_size())), 0,
+        par.on(exec).with(static_chunk_size(m_policy.chunk_size())), 0,
         m_policy.league_size(), [this, &buffer](const int league_rank) {
           execute_functor<WorkTag>(
               m_functor, m_policy, league_rank,
@@ -2101,42 +1932,11 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
         });
 
 #elif KOKKOS_HPX_IMPLEMENTATION == 1
-    using hpx::apply;
-    using hpx::lcos::local::latch;
-
-    const int num_tasks = (m_policy.league_size() + m_policy.chunk_size() - 1) /
-                          m_policy.chunk_size();
-    latch num_tasks_remaining(num_tasks);
-    ChunkedRoundRobinExecutor exec(num_tasks);
-
-    for (int league_rank_begin = 0; league_rank_begin < m_policy.league_size();
-         league_rank_begin += m_policy.chunk_size()) {
-      apply(exec, [this, &buffer, &num_tasks_remaining, league_rank_begin]() {
-        const int league_rank_end = (std::min)(
-            league_rank_begin + m_policy.chunk_size(), m_policy.league_size());
-        execute_functor_range<WorkTag>(
-            m_functor, m_policy, league_rank_begin, league_rank_end,
-            buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()),
-            m_shared);
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
-
-    num_tasks_remaining.wait();
-
-#elif KOKKOS_HPX_IMPLEMENTATION == 2
-    using hpx::parallel::for_loop_strided;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
-
-    const int num_tasks = (m_policy.league_size() + m_policy.chunk_size() - 1) /
-                          m_policy.chunk_size();
-    ChunkedRoundRobinExecutor exec(num_tasks);
+    using hpx::for_loop_strided;
 
     for_loop_strided(
-        par.on(exec).with(static_chunk_size(1)), 0, m_policy.league_size(),
-        m_policy.chunk_size(), [this, &buffer](const int league_rank_begin) {
+        par.on(exec), 0, m_policy.league_size(), m_policy.chunk_size(),
+        [this, &buffer](const int league_rank_begin) {
           const int league_rank_end =
               (std::min)(league_rank_begin + m_policy.chunk_size(),
                          m_policy.league_size());
@@ -2261,10 +2061,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
 
   inline void execute_task() const {
     // See [note 1] for an explanation.
-#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)
     Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit(
         m_policy.space());
-#endif
 
     const int num_worker_threads = Kokkos::Experimental::HPX::concurrency();
     const std::size_t value_size =
@@ -2273,98 +2071,44 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
     thread_buffer &buffer = m_policy.space().impl_get_buffer();
     buffer.resize(num_worker_threads, value_size + m_shared);
 
-#if KOKKOS_HPX_IMPLEMENTATION == 0
-    using hpx::parallel::for_loop;
-    using hpx::parallel::execution::par;
-
-    for_loop(par, 0, num_worker_threads, [this, &buffer](const std::size_t t) {
-      ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
-                      reinterpret_cast<pointer_type>(buffer.get(t)));
-    });
-
-    using hpx::parallel::execution::static_chunk_size;
+    auto exec = Kokkos::Experimental::HPX::impl_get_executor();
 
-    hpx::parallel::for_loop(
-        par.with(static_chunk_size(m_policy.chunk_size())), 0,
-        m_policy.league_size(),
-        [this, &buffer, value_size](const int league_rank) {
-          std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id();
-          reference_type update = ValueOps::reference(
-              reinterpret_cast<pointer_type>(buffer.get(t)));
+    using hpx::for_loop;
+    using hpx::execution::par;
+    using hpx::execution::static_chunk_size;
 
-          execute_functor<WorkTag>(m_functor, m_policy, league_rank,
-                                   buffer.get(t) + value_size, m_shared,
-                                   update);
-        });
-
-#elif KOKKOS_HPX_IMPLEMENTATION == 1
-    using hpx::apply;
-    using hpx::lcos::local::latch;
-
-    {
-      latch num_tasks_remaining(num_worker_threads);
-      ChunkedRoundRobinExecutor exec(num_worker_threads);
-
-      for (int t = 0; t < num_worker_threads; ++t) {
-        apply(exec, [this, &buffer, &num_tasks_remaining, t]() {
-          ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
-                          reinterpret_cast<pointer_type>(buffer.get(t)));
-
-          num_tasks_remaining.count_down(1);
-        });
-      }
-
-      num_tasks_remaining.wait();
-    }
-
-    const int num_tasks = (m_policy.league_size() + m_policy.chunk_size() - 1) /
-                          m_policy.chunk_size();
-    latch num_tasks_remaining(num_tasks);
-    ChunkedRoundRobinExecutor exec(num_tasks);
-
-    for (int league_rank_begin = 0; league_rank_begin < m_policy.league_size();
-         league_rank_begin += m_policy.chunk_size()) {
-      apply(exec, [this, &buffer, &num_tasks_remaining, league_rank_begin,
-                   value_size]() {
-        std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id();
-        reference_type update =
-            ValueOps::reference(reinterpret_cast<pointer_type>(buffer.get(t)));
-        const int league_rank_end = (std::min)(
-            league_rank_begin + m_policy.chunk_size(), m_policy.league_size());
-        execute_functor_range<WorkTag>(
-            m_functor, m_policy, league_rank_begin, league_rank_end,
-            buffer.get(t) + value_size, m_shared, update);
-
-        num_tasks_remaining.count_down(1);
-      });
-    }
+#if KOKKOS_HPX_IMPLEMENTATION == 0
 
-    num_tasks_remaining.wait();
+    for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+             [this, &buffer](const std::size_t t) {
+               ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
+                               reinterpret_cast<pointer_type>(buffer.get(t)));
+             });
 
-#elif KOKKOS_HPX_IMPLEMENTATION == 2
-    using hpx::parallel::for_loop;
-    using hpx::parallel::for_loop_strided;
-    using hpx::parallel::execution::par;
-    using hpx::parallel::execution::static_chunk_size;
+    for_loop(par.on(exec).with(static_chunk_size(m_policy.chunk_size())), 0,
+             m_policy.league_size(),
+             [this, &buffer, value_size](const int league_rank) {
+               std::size_t t =
+                   Kokkos::Experimental::HPX::impl_hardware_thread_id();
+               reference_type update = ValueOps::reference(
+                   reinterpret_cast<pointer_type>(buffer.get(t)));
 
-    {
-      ChunkedRoundRobinExecutor exec(num_worker_threads);
+               execute_functor<WorkTag>(m_functor, m_policy, league_rank,
+                                        buffer.get(t) + value_size, m_shared,
+                                        update);
+             });
 
-      for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
-               [this, &buffer](std::size_t const t) {
-                 ValueInit::init(
-                     ReducerConditional::select(m_functor, m_reducer),
-                     reinterpret_cast<pointer_type>(buffer.get(t)));
-               });
-    }
+#elif KOKKOS_HPX_IMPLEMENTATION == 1
+    using hpx::for_loop_strided;
 
-    const int num_tasks = (m_policy.league_size() + m_policy.chunk_size() - 1) /
-                          m_policy.chunk_size();
-    ChunkedRoundRobinExecutor exec(num_tasks);
+    for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads,
+             [this, &buffer](std::size_t const t) {
+               ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
+                               reinterpret_cast<pointer_type>(buffer.get(t)));
+             });
 
     for_loop_strided(
-        par.on(exec).with(static_chunk_size(1)), 0, m_policy.league_size(),
-        m_policy.chunk_size(),
+        par.on(exec), 0, m_policy.league_size(), m_policy.chunk_size(),
         [this, &buffer, value_size](int const league_rank_begin) {
           std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id();
           reference_type update = ValueOps::reference(
diff --git a/packages/kokkos/core/src/Kokkos_Half.hpp b/packages/kokkos/core/src/Kokkos_Half.hpp
index e4b351381afbd6f7bac2e14d340ca1888bead0be..7382ffbd47e1d38a17050bbb86c764489bb293bc 100644
--- a/packages/kokkos/core/src/Kokkos_Half.hpp
+++ b/packages/kokkos/core/src/Kokkos_Half.hpp
@@ -47,12 +47,867 @@
 
 #include <type_traits>
 #include <Kokkos_Macros.hpp>
+#include <iosfwd>  // istream & ostream for extraction and insertion ops
+#include <string>
 
-// Include special backend specific versions here
-#include <Cuda/Kokkos_Cuda_Half.hpp>
+#ifdef KOKKOS_IMPL_HALF_TYPE_DEFINED
 
-// Potentially include special compiler specific versions here
-// e.g. for Intel
+// KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH: A macro to select which
+// floating_pointer_wrapper operator paths should be used. For CUDA, let the
+// compiler conditionally select when device ops are used For SYCL, we have a
+// full half type on both host and device
+#if defined(__CUDA_ARCH__) || defined(KOKKOS_ENABLE_SYCL)
+#define KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+#endif
+
+/************************* BEGIN forward declarations *************************/
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+template <class FloatType>
+class floating_point_wrapper;
+}
+
+// Declare half_t (binary16)
+using half_t = Kokkos::Experimental::Impl::floating_point_wrapper<
+    Kokkos::Impl::half_impl_t ::type>;
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(float val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(bool val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(double val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(short val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(int val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long long val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned short val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned int val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long long val);
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(half_t);
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, bool>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+        cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned int>::value, T>
+    cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+        cast_from_half(half_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+        cast_from_half(half_t);
+
+// declare bhalf_t
+#ifdef KOKKOS_IMPL_BHALF_TYPE_DEFINED
+using bhalf_t = Kokkos::Experimental::Impl::floating_point_wrapper<
+    Kokkos::Impl ::bhalf_impl_t ::type>;
+
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(float val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(bool val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(double val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(short val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(int val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long long val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned short val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned int val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long long val);
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(bhalf_t val);
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, bool>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+        cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned int>::value, T>
+    cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+        cast_from_bhalf(bhalf_t);
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+        cast_from_bhalf(bhalf_t);
+#endif  // KOKKOS_IMPL_BHALF_TYPE_DEFINED
+
+template <class T>
+static KOKKOS_INLINE_FUNCTION Kokkos::Experimental::half_t cast_to_wrapper(
+    T x, const volatile Kokkos::Impl::half_impl_t::type&);
+
+#ifdef KOKKOS_IMPL_BHALF_TYPE_DEFINED
+template <class T>
+static KOKKOS_INLINE_FUNCTION Kokkos::Experimental::bhalf_t cast_to_wrapper(
+    T x, const volatile Kokkos::Impl::bhalf_impl_t::type&);
+#endif  // KOKKOS_IMPL_BHALF_TYPE_DEFINED
+
+template <class T>
+static KOKKOS_INLINE_FUNCTION T
+cast_from_wrapper(const Kokkos::Experimental::half_t& x);
+
+#ifdef KOKKOS_IMPL_BHALF_TYPE_DEFINED
+template <class T>
+static KOKKOS_INLINE_FUNCTION T
+cast_from_wrapper(const Kokkos::Experimental::bhalf_t& x);
+#endif  // KOKKOS_IMPL_BHALF_TYPE_DEFINED
+/************************** END forward declarations **************************/
+
+namespace Impl {
+template <class FloatType>
+class alignas(FloatType) floating_point_wrapper {
+ public:
+  using impl_type = FloatType;
+
+ private:
+  impl_type val;
+  using fixed_width_integer_type = std::conditional_t<
+      sizeof(impl_type) == 2, uint16_t,
+      std::conditional_t<
+          sizeof(impl_type) == 4, uint32_t,
+          std::conditional_t<sizeof(impl_type) == 8, uint64_t, void>>>;
+  static_assert(!std::is_void<fixed_width_integer_type>::value,
+                "Invalid impl_type");
+
+ public:
+  // In-class initialization and defaulted default constructors not used
+  // since Cuda supports half precision initialization via the below constructor
+  KOKKOS_FUNCTION
+  floating_point_wrapper() : val(0.0F) {}
+
+// Copy constructors
+// Getting "C2580: multiple versions of a defaulted special
+// member function are not allowed" with VS 16.11.3 and CUDA 11.4.2
+#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)
+  KOKKOS_FUNCTION
+  floating_point_wrapper(const floating_point_wrapper& rhs) : val(rhs.val) {}
+#else
+  KOKKOS_DEFAULTED_FUNCTION
+  floating_point_wrapper(const floating_point_wrapper&) noexcept = default;
+#endif
+
+  KOKKOS_INLINE_FUNCTION
+  floating_point_wrapper(const volatile floating_point_wrapper& rhs) {
+#if defined(KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH) && !defined(KOKKOS_ENABLE_SYCL)
+    val = rhs.val;
+#else
+    const volatile fixed_width_integer_type* rv_ptr =
+        reinterpret_cast<const volatile fixed_width_integer_type*>(&rhs.val);
+    const fixed_width_integer_type rv_val = *rv_ptr;
+    val       = reinterpret_cast<const impl_type&>(rv_val);
+#endif  // KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+  }
+
+  // Don't support implicit conversion back to impl_type.
+  // impl_type is a storage only type on host.
+  KOKKOS_FUNCTION
+  explicit operator impl_type() const { return val; }
+  KOKKOS_FUNCTION
+  explicit operator float() const { return cast_from_wrapper<float>(*this); }
+  KOKKOS_FUNCTION
+  explicit operator bool() const { return cast_from_wrapper<bool>(*this); }
+  KOKKOS_FUNCTION
+  explicit operator double() const { return cast_from_wrapper<double>(*this); }
+  KOKKOS_FUNCTION
+  explicit operator short() const { return cast_from_wrapper<short>(*this); }
+  KOKKOS_FUNCTION
+  explicit operator int() const { return cast_from_wrapper<int>(*this); }
+  KOKKOS_FUNCTION
+  explicit operator long() const { return cast_from_wrapper<long>(*this); }
+  KOKKOS_FUNCTION
+  explicit operator long long() const {
+    return cast_from_wrapper<long long>(*this);
+  }
+  KOKKOS_FUNCTION
+  explicit operator unsigned short() const {
+    return cast_from_wrapper<unsigned short>(*this);
+  }
+  KOKKOS_FUNCTION
+  explicit operator unsigned int() const {
+    return cast_from_wrapper<unsigned int>(*this);
+  }
+  KOKKOS_FUNCTION
+  explicit operator unsigned long() const {
+    return cast_from_wrapper<unsigned long>(*this);
+  }
+  KOKKOS_FUNCTION
+  explicit operator unsigned long long() const {
+    return cast_from_wrapper<unsigned long long>(*this);
+  }
+
+  /**
+   * Conversion constructors.
+   *
+   * Support implicit conversions from impl_type, float, double ->
+   * floating_point_wrapper. Mixed precision expressions require upcasting which
+   * is done in the
+   * "// Binary Arithmetic" operator overloads below.
+   *
+   * Support implicit conversions from integral types -> floating_point_wrapper.
+   * Expressions involving floating_point_wrapper with integral types require
+   * downcasting the integral types to floating_point_wrapper. Existing operator
+   * overloads can handle this with the addition of the below implicit
+   * conversion constructors.
+   */
+  KOKKOS_FUNCTION
+  constexpr floating_point_wrapper(impl_type rhs) : val(rhs) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(float rhs) : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(double rhs) : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  explicit floating_point_wrapper(bool rhs)
+      : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(short rhs) : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(int rhs) : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(long rhs) : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(long long rhs) : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(unsigned short rhs)
+      : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(unsigned int rhs)
+      : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(unsigned long rhs)
+      : val(cast_to_wrapper(rhs, val).val) {}
+  KOKKOS_FUNCTION
+  floating_point_wrapper(unsigned long long rhs)
+      : val(cast_to_wrapper(rhs, val).val) {}
+
+  // Unary operators
+  KOKKOS_FUNCTION
+  floating_point_wrapper operator+() const {
+    floating_point_wrapper tmp = *this;
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    tmp.val = +tmp.val;
+#else
+    tmp.val   = cast_to_wrapper(+cast_from_wrapper<float>(tmp), val).val;
+#endif
+    return tmp;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper operator-() const {
+    floating_point_wrapper tmp = *this;
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    tmp.val = -tmp.val;
+#else
+    tmp.val   = cast_to_wrapper(-cast_from_wrapper<float>(tmp), val).val;
+#endif
+    return tmp;
+  }
+
+  // Prefix operators
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator++() {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    val = val + impl_type(1.0F);  // cuda has no operator++ for __nv_bfloat
+#else
+    float tmp = cast_from_wrapper<float>(*this);
+    ++tmp;
+    val       = cast_to_wrapper(tmp, val).val;
+#endif
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator--() {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    val = val - impl_type(1.0F);  // cuda has no operator-- for __nv_bfloat
+#else
+    float tmp = cast_from_wrapper<float>(*this);
+    --tmp;
+    val = cast_to_wrapper(tmp, val).val;
+#endif
+    return *this;
+  }
+
+  // Postfix operators
+  KOKKOS_FUNCTION
+  floating_point_wrapper operator++(int) {
+    floating_point_wrapper tmp = *this;
+    operator++();
+    return tmp;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper operator--(int) {
+    floating_point_wrapper tmp = *this;
+    operator--();
+    return tmp;
+  }
+
+  // Binary operators
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator=(impl_type rhs) {
+    val = rhs;
+    return *this;
+  }
+
+  template <class T>
+  KOKKOS_FUNCTION floating_point_wrapper& operator=(T rhs) {
+    val = cast_to_wrapper(rhs, val).val;
+    return *this;
+  }
+
+  template <class T>
+  KOKKOS_FUNCTION void operator=(T rhs) volatile {
+    impl_type new_val = cast_to_wrapper(rhs, val).val;
+    volatile fixed_width_integer_type* val_ptr =
+        reinterpret_cast<volatile fixed_width_integer_type*>(
+            const_cast<impl_type*>(&val));
+    *val_ptr = reinterpret_cast<fixed_width_integer_type&>(new_val);
+  }
+
+  // Compound operators
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator+=(floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    val = val + rhs.val;  // cuda has no operator+= for __nv_bfloat
+#else
+    val = cast_to_wrapper(
+              cast_from_wrapper<float>(*this) + cast_from_wrapper<float>(rhs),
+              val)
+              .val;
+#endif
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  void operator+=(const volatile floating_point_wrapper& rhs) volatile {
+    floating_point_wrapper tmp_rhs = rhs;
+    floating_point_wrapper tmp_lhs = *this;
+
+    tmp_lhs += tmp_rhs;
+    *this = tmp_lhs;
+  }
+
+  // Compound operators: upcast overloads for +=
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator+=(T& lhs, floating_point_wrapper rhs) {
+    lhs += static_cast<T>(rhs);
+    return lhs;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator+=(float rhs) {
+    float result = static_cast<float>(val) + rhs;
+    val          = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator+=(double rhs) {
+    double result = static_cast<double>(val) + rhs;
+    val           = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator-=(floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    val = val - rhs.val;  // cuda has no operator-= for __nv_bfloat
+#else
+    val = cast_to_wrapper(
+              cast_from_wrapper<float>(*this) - cast_from_wrapper<float>(rhs),
+              val)
+              .val;
+#endif
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  void operator-=(const volatile floating_point_wrapper& rhs) volatile {
+    floating_point_wrapper tmp_rhs = rhs;
+    floating_point_wrapper tmp_lhs = *this;
+
+    tmp_lhs -= tmp_rhs;
+    *this = tmp_lhs;
+  }
+
+  // Compund operators: upcast overloads for -=
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator-=(T& lhs, floating_point_wrapper rhs) {
+    lhs -= static_cast<T>(rhs);
+    return lhs;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator-=(float rhs) {
+    float result = static_cast<float>(val) - rhs;
+    val          = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator-=(double rhs) {
+    double result = static_cast<double>(val) - rhs;
+    val           = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator*=(floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    val = val * rhs.val;  // cuda has no operator*= for __nv_bfloat
+#else
+    val = cast_to_wrapper(
+              cast_from_wrapper<float>(*this) * cast_from_wrapper<float>(rhs),
+              val)
+              .val;
+#endif
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  void operator*=(const volatile floating_point_wrapper& rhs) volatile {
+    floating_point_wrapper tmp_rhs = rhs;
+    floating_point_wrapper tmp_lhs = *this;
+
+    tmp_lhs *= tmp_rhs;
+    *this = tmp_lhs;
+  }
+
+  // Compund operators: upcast overloads for *=
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator*=(T& lhs, floating_point_wrapper rhs) {
+    lhs *= static_cast<T>(rhs);
+    return lhs;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator*=(float rhs) {
+    float result = static_cast<float>(val) * rhs;
+    val          = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator*=(double rhs) {
+    double result = static_cast<double>(val) * rhs;
+    val           = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator/=(floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    val = val / rhs.val;  // cuda has no operator/= for __nv_bfloat
+#else
+    val = cast_to_wrapper(
+              cast_from_wrapper<float>(*this) / cast_from_wrapper<float>(rhs),
+              val)
+              .val;
+#endif
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  void operator/=(const volatile floating_point_wrapper& rhs) volatile {
+    floating_point_wrapper tmp_rhs = rhs;
+    floating_point_wrapper tmp_lhs = *this;
+
+    tmp_lhs /= tmp_rhs;
+    *this = tmp_lhs;
+  }
+
+  // Compund operators: upcast overloads for /=
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator/=(T& lhs, floating_point_wrapper rhs) {
+    lhs /= static_cast<T>(rhs);
+    return lhs;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator/=(float rhs) {
+    float result = static_cast<float>(val) / rhs;
+    val          = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  KOKKOS_FUNCTION
+  floating_point_wrapper& operator/=(double rhs) {
+    double result = static_cast<double>(val) / rhs;
+    val           = static_cast<impl_type>(result);
+    return *this;
+  }
+
+  // Binary Arithmetic
+  KOKKOS_FUNCTION
+  friend floating_point_wrapper operator+(floating_point_wrapper lhs,
+                                          floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    lhs += rhs;
+#else
+    lhs.val = cast_to_wrapper(
+                  cast_from_wrapper<float>(lhs) + cast_from_wrapper<float>(rhs),
+                  lhs.val)
+                  .val;
+#endif
+    return lhs;
+  }
+
+  // Binary Arithmetic upcast operators for +
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator+(floating_point_wrapper lhs, T rhs) {
+    return T(lhs) + rhs;
+  }
+
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator+(T lhs, floating_point_wrapper rhs) {
+    return lhs + T(rhs);
+  }
+
+  KOKKOS_FUNCTION
+  friend floating_point_wrapper operator-(floating_point_wrapper lhs,
+                                          floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    lhs -= rhs;
+#else
+    lhs.val = cast_to_wrapper(
+                  cast_from_wrapper<float>(lhs) - cast_from_wrapper<float>(rhs),
+                  lhs.val)
+                  .val;
+#endif
+    return lhs;
+  }
+
+  // Binary Arithmetic upcast operators for -
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator-(floating_point_wrapper lhs, T rhs) {
+    return T(lhs) - rhs;
+  }
+
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator-(T lhs, floating_point_wrapper rhs) {
+    return lhs - T(rhs);
+  }
+
+  KOKKOS_FUNCTION
+  friend floating_point_wrapper operator*(floating_point_wrapper lhs,
+                                          floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    lhs *= rhs;
+#else
+    lhs.val = cast_to_wrapper(
+                  cast_from_wrapper<float>(lhs) * cast_from_wrapper<float>(rhs),
+                  lhs.val)
+                  .val;
+#endif
+    return lhs;
+  }
+
+  // Binary Arithmetic upcast operators for *
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator*(floating_point_wrapper lhs, T rhs) {
+    return T(lhs) * rhs;
+  }
+
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator*(T lhs, floating_point_wrapper rhs) {
+    return lhs * T(rhs);
+  }
+
+  KOKKOS_FUNCTION
+  friend floating_point_wrapper operator/(floating_point_wrapper lhs,
+                                          floating_point_wrapper rhs) {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    lhs /= rhs;
+#else
+    lhs.val = cast_to_wrapper(
+                  cast_from_wrapper<float>(lhs) / cast_from_wrapper<float>(rhs),
+                  lhs.val)
+                  .val;
+#endif
+    return lhs;
+  }
+
+  // Binary Arithmetic upcast operators for /
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator/(floating_point_wrapper lhs, T rhs) {
+    return T(lhs) / rhs;
+  }
+
+  template <class T>
+  KOKKOS_FUNCTION friend std::enable_if_t<
+      std::is_same<T, float>::value || std::is_same<T, double>::value, T>
+  operator/(T lhs, floating_point_wrapper rhs) {
+    return lhs / T(rhs);
+  }
+
+  // Logical operators
+  KOKKOS_FUNCTION
+  bool operator!() const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(!val);
+#else
+    return !cast_from_wrapper<float>(*this);
+#endif
+  }
+
+  // NOTE: Loses short-circuit evaluation
+  KOKKOS_FUNCTION
+  bool operator&&(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val && rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) && cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  // NOTE: Loses short-circuit evaluation
+  KOKKOS_FUNCTION
+  bool operator||(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val || rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) || cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  // Comparison operators
+  KOKKOS_FUNCTION
+  bool operator==(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val == rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) == cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  KOKKOS_FUNCTION
+  bool operator!=(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val != rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) != cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  KOKKOS_FUNCTION
+  bool operator<(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val < rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) < cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  KOKKOS_FUNCTION
+  bool operator>(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val > rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) > cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  KOKKOS_FUNCTION
+  bool operator<=(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val <= rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) <= cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  KOKKOS_FUNCTION
+  bool operator>=(floating_point_wrapper rhs) const {
+#ifdef KOKKOS_HALF_IS_FULL_TYPE_ON_ARCH
+    return static_cast<bool>(val >= rhs.val);
+#else
+    return cast_from_wrapper<float>(*this) >= cast_from_wrapper<float>(rhs);
+#endif
+  }
+
+  KOKKOS_FUNCTION
+  friend bool operator==(const volatile floating_point_wrapper& lhs,
+                         const volatile floating_point_wrapper& rhs) {
+    floating_point_wrapper tmp_lhs = lhs, tmp_rhs = rhs;
+    return tmp_lhs == tmp_rhs;
+  }
+
+  KOKKOS_FUNCTION
+  friend bool operator!=(const volatile floating_point_wrapper& lhs,
+                         const volatile floating_point_wrapper& rhs) {
+    floating_point_wrapper tmp_lhs = lhs, tmp_rhs = rhs;
+    return tmp_lhs != tmp_rhs;
+  }
+
+  KOKKOS_FUNCTION
+  friend bool operator<(const volatile floating_point_wrapper& lhs,
+                        const volatile floating_point_wrapper& rhs) {
+    floating_point_wrapper tmp_lhs = lhs, tmp_rhs = rhs;
+    return tmp_lhs < tmp_rhs;
+  }
+
+  KOKKOS_FUNCTION
+  friend bool operator>(const volatile floating_point_wrapper& lhs,
+                        const volatile floating_point_wrapper& rhs) {
+    floating_point_wrapper tmp_lhs = lhs, tmp_rhs = rhs;
+    return tmp_lhs > tmp_rhs;
+  }
+
+  KOKKOS_FUNCTION
+  friend bool operator<=(const volatile floating_point_wrapper& lhs,
+                         const volatile floating_point_wrapper& rhs) {
+    floating_point_wrapper tmp_lhs = lhs, tmp_rhs = rhs;
+    return tmp_lhs <= tmp_rhs;
+  }
+
+  KOKKOS_FUNCTION
+  friend bool operator>=(const volatile floating_point_wrapper& lhs,
+                         const volatile floating_point_wrapper& rhs) {
+    floating_point_wrapper tmp_lhs = lhs, tmp_rhs = rhs;
+    return tmp_lhs >= tmp_rhs;
+  }
+
+  // Insertion and extraction operators
+  friend std::ostream& operator<<(std::ostream& os,
+                                  const floating_point_wrapper& x) {
+    const std::string out = std::to_string(static_cast<double>(x));
+    os << out;
+    return os;
+  }
+
+  friend std::istream& operator>>(std::istream& is, floating_point_wrapper& x) {
+    std::string in;
+    is >> in;
+    x = std::stod(in);
+    return is;
+  }
+};
+}  // namespace Impl
+
+// Declare wrapper overloads now that floating_point_wrapper is declared
+template <class T>
+static KOKKOS_INLINE_FUNCTION Kokkos::Experimental::half_t cast_to_wrapper(
+    T x, const volatile Kokkos::Impl::half_impl_t::type&) {
+  return Kokkos::Experimental::cast_to_half(x);
+}
+
+#ifdef KOKKOS_IMPL_BHALF_TYPE_DEFINED
+template <class T>
+static KOKKOS_INLINE_FUNCTION Kokkos::Experimental::bhalf_t cast_to_wrapper(
+    T x, const volatile Kokkos::Impl::bhalf_impl_t::type&) {
+  return Kokkos::Experimental::cast_to_bhalf(x);
+}
+#endif  // KOKKOS_IMPL_BHALF_TYPE_DEFINED
+
+template <class T>
+static KOKKOS_INLINE_FUNCTION T
+cast_from_wrapper(const Kokkos::Experimental::half_t& x) {
+  return Kokkos::Experimental::cast_from_half<T>(x);
+}
+
+#ifdef KOKKOS_IMPL_BHALF_TYPE_DEFINED
+template <class T>
+static KOKKOS_INLINE_FUNCTION T
+cast_from_wrapper(const Kokkos::Experimental::bhalf_t& x) {
+  return Kokkos::Experimental::cast_from_bhalf<T>(x);
+}
+#endif  // KOKKOS_IMPL_BHALF_TYPE_DEFINED
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif  // KOKKOS_IMPL_HALF_TYPE_DEFINED
 
 // If none of the above actually did anything and defined a half precision type
 // define a fallback implementation here using float
@@ -116,4 +971,62 @@ cast_from_half(half_t val) {
 #else
 #define KOKKOS_HALF_T_IS_FLOAT false
 #endif  // KOKKOS_IMPL_HALF_TYPE_DEFINED
+
+#ifndef KOKKOS_IMPL_BHALF_TYPE_DEFINED
+#define KOKKOS_IMPL_BHALF_TYPE_DEFINED
+#define KOKKOS_BHALF_T_IS_FLOAT true
+namespace Kokkos {
+namespace Impl {
+struct bhalf_impl_t {
+  using type = float;
+};
+}  // namespace Impl
+
+namespace Experimental {
+
+using bhalf_t = Kokkos::Impl::bhalf_impl_t::type;
+
+// cast_to_bhalf
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(float val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(bool val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(double val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(short val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned short val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(int val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned int val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(long long val) { return bhalf_t(val); }
+KOKKOS_INLINE_FUNCTION
+bhalf_t cast_to_bhalf(unsigned long long val) { return bhalf_t(val); }
+
+// cast_from_bhalf
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<
+    std::is_same<T, float>::value || std::is_same<T, bool>::value ||
+        std::is_same<T, double>::value || std::is_same<T, short>::value ||
+        std::is_same<T, unsigned short>::value || std::is_same<T, int>::value ||
+        std::is_same<T, unsigned int>::value || std::is_same<T, long>::value ||
+        std::is_same<T, unsigned long>::value ||
+        std::is_same<T, long long>::value ||
+        std::is_same<T, unsigned long long>::value,
+    T>
+cast_from_bhalf(bhalf_t val) {
+  return T(val);
+}
+}  // namespace Experimental
+}  // namespace Kokkos
+#else
+#define KOKKOS_BHALF_T_IS_FLOAT false
+#endif  // KOKKOS_IMPL_BHALF_TYPE_DEFINED
 #endif  // KOKKOS_HALF_HPP_
diff --git a/packages/kokkos/core/src/Kokkos_HostSpace.hpp b/packages/kokkos/core/src/Kokkos_HostSpace.hpp
index c96cf5fbbe1b3a07f75da83a2557d2bbe4cb38c0..034d31fca0978e23420895d83b72cb576d6ba75e 100644
--- a/packages/kokkos/core/src/Kokkos_HostSpace.hpp
+++ b/packages/kokkos/core/src/Kokkos_HostSpace.hpp
@@ -112,24 +112,7 @@ class HostSpace {
   /// Every memory space has a default execution space.  This is
   /// useful for things like initializing a View (which happens in
   /// parallel using the View's default execution space).
-#if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP)
-  using execution_space = Kokkos::OpenMP;
-#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS)
-  using execution_space = Kokkos::Threads;
-#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX)
-  using execution_space = Kokkos::Experimental::HPX;
-#elif defined(KOKKOS_ENABLE_OPENMP)
-  using execution_space = Kokkos::OpenMP;
-#elif defined(KOKKOS_ENABLE_THREADS)
-  using execution_space = Kokkos::Threads;
-#elif defined(KOKKOS_ENABLE_HPX)
-  using execution_space = Kokkos::Experimental::HPX;
-#elif defined(KOKKOS_ENABLE_SERIAL)
-  using execution_space = Kokkos::Serial;
-#else
-#error \
-    "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, or Kokkos::Serial.  You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
-#endif
+  using execution_space = DefaultHostExecutionSpace;
 
   //! This memory space preferred device_type
   using device_type = Kokkos::Device<execution_space, memory_space>;
@@ -278,14 +261,10 @@ class SharedAllocationRecord<Kokkos::HostSpace, void>
   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
       const Kokkos::HostSpace& arg_space, const std::string& arg_label,
       const size_t arg_alloc_size) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size);
-#else
-    (void)arg_space;
-    (void)arg_label;
-    (void)arg_alloc_size;
-    return (SharedAllocationRecord*)0;
-#endif
+    KOKKOS_IF_ON_HOST((return new SharedAllocationRecord(arg_space, arg_label,
+                                                         arg_alloc_size);))
+    KOKKOS_IF_ON_DEVICE(((void)arg_space; (void)arg_label; (void)arg_alloc_size;
+                         return nullptr;))
   }
 };
 
@@ -313,6 +292,18 @@ struct ZeroMemset<typename HostSpace::execution_space, DT, DP...> {
   }
 };
 
+template <>
+struct DeepCopy<HostSpace, HostSpace, DefaultHostExecutionSpace> {
+  DeepCopy(void* dst, const void* src, size_t n) {
+    hostspace_parallel_deepcopy(dst, src, n);
+  }
+
+  DeepCopy(const DefaultHostExecutionSpace& exec, void* dst, const void* src,
+           size_t n) {
+    hostspace_parallel_deepcopy_async(exec, dst, src, n);
+  }
+};
+
 template <class ExecutionSpace>
 struct DeepCopy<HostSpace, HostSpace, ExecutionSpace> {
   DeepCopy(void* dst, const void* src, size_t n) {
@@ -323,10 +314,7 @@ struct DeepCopy<HostSpace, HostSpace, ExecutionSpace> {
     exec.fence(
         "Kokkos::Impl::DeepCopy<HostSpace, HostSpace, "
         "ExecutionSpace>::DeepCopy: fence before copy");
-    hostspace_parallel_deepcopy(dst, src, n);
-    exec.fence(
-        "Kokkos::Impl::DeepCopy<HostSpace, HostSpace, "
-        "ExecutionSpace>::DeepCopy: fence after copy");
+    hostspace_parallel_deepcopy_async(dst, src, n);
   }
 };
 
diff --git a/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp b/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp
index caa41b79b096dd2e7f2697f164d2cc3819834fc2..6dcbe27900c8905b6810dec67e22b9c55b22544e 100644
--- a/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp
+++ b/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp
@@ -257,7 +257,8 @@ class SharedAllocationRecord<Kokkos::Experimental::LogicalMemorySpace<
 #endif
             Impl::checked_allocation_with_header(arg_space, arg_label,
                                                  arg_alloc_size),
-            sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+            sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+            arg_label),
         m_space(arg_space) {
     // Fill in the Header information
     RecordBase::m_alloc_ptr->m_record =
@@ -277,14 +278,10 @@ class SharedAllocationRecord<Kokkos::Experimental::LogicalMemorySpace<
   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
       const SpaceType& arg_space, const std::string& arg_label,
       const size_t arg_alloc_size) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size);
-#else
-    (void)arg_space;
-    (void)arg_label;
-    (void)arg_alloc_size;
-    return (SharedAllocationRecord*)nullptr;
-#endif
+    KOKKOS_IF_ON_HOST((return new SharedAllocationRecord(arg_space, arg_label,
+                                                         arg_alloc_size);))
+    KOKKOS_IF_ON_DEVICE(((void)arg_space; (void)arg_label; (void)arg_alloc_size;
+                         return nullptr;))
   }
 
   /**\brief  Allocate tracked memory in the space */
@@ -310,6 +307,9 @@ class SharedAllocationRecord<Kokkos::Experimental::LogicalMemorySpace<
 
     Kokkos::Impl::DeepCopy<SpaceType, SpaceType>(
         r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size()));
+    Kokkos::fence(
+        "SharedAllocationRecord<Kokkos::Experimental::LogicalMemorySpace, "
+        "void>::reallocate_tracked: fence after copying data");
 
     RecordBase::increment(r_new);
     RecordBase::decrement(r_old);
diff --git a/packages/kokkos/core/src/Kokkos_Macros.hpp b/packages/kokkos/core/src/Kokkos_Macros.hpp
index 8d0fd925a27070dcd97160ba25ba19f06d3842b2..8c2d414a99603730b10e13c8d6b94c8e2392c543 100644
--- a/packages/kokkos/core/src/Kokkos_Macros.hpp
+++ b/packages/kokkos/core/src/Kokkos_Macros.hpp
@@ -80,7 +80,7 @@
  *  KOKKOS_COMPILER_PGI
  *  KOKKOS_COMPILER_MSVC
  *
- *  Macros for which compiler extension to use for atomics on intrinsice types
+ *  Macros for which compiler extension to use for atomics on intrinsic types
  *
  *  KOKKOS_ENABLE_CUDA_ATOMICS
  *  KOKKOS_ENABLE_GNU_ATOMICS
@@ -187,6 +187,12 @@
 #endif
 #endif
 
+#if defined(__NVCOMPILER)
+#define KOKKOS_COMPILER_NVHPC                              \
+  __NVCOMPILER_MAJOR__ * 100 + __NVCOMPILER_MINOR__ * 10 + \
+      __NVCOMPILER_PATCHLEVEL__
+#endif
+
 #if defined(_MSC_VER) && !defined(KOKKOS_COMPILER_INTEL)
 #define KOKKOS_COMPILER_MSVC _MSC_VER
 #endif
@@ -198,6 +204,16 @@
 //  of the supported OpenMP API version.
 #endif  // #if defined( _OPENMP )
 
+#if defined(KOKKOS_ENABLE_CXX17)
+#define KOKKOS_IMPL_FALLTHROUGH [[fallthrough]];
+#elif defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU >= 710)
+#define KOKKOS_IMPL_FALLTHROUGH [[gnu::fallthrough]];
+#elif defined(KOKKOS_COMPILER_CLANG)
+#define KOKKOS_IMPL_FALLTHROUGH [[clang::fallthrough]];
+#else
+#define KOKKOS_IMPL_FALLTHROUGH
+#endif
+
 //----------------------------------------------------------------------------
 // Intel compiler macros
 
@@ -471,11 +487,6 @@
 #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
 #elif defined(KOKKOS_ENABLE_HIP)
 #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP
-#if defined(__HIP__)
-// mark that HIP-clang can use __host__ and __device__
-// as valid overload criteria
-#define KOKKOS_IMPL_ENABLE_OVERLOAD_HOST_DEVICE
-#endif
 #elif defined(KOKKOS_ENABLE_SYCL)
 #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SYCL
 #elif defined(KOKKOS_ENABLE_OPENMPTARGET)
@@ -506,6 +517,69 @@
 
 //----------------------------------------------------------------------------
 
+// Remove surrounding parentheses if present
+#define KOKKOS_IMPL_STRIP_PARENS(X) KOKKOS_IMPL_ESC(KOKKOS_IMPL_ISH X)
+#define KOKKOS_IMPL_ISH(...) KOKKOS_IMPL_ISH __VA_ARGS__
+#define KOKKOS_IMPL_ESC(...) KOKKOS_IMPL_ESC_(__VA_ARGS__)
+#define KOKKOS_IMPL_ESC_(...) KOKKOS_IMPL_VAN_##__VA_ARGS__
+#define KOKKOS_IMPL_VAN_KOKKOS_IMPL_ISH
+
+#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_NVHPC)
+#include <nv/target>
+#define KOKKOS_IF_ON_DEVICE(CODE) NV_IF_TARGET(NV_IS_DEVICE, CODE)
+#define KOKKOS_IF_ON_HOST(CODE) NV_IF_TARGET(NV_IS_HOST, CODE)
+#endif
+
+#ifdef KOKKOS_ENABLE_OPENMPTARGET
+#ifdef KOKKOS_COMPILER_NVHPC
+#define KOKKOS_IF_ON_DEVICE(CODE)   \
+  if (__builtin_is_device_code()) { \
+    KOKKOS_IMPL_STRIP_PARENS(CODE)  \
+  }
+#define KOKKOS_IF_ON_HOST(CODE)      \
+  if (!__builtin_is_device_code()) { \
+    KOKKOS_IMPL_STRIP_PARENS(CODE)   \
+  }
+#else
+// Base function.
+static constexpr bool kokkos_omp_on_host() { return true; }
+
+#pragma omp begin declare variant match(device = {kind(host)})
+static constexpr bool kokkos_omp_on_host() { return true; }
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(nohost)})
+static constexpr bool kokkos_omp_on_host() { return false; }
+#pragma omp end declare variant
+
+#define KOKKOS_IF_ON_DEVICE(CODE)        \
+  if constexpr (!kokkos_omp_on_host()) { \
+    KOKKOS_IMPL_STRIP_PARENS(CODE)       \
+  }
+#define KOKKOS_IF_ON_HOST(CODE)         \
+  if constexpr (kokkos_omp_on_host()) { \
+    KOKKOS_IMPL_STRIP_PARENS(CODE)      \
+  }
+#endif
+#endif
+
+#if !defined(KOKKOS_IF_ON_HOST) && !defined(KOKKOS_IF_ON_DEVICE)
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || \
+    defined(__SYCL_DEVICE_ONLY__)
+#define KOKKOS_IF_ON_DEVICE(CODE) \
+  { KOKKOS_IMPL_STRIP_PARENS(CODE) }
+#define KOKKOS_IF_ON_HOST(CODE) \
+  {}
+#else
+#define KOKKOS_IF_ON_DEVICE(CODE) \
+  {}
+#define KOKKOS_IF_ON_HOST(CODE) \
+  { KOKKOS_IMPL_STRIP_PARENS(CODE) }
+#endif
+#endif
+
+//----------------------------------------------------------------------------
+
 #if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || \
     (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 600)
 #if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
@@ -514,8 +588,8 @@
 #endif
 
 //----------------------------------------------------------------------------
-// If compiling with CUDA, we must use relocateable device code
-// to enable the task policy.
+// If compiling with CUDA, we must use relocatable device code to enable the
+// task policy.
 
 #if defined(KOKKOS_ENABLE_CUDA)
 #if defined(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
@@ -539,7 +613,7 @@
 // intel error #2651: attribute does not apply to any entity
 // using <deprecated_type> KOKKOS_DEPRECATED = ...
 #if defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS) && !defined(__NVCC__) && \
-    (KOKKOS_COMPILER_INTEL > 1900)
+    (!defined(KOKKOS_COMPILER_INTEL) || KOKKOS_COMPILER_INTEL > 1900)
 #define KOKKOS_DEPRECATED [[deprecated]]
 #define KOKKOS_DEPRECATED_WITH_COMMENT(comment) [[deprecated(comment)]]
 #else
diff --git a/packages/kokkos/core/src/Kokkos_MathematicalConstants.hpp b/packages/kokkos/core/src/Kokkos_MathematicalConstants.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c6b8c08dc962bc1c1f848be5ec0149c29736771d
--- /dev/null
+++ b/packages/kokkos/core/src/Kokkos_MathematicalConstants.hpp
@@ -0,0 +1,85 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_MATHEMATICAL_CONSTANTS_HPP
+#define KOKKOS_MATHEMATICAL_CONSTANTS_HPP
+
+#include <Kokkos_Macros.hpp>
+#include <type_traits>
+
+namespace Kokkos {
+namespace Experimental {
+
+#if defined(KOKKOS_ENABLE_CXX17)
+#define KOKKOS_IMPL_MATH_CONSTANT(TRAIT, VALUE) \
+  template <class T>                            \
+  inline constexpr auto TRAIT##_v =             \
+      std::enable_if_t<std::is_floating_point_v<T>, T>(VALUE)
+#else
+#define KOKKOS_IMPL_MATH_CONSTANT(TRAIT, VALUE) \
+  template <class T>                            \
+  constexpr auto TRAIT##_v =                    \
+      std::enable_if_t<std::is_floating_point<T>::value, T>(VALUE)
+#endif
+
+// clang-format off
+KOKKOS_IMPL_MATH_CONSTANT(e,          2.718281828459045235360287471352662498L);
+KOKKOS_IMPL_MATH_CONSTANT(log2e,      1.442695040888963407359924681001892137L);
+KOKKOS_IMPL_MATH_CONSTANT(log10e,     0.434294481903251827651128918916605082L);
+KOKKOS_IMPL_MATH_CONSTANT(pi,         3.141592653589793238462643383279502884L);
+KOKKOS_IMPL_MATH_CONSTANT(inv_pi,     0.318309886183790671537767526745028724L);
+KOKKOS_IMPL_MATH_CONSTANT(inv_sqrtpi, 0.564189583547756286948079451560772586L);
+KOKKOS_IMPL_MATH_CONSTANT(ln2,        0.693147180559945309417232121458176568L);
+KOKKOS_IMPL_MATH_CONSTANT(ln10,       2.302585092994045684017991454684364208L);
+KOKKOS_IMPL_MATH_CONSTANT(sqrt2,      1.414213562373095048801688724209698079L);
+KOKKOS_IMPL_MATH_CONSTANT(sqrt3,      1.732050807568877293527446341505872367L);
+KOKKOS_IMPL_MATH_CONSTANT(inv_sqrt3,  0.577350269189625764509148780501957456L);
+KOKKOS_IMPL_MATH_CONSTANT(egamma,     0.577215664901532860606512090082402431L);
+KOKKOS_IMPL_MATH_CONSTANT(phi,        1.618033988749894848204586834365638118L);
+// clang-format on
+
+#undef KOKKOS_IMPL_MATH_CONSTANT
+
+}  // namespace Experimental
+}  // namespace Kokkos
+#endif
diff --git a/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp
index 50fde82d77a7c37dfa0d5f3d1a565df470f680e0..6ee8d7745711141373a6ed74999b41bb2798ecef 100644
--- a/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp
+++ b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp
@@ -47,7 +47,6 @@
 
 #include <Kokkos_Macros.hpp>
 #include <cmath>
-#include <algorithm>
 #include <type_traits>
 
 #ifdef KOKKOS_ENABLE_SYCL
@@ -87,21 +86,19 @@ using promote_2_t = typename promote_2<T, U>::type;
 
 namespace Experimental {
 
+// NOTE long double overloads are not available on the device
+
 #if defined(KOKKOS_ENABLE_SYCL)
 #define KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE sycl
 #else
+#if defined(KOKKOS_COMPILER_NVCC) && defined(__GNUC__) && (__GNUC__ < 6) && \
+    !defined(__clang__)
+#define KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE
+#else
 #define KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE std
 #endif
-
-// NOTE long double overloads are not available on the device
-#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \
-    defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET)
-#else
-#define KOKKOS_IMPL_MATH_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
 #endif
 
-#if defined(KOKKOS_IMPL_MATH_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS)
-
 #define KOKKOS_IMPL_MATH_UNARY_FUNCTION(FUNC)                                 \
   KOKKOS_INLINE_FUNCTION float FUNC(float x) {                                \
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
@@ -111,16 +108,16 @@ namespace Experimental {
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
     return FUNC(x);                                                           \
   }                                                                           \
-  KOKKOS_INLINE_FUNCTION long double FUNC(long double x) {                    \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
+  inline long double FUNC(long double x) {                                    \
+    using std::FUNC;                                                          \
     return FUNC(x);                                                           \
   }                                                                           \
   KOKKOS_INLINE_FUNCTION float FUNC##f(float x) {                             \
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
     return FUNC(x);                                                           \
   }                                                                           \
-  KOKKOS_INLINE_FUNCTION long double FUNC##l(long double x) {                 \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
+  inline long double FUNC##l(long double x) {                                 \
+    using std::FUNC;                                                          \
     return FUNC(x);                                                           \
   }                                                                           \
   template <class T>                                                          \
@@ -130,79 +127,23 @@ namespace Experimental {
     return FUNC(static_cast<double>(x));                                      \
   }
 
+// isinf, isnan, and isinfinite do not work on Windows with CUDA with std::
+// getting warnings about calling host function in device function then
+// runtime test fails
+#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)
 #define KOKKOS_IMPL_MATH_UNARY_PREDICATE(FUNC)                              \
-  KOKKOS_INLINE_FUNCTION bool FUNC(float x) {                               \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                       \
-    return FUNC(x);                                                         \
-  }                                                                         \
-  KOKKOS_INLINE_FUNCTION bool FUNC(double x) {                              \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                       \
-    return FUNC(x);                                                         \
-  }                                                                         \
-  KOKKOS_INLINE_FUNCTION bool FUNC(long double x) {                         \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                       \
+  KOKKOS_INLINE_FUNCTION bool FUNC(float x) { return ::FUNC(x); }           \
+  KOKKOS_INLINE_FUNCTION bool FUNC(double x) { return ::FUNC(x); }          \
+  inline bool FUNC(long double x) {                                         \
+    using std::FUNC;                                                        \
     return FUNC(x);                                                         \
   }                                                                         \
   template <class T>                                                        \
   KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_integral<T>::value, bool> \
   FUNC(T x) {                                                               \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                       \
-    return FUNC(static_cast<double>(x));                                    \
-  }
-
-#define KOKKOS_IMPL_MATH_BINARY_FUNCTION(FUNC)                               \
-  KOKKOS_INLINE_FUNCTION float FUNC(float x, float y) {                      \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                        \
-    return FUNC(x, y);                                                       \
-  }                                                                          \
-  KOKKOS_INLINE_FUNCTION double FUNC(double x, double y) {                   \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                        \
-    return FUNC(x, y);                                                       \
-  }                                                                          \
-  KOKKOS_INLINE_FUNCTION long double FUNC(long double x, long double y) {    \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                        \
-    return FUNC(x, y);                                                       \
-  }                                                                          \
-  KOKKOS_INLINE_FUNCTION float FUNC##f(float x, float y) {                   \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                        \
-    return FUNC(x, y);                                                       \
-  }                                                                          \
-  KOKKOS_INLINE_FUNCTION long double FUNC##l(long double x, long double y) { \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                        \
-    return FUNC(x, y);                                                       \
-  }                                                                          \
-  template <class T1, class T2>                                              \
-  KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_arithmetic<T1>::value &&   \
-                                              std::is_arithmetic<T2>::value, \
-                                          Kokkos::Impl::promote_2_t<T1, T2>> \
-  FUNC(T1 x, T2 y) {                                                         \
-    using Promoted = Kokkos::Impl::promote_2_t<T1, T2>;                      \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                        \
-    return FUNC(static_cast<Promoted>(x), static_cast<Promoted>(y));         \
-  }
-
-#else  // long double overloads are not available
-
-#define KOKKOS_IMPL_MATH_UNARY_FUNCTION(FUNC)                                 \
-  KOKKOS_INLINE_FUNCTION float FUNC(float x) {                                \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
-    return FUNC(x);                                                           \
-  }                                                                           \
-  KOKKOS_INLINE_FUNCTION double FUNC(double x) {                              \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
-    return FUNC(x);                                                           \
-  }                                                                           \
-  KOKKOS_INLINE_FUNCTION float FUNC##f(float x) {                             \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
-    return FUNC(x);                                                           \
-  }                                                                           \
-  template <class T>                                                          \
-  KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_integral<T>::value, double> \
-  FUNC(T x) {                                                                 \
-    using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                         \
-    return FUNC(static_cast<double>(x));                                      \
+    return ::FUNC(static_cast<double>(x));                                  \
   }
-
+#else
 #define KOKKOS_IMPL_MATH_UNARY_PREDICATE(FUNC)                              \
   KOKKOS_INLINE_FUNCTION bool FUNC(float x) {                               \
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                       \
@@ -212,12 +153,17 @@ namespace Experimental {
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                       \
     return FUNC(x);                                                         \
   }                                                                         \
+  inline bool FUNC(long double x) {                                         \
+    using std::FUNC;                                                        \
+    return FUNC(x);                                                         \
+  }                                                                         \
   template <class T>                                                        \
   KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_integral<T>::value, bool> \
   FUNC(T x) {                                                               \
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                       \
     return FUNC(static_cast<double>(x));                                    \
   }
+#endif
 
 #define KOKKOS_IMPL_MATH_BINARY_FUNCTION(FUNC)                          \
   KOKKOS_INLINE_FUNCTION float FUNC(float x, float y) {                 \
@@ -228,10 +174,18 @@ namespace Experimental {
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                   \
     return FUNC(x, y);                                                  \
   }                                                                     \
+  inline long double FUNC(long double x, long double y) {               \
+    using std::FUNC;                                                    \
+    return FUNC(x, y);                                                  \
+  }                                                                     \
   KOKKOS_INLINE_FUNCTION float FUNC##f(float x, float y) {              \
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                   \
     return FUNC(x, y);                                                  \
   }                                                                     \
+  inline long double FUNC##l(long double x, long double y) {            \
+    using std::FUNC;                                                    \
+    return FUNC(x, y);                                                  \
+  }                                                                     \
   template <class T1, class T2>                                         \
   KOKKOS_INLINE_FUNCTION std::enable_if_t<                              \
       std::is_arithmetic<T1>::value && std::is_arithmetic<T2>::value && \
@@ -242,10 +196,20 @@ namespace Experimental {
     using Promoted = Kokkos::Impl::promote_2_t<T1, T2>;                 \
     using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC;                   \
     return FUNC(static_cast<Promoted>(x), static_cast<Promoted>(y));    \
+  }                                                                     \
+  template <class T1, class T2>                                         \
+  inline std::enable_if_t<std::is_arithmetic<T1>::value &&              \
+                              std::is_arithmetic<T2>::value &&          \
+                              (std::is_same<T1, long double>::value ||  \
+                               std::is_same<T2, long double>::value),   \
+                          long double>                                  \
+  FUNC(T1 x, T2 y) {                                                    \
+    using Promoted = Kokkos::Impl::promote_2_t<T1, T2>;                 \
+    static_assert(std::is_same<Promoted, long double>::value, "");      \
+    using std::FUNC;                                                    \
+    return FUNC(static_cast<Promoted>(x), static_cast<Promoted>(y));    \
   }
 
-#endif
-
 // Basic operations
 KOKKOS_INLINE_FUNCTION int abs(int n) {
   using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs;
@@ -267,12 +231,10 @@ KOKKOS_INLINE_FUNCTION double abs(double x) {
   using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs;
   return abs(x);
 }
-#if defined(KOKKOS_IMPL_MATH_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS)
-KOKKOS_INLINE_FUNCTION long double abs(long double x) {
-  using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs;
+inline long double abs(long double x) {
+  using std::abs;
   return abs(x);
 }
-#endif
 KOKKOS_IMPL_MATH_UNARY_FUNCTION(fabs)
 KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmod)
 KOKKOS_IMPL_MATH_BINARY_FUNCTION(remainder)
@@ -282,9 +244,6 @@ KOKKOS_IMPL_MATH_BINARY_FUNCTION(fdim)
 #ifndef KOKKOS_ENABLE_SYCL
 KOKKOS_INLINE_FUNCTION float nanf(char const* arg) { return ::nanf(arg); }
 KOKKOS_INLINE_FUNCTION double nan(char const* arg) { return ::nan(arg); }
-#if defined(KOKKOS_IMPL_MATH_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS)
-KOKKOS_INLINE_FUNCTION long double nanl(char const* arg) { return ::nanl(arg); }
-#endif
 #else
 // FIXME_SYCL
 // sycl::nan does not follow the C/C++ standard library and takes an unsigned
@@ -293,6 +252,7 @@ KOKKOS_INLINE_FUNCTION long double nanl(char const* arg) { return ::nanl(arg); }
 KOKKOS_INLINE_FUNCTION float nanf(char const*) { return sycl::nan(0u); }
 KOKKOS_INLINE_FUNCTION double nan(char const*) { return sycl::nan(0ul); }
 #endif
+inline long double nanl(char const* arg) { return ::nanl(arg); }
 // Power functions
 KOKKOS_IMPL_MATH_BINARY_FUNCTION(pow)
 KOKKOS_IMPL_MATH_UNARY_FUNCTION(sqrt)
@@ -340,7 +300,6 @@ KOKKOS_IMPL_MATH_UNARY_PREDICATE(isinf)
 KOKKOS_IMPL_MATH_UNARY_PREDICATE(isnan)
 
 #undef KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE
-#undef KOKKOS_IMPL_MATH_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
 #undef KOKKOS_IMPL_MATH_UNARY_FUNCTION
 #undef KOKKOS_IMPL_MATH_UNARY_PREDICATE
 #undef KOKKOS_IMPL_MATH_BINARY_FUNCTION
diff --git a/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp b/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp
index 7bcea91c86790dd52265addb1ca651adbe21a966..03c491c36d8c6843b24a784983a9401b4ec80dae 100644
--- a/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp
+++ b/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp
@@ -49,14 +49,11 @@
 #include <cmath>
 #include <algorithm>
 #include <type_traits>
+#include <Kokkos_MathematicalConstants.hpp>
 #include <Kokkos_MathematicalFunctions.hpp>
 #include <Kokkos_NumericTraits.hpp>
 #include <Kokkos_Complex.hpp>
 
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
 namespace Kokkos {
 namespace Experimental {
 
@@ -128,15 +125,15 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> erf(
 
   using CmplxType = Kokkos::complex<RealType>;
 
-  auto const inf = infinity<RealType>::value;
-  auto const tol = epsilon<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
+  constexpr auto tol = epsilon<RealType>::value;
 
   const RealType fnorm = 1.12837916709551;
   const RealType gnorm = 0.564189583547756;
   const RealType eh    = 0.606530659712633;
   const RealType ef    = 0.778800783071405;
   // const RealType tol   = 1.0e-13;
-  const RealType pi = M_PI;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
 
   CmplxType cans;
 
@@ -306,15 +303,15 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> erfcx(
 
   using CmplxType = Kokkos::complex<RealType>;
 
-  auto const inf = infinity<RealType>::value;
-  auto const tol = epsilon<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
+  constexpr auto tol = epsilon<RealType>::value;
 
   const RealType fnorm = 1.12837916709551;
-  const RealType gnorm = 0.564189583547756;
+  constexpr auto gnorm = Kokkos::Experimental::inv_sqrtpi_v<RealType>;
   const RealType eh    = 0.606530659712633;
   const RealType ef    = 0.778800783071405;
   // const RealType tol   = 1.0e-13;
-  const RealType pi = M_PI;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
 
   CmplxType cans;
 
@@ -493,7 +490,7 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_j0(const CmplxType& z,
   using Kokkos::Experimental::pow;
 
   CmplxType cbj0;
-  const RealType pi    = M_PI;
+  constexpr auto pi    = Kokkos::Experimental::pi_v<RealType>;
   const RealType a[12] = {
       -0.703125e-01,           0.112152099609375e+00,   -0.5725014209747314e+00,
       0.6074042001273483e+01,  -0.1100171402692467e+03, 0.3038090510922384e+04,
@@ -581,11 +578,11 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_y0(const CmplxType& z,
   using Kokkos::Experimental::infinity;
   using Kokkos::Experimental::pow;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType cby0, cbj0;
-  const RealType pi    = M_PI;
-  const RealType el    = 0.57721566490153286060651209008240;
+  constexpr auto pi    = Kokkos::Experimental::pi_v<RealType>;
+  constexpr auto el    = Kokkos::Experimental::egamma_v<RealType>;
   const RealType a[12] = {
       -0.703125e-01,           0.112152099609375e+00,   -0.5725014209747314e+00,
       0.6074042001273483e+01,  -0.1100171402692467e+03, 0.3038090510922384e+04,
@@ -682,7 +679,7 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_j1(const CmplxType& z,
   using Kokkos::Experimental::pow;
 
   CmplxType cbj1;
-  const RealType pi     = M_PI;
+  constexpr auto pi     = Kokkos::Experimental::pi_v<RealType>;
   const RealType a1[12] = {0.1171875e+00,          -0.144195556640625e+00,
                            0.6765925884246826e+00, -0.6883914268109947e+01,
                            0.1215978918765359e+03, -0.3302272294480852e+04,
@@ -774,11 +771,11 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_y1(const CmplxType& z,
   using Kokkos::Experimental::infinity;
   using Kokkos::Experimental::pow;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType cby1, cbj0, cbj1, cby0;
-  const RealType pi     = M_PI;
-  const RealType el     = 0.57721566490153286060651209008240;
+  constexpr auto pi     = Kokkos::Experimental::pi_v<RealType>;
+  constexpr auto el     = Kokkos::Experimental::egamma_v<RealType>;
   const RealType a1[12] = {0.1171875e+00,          -0.144195556640625e+00,
                            0.6765925884246826e+00, -0.6883914268109947e+01,
                            0.1215978918765359e+03, -0.3302272294480852e+04,
@@ -875,7 +872,7 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_i0(const CmplxType& z,
   //             bw_start  --- Starting point for backward recurrence
   //    Output:  cbi0      --- I0(z)
   CmplxType cbi0;
-  const RealType pi    = M_PI;
+  constexpr auto pi    = Kokkos::Experimental::pi_v<RealType>;
   const RealType a[12] = {0.125,
                           7.03125e-2,
                           7.32421875e-2,
@@ -949,11 +946,11 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_k0(const CmplxType& z,
   using Kokkos::Experimental::infinity;
   using Kokkos::Experimental::pow;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType cbk0, cbi0;
-  const RealType pi = M_PI;
-  const RealType el = 0.57721566490153286060651209008240;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
+  constexpr auto el = Kokkos::Experimental::egamma_v<RealType>;
 
   RealType a0  = Kokkos::abs(z);
   CmplxType ci = CmplxType(0.0, 1.0);
@@ -1020,7 +1017,7 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_i1(const CmplxType& z,
   //             bw_start  --- Starting point for backward recurrence
   //    Output:  cbi1      --- I1(z)
   CmplxType cbi1;
-  const RealType pi    = M_PI;
+  constexpr auto pi    = Kokkos::Experimental::pi_v<RealType>;
   const RealType b[12] = {-0.375,
                           -1.171875e-1,
                           -1.025390625e-1,
@@ -1095,11 +1092,11 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_k1(const CmplxType& z,
   using Kokkos::Experimental::infinity;
   using Kokkos::Experimental::pow;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType cbk0, cbi0, cbk1, cbi1;
-  const RealType pi = M_PI;
-  const RealType el = 0.57721566490153286060651209008240;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
+  constexpr auto el = Kokkos::Experimental::egamma_v<RealType>;
 
   RealType a0  = Kokkos::abs(z);
   CmplxType ci = CmplxType(0.0, 1.0);
@@ -1164,10 +1161,10 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_h10(const CmplxType& z) {
   using RealType = typename CmplxType::value_type;
   using Kokkos::Experimental::infinity;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType ch10, cbk0, cbj0, cby0;
-  const RealType pi = M_PI;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
   CmplxType ci      = CmplxType(0.0, 1.0);
 
   if ((z.real() == 0.0) && (z.imag() == 0.0)) {
@@ -1194,10 +1191,10 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_h11(const CmplxType& z) {
   using RealType = typename CmplxType::value_type;
   using Kokkos::Experimental::infinity;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType ch11, cbk1, cbj1, cby1;
-  const RealType pi = M_PI;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
   CmplxType ci      = CmplxType(0.0, 1.0);
 
   if ((z.real() == 0.0) && (z.imag() == 0.0)) {
@@ -1224,10 +1221,10 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_h20(const CmplxType& z) {
   using RealType = typename CmplxType::value_type;
   using Kokkos::Experimental::infinity;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType ch20, cbk0, cbj0, cby0;
-  const RealType pi = M_PI;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
   CmplxType ci      = CmplxType(0.0, 1.0);
 
   if ((z.real() == 0.0) && (z.imag() == 0.0)) {
@@ -1254,10 +1251,10 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_h21(const CmplxType& z) {
   using RealType = typename CmplxType::value_type;
   using Kokkos::Experimental::infinity;
 
-  auto const inf = infinity<RealType>::value;
+  constexpr auto inf = infinity<RealType>::value;
 
   CmplxType ch21, cbk1, cbj1, cby1;
-  const RealType pi = M_PI;
+  constexpr auto pi = Kokkos::Experimental::pi_v<RealType>;
   CmplxType ci      = CmplxType(0.0, 1.0);
 
   if ((z.real() == 0.0) && (z.imag() == 0.0)) {
diff --git a/packages/kokkos/core/src/Kokkos_MemoryPool.hpp b/packages/kokkos/core/src/Kokkos_MemoryPool.hpp
index c814e5a22a32d31e1047f52ac55438934c8194d3..7dce3f4780352c34c40b72e7e591acda84e07f9d 100644
--- a/packages/kokkos/core/src/Kokkos_MemoryPool.hpp
+++ b/packages/kokkos/core/src/Kokkos_MemoryPool.hpp
@@ -192,6 +192,9 @@ class MemoryPool {
     if (!accessible) {
       Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
           sb_state_array, m_sb_state_array, alloc_size);
+      Kokkos::fence(
+          "MemoryPool::get_usage_statistics(): fence after copying state "
+          "array to HostSpace");
     }
 
     stats.superblock_bytes     = (1LU << m_sb_size_lg2);
@@ -240,6 +243,9 @@ class MemoryPool {
     if (!accessible) {
       Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
           sb_state_array, m_sb_state_array, alloc_size);
+      Kokkos::fence(
+          "MemoryPool::print_state(): fence after copying state array to "
+          "HostSpace");
     }
 
     Impl::_print_memory_pool_state(s, sb_state_array, m_sb_count, m_sb_size_lg2,
@@ -449,6 +455,9 @@ class MemoryPool {
     if (!accessible) {
       Kokkos::Impl::DeepCopy<base_memory_space, Kokkos::HostSpace>(
           m_sb_state_array, sb_state_array, header_size);
+      Kokkos::fence(
+          "MemoryPool::MemoryPool(): fence after copying state array from "
+          "HostSpace");
 
       host.deallocate(sb_state_array, header_size);
     } else {
@@ -529,7 +538,7 @@ class MemoryPool {
 #else
     const uint32_t block_id_hint =
         (uint32_t)(Kokkos::Impl::clock_tic()
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
+#ifdef __CUDA_ARCH__  // FIXME_CUDA
                    // Spread out potentially concurrent access
                    // by threads within a warp or thread block.
                    + (threadIdx.x + blockDim.x * threadIdx.y)
@@ -780,9 +789,16 @@ class MemoryPool {
     block_count_capacity = 0;
     block_count_used     = 0;
 
-    if (Kokkos::Impl::MemorySpaceAccess<
-            Kokkos::Impl::ActiveExecutionMemorySpace,
-            base_memory_space>::accessible) {
+    bool can_access_state_array = []() {
+      KOKKOS_IF_ON_HOST(
+          (return SpaceAccessibility<DefaultHostExecutionSpace,
+                                     base_memory_space>::accessible;))
+      KOKKOS_IF_ON_DEVICE(
+          (return SpaceAccessibility<DefaultExecutionSpace,
+                                     base_memory_space>::accessible;))
+    }();
+
+    if (can_access_state_array) {
       // Can access the state array
 
       const uint32_t state =
diff --git a/packages/kokkos/core/src/Kokkos_MinMaxClamp.hpp b/packages/kokkos/core/src/Kokkos_MinMaxClamp.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a82e13df7fa222271247016999a7e206c8ecfd25
--- /dev/null
+++ b/packages/kokkos/core/src/Kokkos_MinMaxClamp.hpp
@@ -0,0 +1,229 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_MIN_MAX_CLAMP_HPP
+#define KOKKOS_MIN_MAX_CLAMP_HPP
+
+#include <Kokkos_Macros.hpp>
+#include <Kokkos_Pair.hpp>
+
+#include <initializer_list>
+
+namespace Kokkos {
+namespace Experimental {
+
+// clamp
+template <class T>
+constexpr KOKKOS_INLINE_FUNCTION const T& clamp(const T& value, const T& lo,
+                                                const T& hi) {
+  KOKKOS_EXPECTS(!(hi < lo));
+  return (value < lo) ? lo : (hi < value) ? hi : value;
+}
+
+template <class T, class ComparatorType>
+constexpr KOKKOS_INLINE_FUNCTION const T& clamp(const T& value, const T& lo,
+                                                const T& hi,
+                                                ComparatorType comp) {
+  KOKKOS_EXPECTS(!comp(hi, lo));
+  return comp(value, lo) ? lo : comp(hi, value) ? hi : value;
+}
+
+// max
+template <class T>
+constexpr KOKKOS_INLINE_FUNCTION const T& max(const T& a, const T& b) {
+  return (a < b) ? b : a;
+}
+
+template <class T, class ComparatorType>
+constexpr KOKKOS_INLINE_FUNCTION const T& max(const T& a, const T& b,
+                                              ComparatorType comp) {
+  return comp(a, b) ? b : a;
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION constexpr T max(std::initializer_list<T> ilist) {
+  auto first      = ilist.begin();
+  auto const last = ilist.end();
+  auto result     = *first;
+  if (first == last) return result;
+  while (++first != last) {
+    if (result < *first) result = *first;
+  }
+  return result;
+}
+
+template <class T, class Compare>
+KOKKOS_INLINE_FUNCTION constexpr T max(std::initializer_list<T> ilist,
+                                       Compare comp) {
+  auto first      = ilist.begin();
+  auto const last = ilist.end();
+  auto result     = *first;
+  if (first == last) return result;
+  while (++first != last) {
+    if (comp(result, *first)) result = *first;
+  }
+  return result;
+}
+
+// min
+template <class T>
+constexpr KOKKOS_INLINE_FUNCTION const T& min(const T& a, const T& b) {
+  return (b < a) ? b : a;
+}
+
+template <class T, class ComparatorType>
+constexpr KOKKOS_INLINE_FUNCTION const T& min(const T& a, const T& b,
+                                              ComparatorType comp) {
+  return comp(b, a) ? b : a;
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION constexpr T min(std::initializer_list<T> ilist) {
+  auto first      = ilist.begin();
+  auto const last = ilist.end();
+  auto result     = *first;
+  if (first == last) return result;
+  while (++first != last) {
+    if (*first < result) result = *first;
+  }
+  return result;
+}
+
+template <class T, class Compare>
+KOKKOS_INLINE_FUNCTION constexpr T min(std::initializer_list<T> ilist,
+                                       Compare comp) {
+  auto first      = ilist.begin();
+  auto const last = ilist.end();
+  auto result     = *first;
+  if (first == last) return result;
+  while (++first != last) {
+    if (comp(*first, result)) result = *first;
+  }
+  return result;
+}
+
+// minmax
+template <class T>
+constexpr KOKKOS_INLINE_FUNCTION auto minmax(const T& a, const T& b) {
+  using return_t = ::Kokkos::pair<const T&, const T&>;
+  return (b < a) ? return_t{b, a} : return_t{a, b};
+}
+
+template <class T, class ComparatorType>
+constexpr KOKKOS_INLINE_FUNCTION auto minmax(const T& a, const T& b,
+                                             ComparatorType comp) {
+  using return_t = ::Kokkos::pair<const T&, const T&>;
+  return comp(b, a) ? return_t{b, a} : return_t{a, b};
+}
+
+template <class T>
+KOKKOS_INLINE_FUNCTION constexpr Kokkos::pair<T, T> minmax(
+    std::initializer_list<T> ilist) {
+  auto first      = ilist.begin();
+  auto const last = ilist.end();
+  auto next       = first;
+  Kokkos::pair<T, T> result{*first, *first};
+  if (first == last || ++next == last) return result;
+  if (*next < *first)
+    result.first = *next;
+  else
+    result.second = *next;
+  first = next;
+  while (++first != last) {
+    if (++next == last) {
+      if (*first < result.first)
+        result.first = *first;
+      else if (!(*first < result.second))
+        result.second = *first;
+      break;
+    }
+    if (*next < *first) {
+      if (*next < result.first) result.first = *next;
+      if (!(*first < result.second)) result.second = *first;
+    } else {
+      if (*first < result.first) result.first = *first;
+      if (!(*next < result.second)) result.second = *next;
+    }
+    first = next;
+  }
+  return result;
+}
+
+template <class T, class Compare>
+KOKKOS_INLINE_FUNCTION constexpr Kokkos::pair<T, T> minmax(
+    std::initializer_list<T> ilist, Compare comp) {
+  auto first      = ilist.begin();
+  auto const last = ilist.end();
+  auto next       = first;
+  Kokkos::pair<T, T> result{*first, *first};
+  if (first == last || ++next == last) return result;
+  if (comp(*next, *first))
+    result.first = *next;
+  else
+    result.second = *next;
+  first = next;
+  while (++first != last) {
+    if (++next == last) {
+      if (comp(*first, result.first))
+        result.first = *first;
+      else if (!comp(*first, result.second))
+        result.second = *first;
+      break;
+    }
+    if (comp(*next, *first)) {
+      if (comp(*next, result.first)) result.first = *next;
+      if (!comp(*first, result.second)) result.second = *first;
+    } else {
+      if (comp(*first, result.first)) result.first = *first;
+      if (!comp(*next, result.second)) result.second = *next;
+    }
+    first = next;
+  }
+  return result;
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/Kokkos_NumericTraits.hpp b/packages/kokkos/core/src/Kokkos_NumericTraits.hpp
index 1999d46f3c4087dab1192c350da8ba844199a8fe..67f017c69146dcae1eab1d19e8bbb5a7f1610c31 100644
--- a/packages/kokkos/core/src/Kokkos_NumericTraits.hpp
+++ b/packages/kokkos/core/src/Kokkos_NumericTraits.hpp
@@ -123,6 +123,60 @@ template <class> struct norm_min_helper {};
 template <> struct norm_min_helper<float> { static constexpr float value = FLT_MIN; };
 template <> struct norm_min_helper<double> { static constexpr double value = DBL_MIN; };
 template <> struct norm_min_helper<long double> { static constexpr long double value = LDBL_MIN; };
+template <class> struct denorm_min_helper {};
+//                               Workaround for GCC <9.2, Clang <9, Intel
+//                               vvvvvvvvvvvvvvvvvvvvvvvvv
+#if defined(KOKKOS_ENABLE_CXX17) && defined (FLT_TRUE_MIN) || defined(_MSC_VER)
+template <> struct denorm_min_helper<float> { static constexpr float value = FLT_TRUE_MIN; };
+template <> struct denorm_min_helper<double> { static constexpr double value = DBL_TRUE_MIN; };
+template <> struct denorm_min_helper<long double> { static constexpr long double value = LDBL_TRUE_MIN; };
+#else
+template <> struct denorm_min_helper<float> { static constexpr float value = __FLT_DENORM_MIN__; };
+template <> struct denorm_min_helper<double> { static constexpr double value = __DBL_DENORM_MIN__; };
+template <> struct denorm_min_helper<long double> { static constexpr long double value = __LDBL_DENORM_MIN__; };
+#endif
+// GCC <10.3 is not able to evaluate T(1) / finite_max_v<T> at compile time when passing -frounding-math
+// https://godbolt.org/z/zj9svb1T7
+// Similar issue was reported on IBM Power without the compiler option
+#define KOKKOS_IMPL_WORKAROUND_CONSTANT_EXPRESSION_COMPILER_BUG
+#ifndef KOKKOS_IMPL_WORKAROUND_CONSTANT_EXPRESSION_COMPILER_BUG
+// NOTE see ?lamch routine from LAPACK that determines machine parameters for floating-point arithmetic
+template <class T>
+constexpr T safe_minimum(T /*ignored*/) {
+  constexpr auto one  = static_cast<T>(1);
+  constexpr auto eps  = epsilon_helper<T>::value;
+  constexpr auto tiny = norm_min_helper<T>::value;
+  constexpr auto huge = finite_max_helper<T>::value;
+  constexpr auto small = one / huge;  // error: is not a constant expression
+  return small >= tiny ? small * (one + eps) : tiny;
+}
+template <class> struct reciprocal_overflow_threshold_helper {};
+template <> struct reciprocal_overflow_threshold_helper<float> { static constexpr float value = safe_minimum(0.f); };
+template <> struct reciprocal_overflow_threshold_helper<double> { static constexpr double value = safe_minimum(0.); };
+template <> struct reciprocal_overflow_threshold_helper<long double> { static constexpr long double value = safe_minimum(0.l); };
+#else
+template <class> struct reciprocal_overflow_threshold_helper {};
+template <> struct reciprocal_overflow_threshold_helper<float> { static constexpr float value = norm_min_helper<float>::value; };  // OK for IEEE-754 floating-point numbers
+template <> struct reciprocal_overflow_threshold_helper<double> { static constexpr double value = norm_min_helper<double>::value; };
+template <> struct reciprocal_overflow_threshold_helper<long double> { static constexpr long double value = norm_min_helper<long double>::value; };
+#endif
+#undef KOKKOS_IMPL_WORKAROUND_CONSTANT_EXPRESSION_COMPILER_BUG
+template <class> struct quiet_NaN_helper {};
+template <> struct quiet_NaN_helper<float> { static constexpr float value = __builtin_nanf(""); };
+template <> struct quiet_NaN_helper<double> { static constexpr double value = __builtin_nan(""); };
+#if defined(_MSC_VER)
+template <> struct quiet_NaN_helper<long double> { static constexpr long double value = __builtin_nan(""); };
+#else
+template <> struct quiet_NaN_helper<long double> { static constexpr long double value = __builtin_nanl(""); };
+#endif
+template <class> struct signaling_NaN_helper {};
+template <> struct signaling_NaN_helper<float> { static constexpr float value = __builtin_nansf(""); };
+template <> struct signaling_NaN_helper<double> { static constexpr double value = __builtin_nans(""); };
+#if defined(_MSC_VER)
+template <> struct signaling_NaN_helper<long double> { static constexpr long double value = __builtin_nans(""); };
+#else
+template <> struct signaling_NaN_helper<long double> { static constexpr long double value = __builtin_nansl(""); };
+#endif
 template <class> struct digits_helper {};
 template <> struct digits_helper<bool> { static constexpr int value = 1; };
 template <> struct digits_helper<char> { static constexpr int value = CHAR_BIT - std::is_signed<char>::value; };
@@ -219,15 +273,15 @@ template <> struct max_exponent10_helper<long double> { static constexpr int val
 }  // namespace Impl
 
 #if defined(KOKKOS_ENABLE_CXX17)
-#define KOKKOS_IMPL_DEFINE_TRAIT(TRAIT)      \
-  template <class T>                         \
-  struct TRAIT : Impl::TRAIT##_helper<T> {}; \
-  template <class T>                         \
+#define KOKKOS_IMPL_DEFINE_TRAIT(TRAIT)                        \
+  template <class T>                                           \
+  struct TRAIT : Impl::TRAIT##_helper<std::remove_cv_t<T>> {}; \
+  template <class T>                                           \
   inline constexpr auto TRAIT##_v = TRAIT<T>::value;
 #else
 #define KOKKOS_IMPL_DEFINE_TRAIT(TRAIT) \
   template <class T>                    \
-  struct TRAIT : Impl::TRAIT##_helper<T> {};
+  struct TRAIT : Impl::TRAIT##_helper<std::remove_cv_t<T>> {};
 #endif
 
 // Numeric distinguished value traits
@@ -237,6 +291,10 @@ KOKKOS_IMPL_DEFINE_TRAIT(finite_max)
 KOKKOS_IMPL_DEFINE_TRAIT(epsilon)
 KOKKOS_IMPL_DEFINE_TRAIT(round_error)
 KOKKOS_IMPL_DEFINE_TRAIT(norm_min)
+KOKKOS_IMPL_DEFINE_TRAIT(denorm_min)
+KOKKOS_IMPL_DEFINE_TRAIT(reciprocal_overflow_threshold)
+KOKKOS_IMPL_DEFINE_TRAIT(quiet_NaN)
+KOKKOS_IMPL_DEFINE_TRAIT(signaling_NaN)
 
 // Numeric characteristics traits
 KOKKOS_IMPL_DEFINE_TRAIT(digits)
@@ -306,6 +364,16 @@ struct reduction_identity<signed char> {
   }
 };
 
+template <>
+struct reduction_identity<bool> {
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static bool lor() {
+    return static_cast<bool>(false);
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static bool land() {
+    return static_cast<bool>(true);
+  }
+};
+
 template <>
 struct reduction_identity<short> {
   KOKKOS_FORCEINLINE_FUNCTION constexpr static short sum() {
@@ -570,24 +638,15 @@ struct reduction_identity<double> {
   KOKKOS_FORCEINLINE_FUNCTION constexpr static double min() { return DBL_MAX; }
 };
 
-#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) && \
-    !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU)
+// No __host__ __device__ annotation because long double treated as double in
+// device code.  May be revisited later if that is not true any more.
 template <>
 struct reduction_identity<long double> {
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum() {
-    return static_cast<long double>(0.0);
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static long double prod() {
-    return static_cast<long double>(1.0);
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max() {
-    return -LDBL_MAX;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static long double min() {
-    return LDBL_MAX;
-  }
+  constexpr static long double sum() { return static_cast<long double>(0.0); }
+  constexpr static long double prod() { return static_cast<long double>(1.0); }
+  constexpr static long double max() { return -LDBL_MAX; }
+  constexpr static long double min() { return LDBL_MAX; }
 };
-#endif
 
 }  // namespace Kokkos
 
diff --git a/packages/kokkos/core/src/Kokkos_OpenMP.hpp b/packages/kokkos/core/src/Kokkos_OpenMP.hpp
index 8f12eceb27c46946a83c64eceec0711cca6ef2b7..5d76e689f21b2809a47b262c7ed7485e9116d164 100644
--- a/packages/kokkos/core/src/Kokkos_OpenMP.hpp
+++ b/packages/kokkos/core/src/Kokkos_OpenMP.hpp
@@ -129,14 +129,17 @@ class OpenMP {
   /// This is a no-op on OpenMP since a non default instance cannot be created
   static OpenMP create_instance(...);
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
   /// \brief Partition the default instance and call 'f' on each new 'master'
   /// thread
   ///
   /// Func is a functor with the following signiture
   ///   void( int partition_id, int num_partitions )
   template <typename F>
-  static void partition_master(F const& f, int requested_num_partitions = 0,
-                               int requested_partition_size = 0);
+  KOKKOS_DEPRECATED static void partition_master(
+      F const& f, int requested_num_partitions = 0,
+      int requested_partition_size = 0);
+#endif
 
   // use UniqueToken
   static int concurrency();
diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp
index c1d338331f56cd59a9eb917a2d8f72ebb06b453b..25c852717c23da4e25c5573e59f9cd21c03a06e0 100644
--- a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp
+++ b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp
@@ -136,7 +136,7 @@ class OpenMPTargetSpace {
  public:
   //! Tag this class as a kokkos memory space
   using memory_space = OpenMPTargetSpace;
-  using size_type    = size_t;
+  using size_type    = unsigned;
 
   /// \typedef execution_space
   /// \brief Default execution space for this memory space.
@@ -216,12 +216,11 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>
  public:
   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
       const Kokkos::Experimental::OpenMPTargetSpace& arg_space,
-      const std::string& arg_label, const size_t arg_alloc_size) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size);
-#else
-    return nullptr;
-#endif
+      const std::string& arg_label, const size_t arg_alloc) {
+    KOKKOS_IF_ON_HOST(
+        (return new SharedAllocationRecord(arg_space, arg_label, arg_alloc);))
+    KOKKOS_IF_ON_DEVICE(
+        ((void)arg_space; (void)arg_label; (void)arg_alloc; return nullptr;))
   }
 };
 
diff --git a/packages/kokkos/core/src/Kokkos_Pair.hpp b/packages/kokkos/core/src/Kokkos_Pair.hpp
index d7512eb08616c243128e24cc7f38e5418bb54049..6045737aa936b078e02b54fa821fcdff6154734e 100644
--- a/packages/kokkos/core/src/Kokkos_Pair.hpp
+++ b/packages/kokkos/core/src/Kokkos_Pair.hpp
@@ -326,8 +326,8 @@ struct pair<T1&, T2> {
 
 //! Equality operator for Kokkos::pair.
 template <class T1, class T2>
-KOKKOS_FORCEINLINE_FUNCTION bool operator==(const pair<T1, T2>& lhs,
-                                            const pair<T1, T2>& rhs) {
+KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator==(const pair<T1, T2>& lhs,
+                                                      const pair<T1, T2>& rhs) {
   return lhs.first == rhs.first && lhs.second == rhs.second;
 }
 
diff --git a/packages/kokkos/core/src/Kokkos_Parallel.hpp b/packages/kokkos/core/src/Kokkos_Parallel.hpp
index 25ebe26155fed5812e161e14360811cfc660e105..c12cd77d38bc93d666037e9fd3a30194494e177b 100644
--- a/packages/kokkos/core/src/Kokkos_Parallel.hpp
+++ b/packages/kokkos/core/src/Kokkos_Parallel.hpp
@@ -54,6 +54,8 @@
 #include <Kokkos_View.hpp>
 
 #include <impl/Kokkos_Tools.hpp>
+#include <impl/Kokkos_Tools_Generic.hpp>
+
 #include <impl/Kokkos_Traits.hpp>
 #include <impl/Kokkos_FunctorAnalysis.hpp>
 #include <impl/Kokkos_FunctorAdapter.hpp>
@@ -85,6 +87,31 @@ using device_type_t = typename T::device_type;
 
 template <class Functor, class Policy>
 struct FunctorPolicyExecutionSpace {
+  using policy_execution_space  = detected_t<execution_space_t, Policy>;
+  using functor_execution_space = detected_t<execution_space_t, Functor>;
+  using functor_device_type     = detected_t<device_type_t, Functor>;
+  using functor_device_type_execution_space =
+      detected_t<execution_space_t, functor_device_type>;
+
+  static_assert(
+      !is_detected<execution_space_t, Policy>::value ||
+          !is_detected<execution_space_t, Functor>::value ||
+          std::is_same<policy_execution_space, functor_execution_space>::value,
+      "A policy with an execution space and a functor with an execution space "
+      "are given but the execution space types do not match!");
+  static_assert(!is_detected<execution_space_t, Policy>::value ||
+                    !is_detected<device_type_t, Functor>::value ||
+                    std::is_same<policy_execution_space,
+                                 functor_device_type_execution_space>::value,
+                "A policy with an execution space and a functor with a device "
+                "type are given but the execution space types do not match!");
+  static_assert(!is_detected<device_type_t, Functor>::value ||
+                    !is_detected<execution_space_t, Functor>::value ||
+                    std::is_same<functor_device_type_execution_space,
+                                 functor_execution_space>::value,
+                "A functor with both an execution space and device type is "
+                "given but their execution space types do not match!");
+
   using execution_space = detected_or_t<
       detected_or_t<
           std::conditional_t<
diff --git a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
index bc613cea62b10a56f888baabbc16ed9258e041dd..abd5c39bb65b302092696ab6f078ba1decaa0b41 100644
--- a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
+++ b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
@@ -49,7 +49,9 @@
 #include <Kokkos_View.hpp>
 #include <impl/Kokkos_FunctorAnalysis.hpp>
 #include <impl/Kokkos_FunctorAdapter.hpp>
+#include <impl/Kokkos_Tools_Generic.hpp>
 #include <type_traits>
+#include <iostream>
 
 namespace Kokkos {
 
@@ -727,6 +729,738 @@ struct MinMaxLoc {
   KOKKOS_INLINE_FUNCTION
   bool references_scalar() const { return references_scalar_v; }
 };
+
+// --------------------------------------------------
+// reducers added to support std algorithms
+// --------------------------------------------------
+
+//
+// MaxFirstLoc
+//
+template <class Scalar, class Index, class Space>
+struct MaxFirstLoc {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer    = MaxFirstLoc<Scalar, Index, Space>;
+  using value_type = ::Kokkos::ValLocScalar<scalar_type, index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  MaxFirstLoc(value_type& value_) : value(&value_), references_scalar_v(true) {}
+
+  KOKKOS_INLINE_FUNCTION
+  MaxFirstLoc(const result_view_type& value_)
+      : value(value_), references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    if (dest.val < src.val) {
+      dest = src;
+    } else if (!(src.val < dest.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    if (dest.val < src.val) {
+      dest = src;
+    } else if (!(src.val < dest.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.val = reduction_identity<scalar_type>::max();
+    val.loc = reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+//
+// MaxFirstLocCustomComparator
+// recall that comp(a,b) returns true is a < b
+//
+template <class Scalar, class Index, class ComparatorType, class Space>
+struct MaxFirstLocCustomComparator {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer =
+      MaxFirstLocCustomComparator<Scalar, Index, ComparatorType, Space>;
+  using value_type = ::Kokkos::ValLocScalar<scalar_type, index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+  ComparatorType m_comp;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  MaxFirstLocCustomComparator(value_type& value_, ComparatorType comp_)
+      : value(&value_), references_scalar_v(true), m_comp(comp_) {}
+
+  KOKKOS_INLINE_FUNCTION
+  MaxFirstLocCustomComparator(const result_view_type& value_,
+                              ComparatorType comp_)
+      : value(value_), references_scalar_v(false), m_comp(comp_) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    if (m_comp(dest.val, src.val)) {
+      dest = src;
+    } else if (!m_comp(src.val, dest.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    if (m_comp(dest.val, src.val)) {
+      dest = src;
+    } else if (!m_comp(src.val, dest.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.val = reduction_identity<scalar_type>::max();
+    val.loc = reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+//
+// MinFirstLoc
+//
+template <class Scalar, class Index, class Space>
+struct MinFirstLoc {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer    = MinFirstLoc<Scalar, Index, Space>;
+  using value_type = ::Kokkos::ValLocScalar<scalar_type, index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  MinFirstLoc(value_type& value_) : value(&value_), references_scalar_v(true) {}
+
+  KOKKOS_INLINE_FUNCTION
+  MinFirstLoc(const result_view_type& value_)
+      : value(value_), references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    if (src.val < dest.val) {
+      dest = src;
+    } else if (!(dest.val < src.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    if (src.val < dest.val) {
+      dest = src;
+    } else if (!(dest.val < src.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.val = reduction_identity<scalar_type>::min();
+    val.loc = reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+//
+// MinFirstLocCustomComparator
+// recall that comp(a,b) returns true is a < b
+//
+template <class Scalar, class Index, class ComparatorType, class Space>
+struct MinFirstLocCustomComparator {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer =
+      MinFirstLocCustomComparator<Scalar, Index, ComparatorType, Space>;
+  using value_type = ::Kokkos::ValLocScalar<scalar_type, index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+  ComparatorType m_comp;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  MinFirstLocCustomComparator(value_type& value_, ComparatorType comp_)
+      : value(&value_), references_scalar_v(true), m_comp(comp_) {}
+
+  KOKKOS_INLINE_FUNCTION
+  MinFirstLocCustomComparator(const result_view_type& value_,
+                              ComparatorType comp_)
+      : value(value_), references_scalar_v(false), m_comp(comp_) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    if (m_comp(src.val, dest.val)) {
+      dest = src;
+    } else if (!m_comp(dest.val, src.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    if (m_comp(src.val, dest.val)) {
+      dest = src;
+    } else if (!m_comp(dest.val, src.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.val = reduction_identity<scalar_type>::min();
+    val.loc = reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+//
+// MinMaxFirstLastLoc
+//
+template <class Scalar, class Index, class Space>
+struct MinMaxFirstLastLoc {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer    = MinMaxFirstLastLoc<Scalar, Index, Space>;
+  using value_type = ::Kokkos::MinMaxLocScalar<scalar_type, index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  MinMaxFirstLastLoc(value_type& value_)
+      : value(&value_), references_scalar_v(true) {}
+
+  KOKKOS_INLINE_FUNCTION
+  MinMaxFirstLastLoc(const result_view_type& value_)
+      : value(value_), references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    if (src.min_val < dest.min_val) {
+      dest.min_val = src.min_val;
+      dest.min_loc = src.min_loc;
+    } else if (!(dest.min_val < src.min_val)) {
+      dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc;
+    }
+
+    if (dest.max_val < src.max_val) {
+      dest.max_val = src.max_val;
+      dest.max_loc = src.max_loc;
+    } else if (!(src.max_val < dest.max_val)) {
+      dest.max_loc = (src.max_loc > dest.max_loc) ? src.max_loc : dest.max_loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    if (src.min_val < dest.min_val) {
+      dest.min_val = src.min_val;
+      dest.min_loc = src.min_loc;
+    } else if (!(dest.min_val < src.min_val)) {
+      dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc;
+    }
+
+    if (dest.max_val < src.max_val) {
+      dest.max_val = src.max_val;
+      dest.max_loc = src.max_loc;
+    } else if (!(src.max_val < dest.max_val)) {
+      dest.max_loc = (src.max_loc > dest.max_loc) ? src.max_loc : dest.max_loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.max_val = ::Kokkos::reduction_identity<scalar_type>::max();
+    val.min_val = ::Kokkos::reduction_identity<scalar_type>::min();
+    val.max_loc = ::Kokkos::reduction_identity<index_type>::max();
+    val.min_loc = ::Kokkos::reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+//
+// MinMaxFirstLastLocCustomComparator
+// recall that comp(a,b) returns true is a < b
+//
+template <class Scalar, class Index, class ComparatorType, class Space>
+struct MinMaxFirstLastLocCustomComparator {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer =
+      MinMaxFirstLastLocCustomComparator<Scalar, Index, ComparatorType, Space>;
+  using value_type = ::Kokkos::MinMaxLocScalar<scalar_type, index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+  ComparatorType m_comp;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  MinMaxFirstLastLocCustomComparator(value_type& value_, ComparatorType comp_)
+      : value(&value_), references_scalar_v(true), m_comp(comp_) {}
+
+  KOKKOS_INLINE_FUNCTION
+  MinMaxFirstLastLocCustomComparator(const result_view_type& value_,
+                                     ComparatorType comp_)
+      : value(value_), references_scalar_v(false), m_comp(comp_) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    if (m_comp(src.min_val, dest.min_val)) {
+      dest.min_val = src.min_val;
+      dest.min_loc = src.min_loc;
+    } else if (!m_comp(dest.min_val, src.min_val)) {
+      dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc;
+    }
+
+    if (m_comp(dest.max_val, src.max_val)) {
+      dest.max_val = src.max_val;
+      dest.max_loc = src.max_loc;
+    } else if (!m_comp(src.max_val, dest.max_val)) {
+      dest.max_loc = (src.max_loc > dest.max_loc) ? src.max_loc : dest.max_loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    if (m_comp(src.min_val, dest.min_val)) {
+      dest.min_val = src.min_val;
+      dest.min_loc = src.min_loc;
+    } else if (!m_comp(dest.min_val, src.min_val)) {
+      dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc;
+    }
+
+    if (m_comp(dest.max_val, src.max_val)) {
+      dest.max_val = src.max_val;
+      dest.max_loc = src.max_loc;
+    } else if (!m_comp(src.max_val, dest.max_val)) {
+      dest.max_loc = (src.max_loc > dest.max_loc) ? src.max_loc : dest.max_loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.max_val = ::Kokkos::reduction_identity<scalar_type>::max();
+    val.min_val = ::Kokkos::reduction_identity<scalar_type>::min();
+    val.max_loc = ::Kokkos::reduction_identity<index_type>::max();
+    val.min_loc = ::Kokkos::reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+//
+// FirstLoc
+//
+template <class Index>
+struct FirstLocScalar {
+  Index min_loc_true;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const FirstLocScalar& rhs) { min_loc_true = rhs.min_loc_true; }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const volatile FirstLocScalar& rhs) volatile {
+    min_loc_true = rhs.min_loc_true;
+  }
+};
+
+template <class Index, class Space>
+struct FirstLoc {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer    = FirstLoc<Index, Space>;
+  using value_type = FirstLocScalar<index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  FirstLoc(value_type& value_) : value(&value_), references_scalar_v(true) {}
+
+  KOKKOS_INLINE_FUNCTION
+  FirstLoc(const result_view_type& value_)
+      : value(value_), references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    dest.min_loc_true = (src.min_loc_true < dest.min_loc_true)
+                            ? src.min_loc_true
+                            : dest.min_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    dest.min_loc_true = (src.min_loc_true < dest.min_loc_true)
+                            ? src.min_loc_true
+                            : dest.min_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.min_loc_true = ::Kokkos::reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+//
+// LastLoc
+//
+template <class Index>
+struct LastLocScalar {
+  Index max_loc_true;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const LastLocScalar& rhs) { max_loc_true = rhs.max_loc_true; }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const volatile LastLocScalar& rhs) volatile {
+    max_loc_true = rhs.max_loc_true;
+  }
+};
+
+template <class Index, class Space>
+struct LastLoc {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer    = LastLoc<Index, Space>;
+  using value_type = LastLocScalar<index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  LastLoc(value_type& value_) : value(&value_), references_scalar_v(true) {}
+
+  KOKKOS_INLINE_FUNCTION
+  LastLoc(const result_view_type& value_)
+      : value(value_), references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    dest.max_loc_true = (src.max_loc_true > dest.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    dest.max_loc_true = (src.max_loc_true > dest.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.max_loc_true = ::Kokkos::reduction_identity<index_type>::max();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+template <class Index>
+struct StdIsPartScalar {
+  Index max_loc_true, min_loc_false;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const StdIsPartScalar& rhs) {
+    min_loc_false = rhs.min_loc_false;
+    max_loc_true  = rhs.max_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const volatile StdIsPartScalar& rhs) volatile {
+    min_loc_false = rhs.min_loc_false;
+    max_loc_true  = rhs.max_loc_true;
+  }
+};
+
+//
+// StdIsPartitioned
+//
+template <class Index, class Space>
+struct StdIsPartitioned {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer    = StdIsPartitioned<Index, Space>;
+  using value_type = StdIsPartScalar<index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  StdIsPartitioned(value_type& value_)
+      : value(&value_), references_scalar_v(true) {}
+
+  KOKKOS_INLINE_FUNCTION
+  StdIsPartitioned(const result_view_type& value_)
+      : value(value_), references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    dest.max_loc_true = (dest.max_loc_true < src.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    dest.max_loc_true = (dest.max_loc_true < src.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.max_loc_true  = ::Kokkos::reduction_identity<index_type>::max();
+    val.min_loc_false = ::Kokkos::reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
+template <class Index>
+struct StdPartPointScalar {
+  Index min_loc_false;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const StdPartPointScalar& rhs) {
+    min_loc_false = rhs.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator=(const volatile StdPartPointScalar& rhs) volatile {
+    min_loc_false = rhs.min_loc_false;
+  }
+};
+
+//
+// StdPartitionPoint
+//
+template <class Index, class Space>
+struct StdPartitionPoint {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using reducer    = StdPartitionPoint<Index, Space>;
+  using value_type = StdPartPointScalar<index_type>;
+
+  using result_view_type = ::Kokkos::View<value_type, Space>;
+
+ private:
+  result_view_type value;
+  bool references_scalar_v;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  StdPartitionPoint(value_type& value_)
+      : value(&value_), references_scalar_v(true) {}
+
+  KOKKOS_INLINE_FUNCTION
+  StdPartitionPoint(const result_view_type& value_)
+      : value(value_), references_scalar_v(false) {}
+
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  void join(value_type& dest, const value_type& src) const {
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void join(volatile value_type& dest, const volatile value_type& src) const {
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void init(value_type& val) const {
+    val.min_loc_false = ::Kokkos::reduction_identity<index_type>::min();
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  value_type& reference() const { return *value.data(); }
+
+  KOKKOS_INLINE_FUNCTION
+  result_view_type view() const { return value; }
+
+  KOKKOS_INLINE_FUNCTION
+  bool references_scalar() const { return references_scalar_v; }
+};
+
 }  // namespace Kokkos
 namespace Kokkos {
 namespace Impl {
@@ -845,15 +1579,11 @@ template <class PolicyType, class FunctorType, class ReturnType>
 struct ParallelReduceAdaptor {
   using return_value_adapter =
       Impl::ParallelReduceReturnValue<void, ReturnType, FunctorType>;
-#ifdef KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
-  using functor_adaptor =
-      Impl::ParallelReduceFunctorType<FunctorType, PolicyType,
-                                      typename return_value_adapter::value_type,
-                                      typename PolicyType::execution_space>;
-#endif
-  static inline void execute(const std::string& label, const PolicyType& policy,
-                             const FunctorType& functor,
-                             ReturnType& return_value) {
+
+  static inline void execute_impl(const std::string& label,
+                                  const PolicyType& policy,
+                                  const FunctorType& functor,
+                                  ReturnType& return_value) {
     uint64_t kpID = 0;
 
     PolicyType inner_policy = policy;
@@ -862,17 +1592,10 @@ struct ParallelReduceAdaptor {
                                                      label, kpID);
 
     Kokkos::Impl::shared_allocation_tracking_disable();
-#ifdef KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
-    Impl::ParallelReduce<typename functor_adaptor::functor_type, PolicyType,
-                         typename return_value_adapter::reducer_type>
-        closure(functor_adaptor::functor(functor), inner_policy,
-                return_value_adapter::return_value(return_value, functor));
-#else
     Impl::ParallelReduce<FunctorType, PolicyType,
                          typename return_value_adapter::reducer_type>
         closure(functor, inner_policy,
                 return_value_adapter::return_value(return_value, functor));
-#endif
     Kokkos::Impl::shared_allocation_tracking_enable();
     closure.execute();
 
@@ -880,6 +1603,31 @@ struct ParallelReduceAdaptor {
         typename return_value_adapter::reducer_type>(inner_policy, functor,
                                                      label, kpID);
   }
+
+  static constexpr bool is_array_reduction =
+      Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType,
+                            FunctorType>::StaticValueSize == 0;
+
+  template <typename Dummy = ReturnType>
+  static inline std::enable_if_t<!(is_array_reduction &&
+                                   std::is_pointer<Dummy>::value)>
+  execute(const std::string& label, const PolicyType& policy,
+          const FunctorType& functor, ReturnType& return_value) {
+    execute_impl(label, policy, functor, return_value);
+  }
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+  template <typename Dummy = ReturnType>
+  KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Array reductions with a raw pointer return type a deprecated. Use a "
+      "Kokkos::View as return argument!")
+  static inline std::
+      enable_if_t<is_array_reduction && std::is_pointer<Dummy>::value> execute(
+          const std::string& label, const PolicyType& policy,
+          const FunctorType& functor, ReturnType& return_value) {
+    execute_impl(label, policy, functor, return_value);
+  }
+#endif
 };
 }  // namespace Impl
 
@@ -999,10 +1747,17 @@ struct ParallelReduceFence {
 // ReturnValue is scalar or array: take by reference
 
 template <class PolicyType, class FunctorType, class ReturnType>
-inline typename std::enable_if<
-    Kokkos::is_execution_policy<PolicyType>::value>::type
+inline std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value &&
+                        !(Kokkos::is_view<ReturnType>::value ||
+                          Kokkos::is_reducer<ReturnType>::value ||
+                          std::is_pointer<ReturnType>::value)>
 parallel_reduce(const std::string& label, const PolicyType& policy,
                 const FunctorType& functor, ReturnType& return_value) {
+  static_assert(
+      !std::is_const<ReturnType>::value,
+      "A const reduction result type is only allowed for a View, pointer or "
+      "reducer return type!");
+
   Impl::ParallelReduceAdaptor<PolicyType, FunctorType, ReturnType>::execute(
       label, policy, functor, return_value);
   Impl::ParallelReduceFence<typename PolicyType::execution_space, ReturnType>::
@@ -1013,10 +1768,17 @@ parallel_reduce(const std::string& label, const PolicyType& policy,
 }
 
 template <class PolicyType, class FunctorType, class ReturnType>
-inline typename std::enable_if<
-    Kokkos::is_execution_policy<PolicyType>::value>::type
+inline std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value &&
+                        !(Kokkos::is_view<ReturnType>::value ||
+                          Kokkos::is_reducer<ReturnType>::value ||
+                          std::is_pointer<ReturnType>::value)>
 parallel_reduce(const PolicyType& policy, const FunctorType& functor,
                 ReturnType& return_value) {
+  static_assert(
+      !std::is_const<ReturnType>::value,
+      "A const reduction result type is only allowed for a View, pointer or "
+      "reducer return type!");
+
   Impl::ParallelReduceAdaptor<PolicyType, FunctorType, ReturnType>::execute(
       "", policy, functor, return_value);
   Impl::ParallelReduceFence<typename PolicyType::execution_space, ReturnType>::
@@ -1027,11 +1789,20 @@ parallel_reduce(const PolicyType& policy, const FunctorType& functor,
 }
 
 template <class FunctorType, class ReturnType>
-inline void parallel_reduce(const size_t& policy, const FunctorType& functor,
-                            ReturnType& return_value) {
+inline std::enable_if_t<!(Kokkos::is_view<ReturnType>::value ||
+                          Kokkos::is_reducer<ReturnType>::value ||
+                          std::is_pointer<ReturnType>::value)>
+parallel_reduce(const size_t& policy, const FunctorType& functor,
+                ReturnType& return_value) {
+  static_assert(
+      !std::is_const<ReturnType>::value,
+      "A const reduction result type is only allowed for a View, pointer or "
+      "reducer return type!");
+
   using policy_type =
       typename Impl::ParallelReducePolicyType<void, size_t,
                                               FunctorType>::policy_type;
+
   Impl::ParallelReduceAdaptor<policy_type, FunctorType, ReturnType>::execute(
       "", policy_type(0, policy), functor, return_value);
   Impl::ParallelReduceFence<typename policy_type::execution_space, ReturnType>::
@@ -1042,9 +1813,16 @@ inline void parallel_reduce(const size_t& policy, const FunctorType& functor,
 }
 
 template <class FunctorType, class ReturnType>
-inline void parallel_reduce(const std::string& label, const size_t& policy,
-                            const FunctorType& functor,
-                            ReturnType& return_value) {
+inline std::enable_if_t<!(Kokkos::is_view<ReturnType>::value ||
+                          Kokkos::is_reducer<ReturnType>::value ||
+                          std::is_pointer<ReturnType>::value)>
+parallel_reduce(const std::string& label, const size_t& policy,
+                const FunctorType& functor, ReturnType& return_value) {
+  static_assert(
+      !std::is_const<ReturnType>::value,
+      "A const reduction result type is only allowed for a View, pointer or "
+      "reducer return type!");
+
   using policy_type =
       typename Impl::ParallelReducePolicyType<void, size_t,
                                               FunctorType>::policy_type;
@@ -1060,8 +1838,10 @@ inline void parallel_reduce(const std::string& label, const size_t& policy,
 // ReturnValue as View or Reducer: take by copy to allow for inline construction
 
 template <class PolicyType, class FunctorType, class ReturnType>
-inline typename std::enable_if<
-    Kokkos::is_execution_policy<PolicyType>::value>::type
+inline std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value &&
+                        (Kokkos::is_view<ReturnType>::value ||
+                         Kokkos::is_reducer<ReturnType>::value ||
+                         std::is_pointer<ReturnType>::value)>
 parallel_reduce(const std::string& label, const PolicyType& policy,
                 const FunctorType& functor, const ReturnType& return_value) {
   ReturnType return_value_impl = return_value;
@@ -1075,8 +1855,10 @@ parallel_reduce(const std::string& label, const PolicyType& policy,
 }
 
 template <class PolicyType, class FunctorType, class ReturnType>
-inline typename std::enable_if<
-    Kokkos::is_execution_policy<PolicyType>::value>::type
+inline std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value &&
+                        (Kokkos::is_view<ReturnType>::value ||
+                         Kokkos::is_reducer<ReturnType>::value ||
+                         std::is_pointer<ReturnType>::value)>
 parallel_reduce(const PolicyType& policy, const FunctorType& functor,
                 const ReturnType& return_value) {
   ReturnType return_value_impl = return_value;
@@ -1090,8 +1872,11 @@ parallel_reduce(const PolicyType& policy, const FunctorType& functor,
 }
 
 template <class FunctorType, class ReturnType>
-inline void parallel_reduce(const size_t& policy, const FunctorType& functor,
-                            const ReturnType& return_value) {
+inline std::enable_if_t<Kokkos::is_view<ReturnType>::value ||
+                        Kokkos::is_reducer<ReturnType>::value ||
+                        std::is_pointer<ReturnType>::value>
+parallel_reduce(const size_t& policy, const FunctorType& functor,
+                const ReturnType& return_value) {
   using policy_type =
       typename Impl::ParallelReducePolicyType<void, size_t,
                                               FunctorType>::policy_type;
@@ -1106,9 +1891,11 @@ inline void parallel_reduce(const size_t& policy, const FunctorType& functor,
 }
 
 template <class FunctorType, class ReturnType>
-inline void parallel_reduce(const std::string& label, const size_t& policy,
-                            const FunctorType& functor,
-                            const ReturnType& return_value) {
+inline std::enable_if_t<Kokkos::is_view<ReturnType>::value ||
+                        Kokkos::is_reducer<ReturnType>::value ||
+                        std::is_pointer<ReturnType>::value>
+parallel_reduce(const std::string& label, const size_t& policy,
+                const FunctorType& functor, const ReturnType& return_value) {
   using policy_type =
       typename Impl::ParallelReducePolicyType<void, size_t,
                                               FunctorType>::policy_type;
diff --git a/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp b/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp
index 241a3a13a9c4d682785f274d4616c3f17cb4c9a5..4556cddbabd748fdad211317822689749ff9defe 100644
--- a/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp
+++ b/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp
@@ -56,9 +56,16 @@ namespace Profiling {
 
 class ProfilingSection {
  public:
-  ProfilingSection(const std::string& sectionName) : secName(sectionName) {
+  ProfilingSection(ProfilingSection const&) = delete;
+  ProfilingSection& operator=(ProfilingSection const&) = delete;
+
+  ProfilingSection(const std::string& sectionName)
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+      : secName(sectionName)
+#endif
+  {
     if (Kokkos::Profiling::profileLibraryLoaded()) {
-      Kokkos::Profiling::createProfileSection(secName, &secID);
+      Kokkos::Profiling::createProfileSection(sectionName, &secID);
     }
   }
 
@@ -80,12 +87,16 @@ class ProfilingSection {
     }
   }
 
-  std::string getName() { return secName; }
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+  KOKKOS_DEPRECATED std::string getName() { return secName; }
 
-  uint32_t getSectionID() { return secID; }
+  KOKKOS_DEPRECATED uint32_t getSectionID() { return secID; }
+#endif
 
  protected:
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
   const std::string secName;
+#endif
   uint32_t secID;
 };
 
diff --git a/packages/kokkos/core/src/Kokkos_Serial.hpp b/packages/kokkos/core/src/Kokkos_Serial.hpp
index 9c8ae70721e58bff5a91da3b99e3630ffe0c273a..9aada48bf607231731895dd96be8e8f9289ce4a4 100644
--- a/packages/kokkos/core/src/Kokkos_Serial.hpp
+++ b/packages/kokkos/core/src/Kokkos_Serial.hpp
@@ -106,7 +106,7 @@ class SerialInternal {
 ///
 /// A "device" represents a parallel execution model.  It tells Kokkos
 /// how to parallelize the execution of kernels in a parallel_for or
-/// parallel_reduce.  For example, the Threads device uses Pthreads or
+/// parallel_reduce.  For example, the Threads device uses
 /// C++11 threads on a CPU, the OpenMP device uses the OpenMP language
 /// extensions, and the Cuda device uses NVIDIA's CUDA programming
 /// model.  The Serial device executes "parallel" kernels
diff --git a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp
index 17e78f5e81fe83c940eea1bcd6c1d6c347649a4b..e45feb8554dd9a0e594d0d4a373669801f8ba055 100644
--- a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp
+++ b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp
@@ -646,16 +646,6 @@ typename Scheduler::template future_type_for_functor<
   using task_type =
       typename scheduler_type::template runnable_task_type<FunctorType>;
 
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) && \
-    defined(KOKKOS_ENABLE_CUDA)
-
-  // This doesn't work with clang cuda
-  // static_assert(
-  //    !std::is_same<Kokkos::Cuda, typename Scheduler::execution_space>::value,
-  //    "Error calling Kokkos::task_spawn for Cuda space within Host code");
-
-#endif
-
   static_assert(TaskEnum == Impl::TaskType::TaskTeam ||
                     TaskEnum == Impl::TaskType::TaskSingle,
                 "Kokkos task_spawn requires TaskTeam or TaskSingle");
diff --git a/packages/kokkos/core/src/Kokkos_Threads.hpp b/packages/kokkos/core/src/Kokkos_Threads.hpp
index da9bea9c2347faca7b8e5944cb08a0783983f285..45a2d0e32621a5dd2aff647954aaa56e18692ea2 100644
--- a/packages/kokkos/core/src/Kokkos_Threads.hpp
+++ b/packages/kokkos/core/src/Kokkos_Threads.hpp
@@ -72,7 +72,7 @@ enum class fence_is_static { yes, no };
 
 namespace Kokkos {
 
-/** \brief  Execution space for a pool of Pthreads or C11 threads on a CPU. */
+/** \brief  Execution space for a pool of C++11 threads on a CPU. */
 class Threads {
  public:
   //! \name Type declarations that all Kokkos devices must provide.
@@ -127,21 +127,12 @@ class Threads {
   //! \name Space-specific functions
   //@{
 
-  /** \brief Initialize the device in the "ready to work" state.
-   *
-   *  The device is initialized in a "ready to work" or "awake" state.
-   *  This state reduces latency and thus improves performance when
-   *  dispatching work.  However, the "awake" state consumes resources
-   *  even when no work is being done.  You may call sleep() to put
-   *  the device in a "sleeping" state that does not consume as many
-   *  resources, but it will take time (latency) to awaken the device
-   *  again (via the wake()) method so that it is ready for work.
-   *
+  /**
    *  Teams of threads are distributed as evenly as possible across
    *  the requested number of numa regions and cores per numa region.
    *  A team will not be split across a numa region.
    *
-   *  If the 'use_' arguments are not supplied the hwloc is queried
+   *  If the 'use_' arguments are not supplied, the hwloc is queried
    *  to use all available cores.
    */
   static void impl_initialize(unsigned threads_count             = 0,
@@ -156,11 +147,14 @@ class Threads {
   //----------------------------------------
 
   static int impl_thread_pool_size(int depth = 0);
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  static int impl_thread_pool_rank();
-#else
-  KOKKOS_INLINE_FUNCTION static int impl_thread_pool_rank() { return 0; }
-#endif
+
+  static int impl_thread_pool_rank_host();
+
+  static KOKKOS_FUNCTION int impl_thread_pool_rank() {
+    KOKKOS_IF_ON_HOST((return impl_thread_pool_rank_host();))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
+  }
 
   inline static unsigned impl_max_hardware_threads() {
     return impl_thread_pool_size(0);
diff --git a/packages/kokkos/core/src/Kokkos_Timer.hpp b/packages/kokkos/core/src/Kokkos_Timer.hpp
index 4fda4ec4d443972b280413d41c321059874d8e54..a3a0b32574ebd78dd89f72f426d5dca250e9c202 100644
--- a/packages/kokkos/core/src/Kokkos_Timer.hpp
+++ b/packages/kokkos/core/src/Kokkos_Timer.hpp
@@ -46,17 +46,48 @@
 #define KOKKOS_TIMER_HPP
 
 #include <Kokkos_Macros.hpp>
+// gcc 10.3.0 with CUDA doesn't support std::chrono,
+// see https://github.com/kokkos/kokkos/issues/4334
+#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU == 1030) && \
+    defined(KOKKOS_COMPILER_NVCC)
+#include <sys/time.h>
+#else
 #include <chrono>
+#endif
 
 namespace Kokkos {
 
 /** \brief  Time since construction */
 
+#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU == 1030) && \
+    defined(KOKKOS_COMPILER_NVCC)
+class Timer {
+ private:
+  struct timeval m_old;
+
+ public:
+  inline void reset() { gettimeofday(&m_old, nullptr); }
+
+  inline ~Timer() = default;
+
+  inline Timer() { reset(); }
+
+  Timer(const Timer&) = delete;
+  Timer& operator=(const Timer&) = delete;
+
+  inline double seconds() const {
+    struct timeval m_new;
+
+    gettimeofday(&m_new, nullptr);
+
+    return ((double)(m_new.tv_sec - m_old.tv_sec)) +
+           ((double)(m_new.tv_usec - m_old.tv_usec) * 1.0e-6);
+  }
+};
+#else
 class Timer {
  private:
   std::chrono::high_resolution_clock::time_point m_old;
-  Timer(const Timer&);
-  Timer& operator=(const Timer&);
 
  public:
   inline void reset() { m_old = std::chrono::high_resolution_clock::now(); }
@@ -65,14 +96,18 @@ class Timer {
 
   inline Timer() { reset(); }
 
+  Timer(const Timer&);
+  Timer& operator=(const Timer&);
+
   inline double seconds() const {
     std::chrono::high_resolution_clock::time_point m_new =
         std::chrono::high_resolution_clock::now();
-    return std::chrono::duration_cast<std::chrono::duration<double>>(m_new -
-                                                                     m_old)
+    return std::chrono::duration_cast<std::chrono::duration<double> >(m_new -
+                                                                      m_old)
         .count();
   }
 };
+#endif
 
 }  // namespace Kokkos
 
diff --git a/packages/kokkos/core/src/Kokkos_UniqueToken.hpp b/packages/kokkos/core/src/Kokkos_UniqueToken.hpp
index bce7e703f0bffaba7b9dcfa40c9566f35d9c31fa..c6c1e7cead68e9932205ef0646bd3305dd22864d 100644
--- a/packages/kokkos/core/src/Kokkos_UniqueToken.hpp
+++ b/packages/kokkos/core/src/Kokkos_UniqueToken.hpp
@@ -59,8 +59,8 @@ enum class UniqueTokenScope : int { Instance, Global };
 ///
 /// This object should behave like a ref-counted object, so that when the last
 /// instance is destroy resources are free if needed
-template <typename ExecutionSpace,
-          UniqueTokenScope = UniqueTokenScope::Instance>
+template <typename ExecutionSpace = Kokkos::DefaultExecutionSpace,
+          UniqueTokenScope        = UniqueTokenScope::Instance>
 class UniqueToken {
  public:
   using execution_space = ExecutionSpace;
diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp
index b217cc4bc171a94ce3fdeea9ce6301c70c106da9..b8d33e30c02331f69382ee1ae024592a5d82b50b 100644
--- a/packages/kokkos/core/src/Kokkos_View.hpp
+++ b/packages/kokkos/core/src/Kokkos_View.hpp
@@ -109,13 +109,12 @@ void runtime_check_rank_device(const size_t dyn_rank, const bool is_void_spec,
   }
 }
 
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-KOKKOS_INLINE_FUNCTION
-void runtime_check_rank_host(const size_t dyn_rank, const bool is_void_spec,
-                             const size_t i0, const size_t i1, const size_t i2,
-                             const size_t i3, const size_t i4, const size_t i5,
-                             const size_t i6, const size_t i7,
-                             const std::string& label) {
+inline void runtime_check_rank_host(const size_t dyn_rank,
+                                    const bool is_void_spec, const size_t i0,
+                                    const size_t i1, const size_t i2,
+                                    const size_t i3, const size_t i4,
+                                    const size_t i5, const size_t i6,
+                                    const size_t i7, const std::string& label) {
   if (is_void_spec) {
     const size_t num_passed_args =
         count_valid_integers(i0, i1, i2, i3, i4, i5, i6, i7);
@@ -130,7 +129,6 @@ void runtime_check_rank_host(const size_t dyn_rank, const bool is_void_spec,
     }
   }
 }
-#endif
 
 } /* namespace Impl */
 } /* namespace Kokkos */
@@ -792,18 +790,20 @@ class View : public ViewTraits<DataType, Properties...> {
 
 #define KOKKOS_IMPL_SINK(ARG) ARG
 
-#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)                          \
-  Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
-                             typename traits::memory_space>::check();  \
+#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)                             \
+  Kokkos::Impl::runtime_check_memory_access_violation<                    \
+      typename traits::memory_space>(                                     \
+      "Kokkos::View ERROR: attempt to access inaccessible memory space"); \
   Kokkos::Impl::view_verify_operator_bounds<typename traits::memory_space> ARG;
 
 #else
 
 #define KOKKOS_IMPL_SINK(ARG)
 
-#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)                          \
-  Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \
-                             typename traits::memory_space>::check();
+#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG)          \
+  Kokkos::Impl::runtime_check_memory_access_violation< \
+      typename traits::memory_space>(                  \
+      "Kokkos::View ERROR: attempt to access inaccessible memory space");
 
 #endif
 
@@ -1605,28 +1605,16 @@ class View : public ViewTraits<DataType, Properties...> {
       : View(arg_prop,
              typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3,
                                            arg_N4, arg_N5, arg_N6, arg_N7)) {
-#ifdef KOKKOS_ENABLE_OPENMPTARGET
-    KOKKOS_IMPL_IF_ON_HOST
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-    else Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-#else
-    Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-
-#endif
+    KOKKOS_IF_ON_HOST(
+        (Impl::runtime_check_rank_host(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());))
+    KOKKOS_IF_ON_DEVICE(
+        (Impl::runtime_check_rank_device(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);))
   }
 
   template <class... P>
@@ -1645,28 +1633,16 @@ class View : public ViewTraits<DataType, Properties...> {
       : View(arg_prop,
              typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3,
                                            arg_N4, arg_N5, arg_N6, arg_N7)) {
-#ifdef KOKKOS_ENABLE_OPENMPTARGET
-    KOKKOS_IMPL_IF_ON_HOST
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-    else Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-#else
-    Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-
-#endif
+    KOKKOS_IF_ON_HOST(
+        (Impl::runtime_check_rank_host(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());))
+    KOKKOS_IF_ON_DEVICE(
+        (Impl::runtime_check_rank_device(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);))
   }
 
   // Allocate with label and layout
@@ -1699,28 +1675,16 @@ class View : public ViewTraits<DataType, Properties...> {
                   "Layout is not extent constructible. A layout object should "
                   "be passed too.\n");
 
-#ifdef KOKKOS_ENABLE_OPENMPTARGET
-    KOKKOS_IMPL_IF_ON_HOST
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-    else Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-#else
-    Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-
-#endif
+    KOKKOS_IF_ON_HOST(
+        (Impl::runtime_check_rank_host(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());))
+    KOKKOS_IF_ON_DEVICE(
+        (Impl::runtime_check_rank_device(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);))
   }
 
   // Construct view from ViewTracker and map
@@ -1775,28 +1739,16 @@ class View : public ViewTraits<DataType, Properties...> {
       : View(Impl::ViewCtorProp<pointer_type>(arg_ptr),
              typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3,
                                            arg_N4, arg_N5, arg_N6, arg_N7)) {
-#ifdef KOKKOS_ENABLE_OPENMPTARGET
-    KOKKOS_IMPL_IF_ON_HOST
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-    else Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-#else
-    Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-
-#endif
+    KOKKOS_IF_ON_HOST(
+        (Impl::runtime_check_rank_host(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());))
+    KOKKOS_IF_ON_DEVICE(
+        (Impl::runtime_check_rank_device(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);))
   }
 
   explicit KOKKOS_INLINE_FUNCTION View(
@@ -1865,28 +1817,16 @@ class View : public ViewTraits<DataType, Properties...> {
                      sizeof(typename traits::value_type)))),
              typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3,
                                            arg_N4, arg_N5, arg_N6, arg_N7)) {
-#ifdef KOKKOS_ENABLE_OPENMPTARGET
-    KOKKOS_IMPL_IF_ON_HOST
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-    else Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    Impl::runtime_check_rank_host(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());
-#else
-    Impl::runtime_check_rank_device(
-        traits::rank_dynamic,
-        std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1,
-        arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);
-
-#endif
+    KOKKOS_IF_ON_HOST(
+        (Impl::runtime_check_rank_host(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());))
+    KOKKOS_IF_ON_DEVICE(
+        (Impl::runtime_check_rank_device(
+             traits::rank_dynamic,
+             std::is_same<typename traits::specialize, void>::value, arg_N0,
+             arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);))
   }
 };
 
@@ -1899,6 +1839,43 @@ KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View<D, P...>& V) {
   return V.Rank;
 }  // Temporary until added to view
 
+namespace Impl {
+
+template <typename ValueType, unsigned int Rank>
+struct RankDataType {
+  using type = typename RankDataType<ValueType, Rank - 1>::type*;
+};
+
+template <typename ValueType>
+struct RankDataType<ValueType, 0> {
+  using type = ValueType;
+};
+
+template <unsigned N, typename... Args>
+std::enable_if_t<N == View<Args...>::Rank, View<Args...>> as_view_of_rank_n(
+    View<Args...> v) {
+  return v;
+}
+
+// Placeholder implementation to compile generic code for DynRankView; should
+// never be called
+template <unsigned N, typename T, typename... Args>
+std::enable_if_t<
+    N != View<T, Args...>::Rank,
+    View<typename RankDataType<typename View<T, Args...>::value_type, N>::type,
+         Args...>>
+as_view_of_rank_n(View<T, Args...>) {
+  Kokkos::Impl::throw_runtime_exception(
+      "Trying to get at a View of the wrong rank");
+  return {};
+}
+
+template <typename Function, typename... Args>
+void apply_to_view_of_static_rank(Function&& f, View<Args...> a) {
+  f(a);
+}
+
+}  // namespace Impl
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
@@ -2091,13 +2068,17 @@ KOKKOS_INLINE_FUNCTION DeducedCommonPropsType<Views...> common_view_alloc_prop(
 
 }  // namespace Kokkos
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
 namespace Kokkos {
 namespace Impl {
 
-using Kokkos::is_view;
+template <class T>
+using is_view KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::is_view instead!") =
+    Kokkos::is_view<T>;
 
 } /* namespace Impl */
 } /* namespace Kokkos */
+#endif
 
 #include <impl/Kokkos_ViewUniformType.hpp>
 #include <impl/Kokkos_Atomic_View.hpp>
diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
index 0d521479eef89121d30c33a41cebdfac4628646f..d2283d456f0883bd6cea5f0856a1abba95df11a3 100644
--- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
+++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
@@ -66,6 +66,7 @@ int g_openmp_hardware_max_threads = 1;
 __thread int t_openmp_hardware_id            = 0;
 __thread Impl::OpenMPExec *t_openmp_instance = nullptr;
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
 void OpenMPExec::validate_partition(const int nthreads, int &num_partitions,
                                     int &partition_size) {
   if (nthreads == 1) {
@@ -117,6 +118,7 @@ void OpenMPExec::validate_partition(const int nthreads, int &num_partitions,
     }
   }
 }
+#endif
 
 void OpenMPExec::verify_is_master(const char *const label) {
   if (!t_openmp_instance) {
@@ -312,10 +314,11 @@ void OpenMP::impl_initialize(int thread_count) {
     // g_openmp_hardware_max_threads to thread_count
     if (thread_count < 0) {
       thread_count = Impl::g_openmp_hardware_max_threads;
-    } else if (thread_count == 0 &&
-               Impl::g_openmp_hardware_max_threads != process_num_threads) {
-      Impl::g_openmp_hardware_max_threads = process_num_threads;
-      omp_set_num_threads(Impl::g_openmp_hardware_max_threads);
+    } else if (thread_count == 0) {
+      if (Impl::g_openmp_hardware_max_threads != process_num_threads) {
+        Impl::g_openmp_hardware_max_threads = process_num_threads;
+        omp_set_num_threads(Impl::g_openmp_hardware_max_threads);
+      }
     } else {
       if (Kokkos::show_warnings() && thread_count > process_num_threads) {
         printf(
diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp
index 1191e49cbe6ecd8d03069288062c88e38e6b8830..2f647af77eb98d1f5a6d161dc5731450b51c3090 100644
--- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp
+++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp
@@ -90,8 +90,11 @@ class OpenMPExec {
 
   void clear_thread_data();
 
-  static void validate_partition(const int nthreads, int& num_partitions,
-                                 int& partition_size);
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+  KOKKOS_DEPRECATED static void validate_partition(const int nthreads,
+                                                   int& num_partitions,
+                                                   int& partition_size);
+#endif
 
  private:
   OpenMPExec(int arg_pool_size)
@@ -144,11 +147,10 @@ inline int OpenMP::impl_thread_pool_size() noexcept {
 
 KOKKOS_INLINE_FUNCTION
 int OpenMP::impl_thread_pool_rank() noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  return Impl::t_openmp_instance ? 0 : omp_get_thread_num();
-#else
-  return -1;
-#endif
+  KOKKOS_IF_ON_HOST(
+      (return Impl::t_openmp_instance ? 0 : omp_get_thread_num();))
+
+  KOKKOS_IF_ON_DEVICE((return -1;))
 }
 
 inline void OpenMP::impl_static_fence(OpenMP const& /**instance*/,
@@ -164,10 +166,11 @@ inline bool OpenMP::is_asynchronous(OpenMP const& /*instance*/) noexcept {
   return false;
 }
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
 template <typename F>
-void OpenMP::partition_master(F const& f, int num_partitions,
-                              int partition_size) {
-#if _OPENMP >= 201811
+KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions,
+                                                int partition_size) {
+#if _OPENMP >= 201511
   if (omp_get_max_active_levels() > 1) {
 #else
   if (omp_get_nested()) {
@@ -217,18 +220,19 @@ void OpenMP::partition_master(F const& f, int num_partitions,
     f(0, 1);
   }
 }
+#endif
 
 namespace Experimental {
 
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
 template <>
-class KOKKOS_DEPRECATED MasterLock<OpenMP> {
+class MasterLock<OpenMP> {
  public:
   void lock() { omp_set_lock(&m_lock); }
   void unlock() { omp_unset_lock(&m_lock); }
   bool try_lock() { return static_cast<bool>(omp_test_lock(&m_lock)); }
 
-  MasterLock() { omp_init_lock(&m_lock); }
+  KOKKOS_DEPRECATED MasterLock() { omp_init_lock(&m_lock); }
   ~MasterLock() { omp_destroy_lock(&m_lock); }
 
   MasterLock(MasterLock const&) = delete;
@@ -270,43 +274,41 @@ class UniqueToken<OpenMP, UniqueTokenScope::Instance> {
   /// \brief upper bound for acquired values, i.e. 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int size() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return m_count;
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((return m_count;))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int acquire() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (m_count >= ::Kokkos::OpenMP::impl_thread_pool_size())
-      return ::Kokkos::OpenMP::impl_thread_pool_rank();
-    const ::Kokkos::pair<int, int> result =
-        ::Kokkos::Impl::concurrent_bitset::acquire_bounded(
-            m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count);
-
-    if (result.first < 0) {
-      ::Kokkos::abort(
-          "UniqueToken<OpenMP> failure to acquire tokens, no tokens available");
-    }
-
-    return result.first;
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST(
+        (if (m_count >= ::Kokkos::OpenMP::impl_thread_pool_size()) return ::
+             Kokkos::OpenMP::impl_thread_pool_rank();
+         const ::Kokkos::pair<int, int> result =
+             ::Kokkos::Impl::concurrent_bitset::acquire_bounded(
+                 m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count);
+
+         if (result.first < 0) {
+           ::Kokkos::abort(
+               "UniqueToken<OpenMP> failure to acquire tokens, no tokens "
+               "available");
+         }
+
+         return result.first;))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief release a value acquired by generate
   KOKKOS_INLINE_FUNCTION
   void release(int i) const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (m_count < ::Kokkos::OpenMP::impl_thread_pool_size())
-      ::Kokkos::Impl::concurrent_bitset::release(m_buffer, i);
-#else
-    (void)i;
-#endif
+    KOKKOS_IF_ON_HOST(
+        (if (m_count < ::Kokkos::OpenMP::impl_thread_pool_size()) {
+          ::Kokkos::Impl::concurrent_bitset::release(m_buffer, i);
+        }))
+
+    KOKKOS_IF_ON_DEVICE(((void)i;))
   }
 };
 
@@ -324,21 +326,17 @@ class UniqueToken<OpenMP, UniqueTokenScope::Global> {
   /// \brief upper bound for acquired values, i.e. 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int size() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return Kokkos::Impl::g_openmp_hardware_max_threads;
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((return Kokkos::Impl::g_openmp_hardware_max_threads;))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int acquire() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return Kokkos::Impl::t_openmp_hardware_id;
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((return Kokkos::Impl::t_openmp_hardware_id;))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief release a value acquired by generate
@@ -354,11 +352,9 @@ inline int OpenMP::impl_thread_pool_size(int depth) {
 
 KOKKOS_INLINE_FUNCTION
 int OpenMP::impl_hardware_thread_id() noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  return Impl::t_openmp_hardware_id;
-#else
-  return -1;
-#endif
+  KOKKOS_IF_ON_HOST((return Impl::t_openmp_hardware_id;))
+
+  KOKKOS_IF_ON_DEVICE((return -1;))
 }
 
 inline int OpenMP::impl_max_hardware_threads() noexcept {
diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
index 2fc522780a495971a1d6455e19260bad0b422207..764dd906545cf607c1264ff7830db4936bdc4f65 100644
--- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
+++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
@@ -1041,7 +1041,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
   inline void execute() const {
     enum { is_dynamic = std::is_same<SchedTag, Kokkos::Dynamic>::value };
 
-    if (m_policy.league_size() * m_policy.team_size() == 0) {
+    if (m_policy.league_size() == 0 || m_policy.team_size() == 0) {
       if (m_result_ptr) {
         ValueInit::init(ReducerConditional::select(m_functor, m_reducer),
                         m_result_ptr);
diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
index b99b0017ca17df6462f0ea2b03b65d47121e47d3..c95951a1119272cac3e35382a3224adcfe25c24c 100644
--- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
+++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
@@ -121,7 +121,8 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
 #endif
           reinterpret_cast<SharedAllocationHeader *>(arg_space.allocate(
               sizeof(SharedAllocationHeader) + arg_alloc_size)),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
   SharedAllocationHeader header;
 
@@ -131,6 +132,10 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
   // DeepCopy
   Kokkos::Impl::DeepCopy<Experimental::OpenMPTargetSpace, HostSpace>(
       RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
+  Kokkos::fence(
+      "SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, "
+      "void>::SharedAllocationRecord(): fence after copying header from "
+      "HostSpace");
 }
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp
index ccfc756213695df6479634a67405d7d89308dbb8..9d0507847129d17d0a33962bacdfaefacb95ed1e 100644
--- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp
+++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp
@@ -408,6 +408,316 @@ struct OpenMPTargetReducerWrapper<MinMaxLoc<Scalar, Index, Space>> {
     val.min_loc = reduction_identity<index_type>::min();
   }
 };
+
+//
+// specialize for MaxFirstLoc
+//
+template <class Scalar, class Index, class Space>
+struct OpenMPTargetReducerWrapper<MaxFirstLoc<Scalar, Index, Space>> {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using value_type = ValLocScalar<scalar_type, index_type>;
+
+// WORKAROUND OPENMPTARGET
+// This pragma omp declare target should not be necessary, but Intel compiler
+// fails without it
+#pragma omp declare target
+  KOKKOS_INLINE_FUNCTION
+  static void join(value_type& dest, const value_type& src) {
+    if (dest.val < src.val) {
+      dest = src;
+    } else if (!(src.val < dest.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type& dest, const volatile value_type& src) {
+    if (dest.val < src.val) {
+      dest = src;
+    } else if (!(src.val < dest.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(value_type& val) {
+    val.val = reduction_identity<scalar_type>::max();
+    val.loc = reduction_identity<index_type>::min();
+  }
+#pragma omp end declare target
+};
+
+//
+// specialize for MinFirstLoc
+//
+template <class Scalar, class Index, class Space>
+struct OpenMPTargetReducerWrapper<MinFirstLoc<Scalar, Index, Space>> {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using value_type = ValLocScalar<scalar_type, index_type>;
+
+// WORKAROUND OPENMPTARGET
+// This pragma omp declare target should not be necessary, but Intel compiler
+// fails without it
+#pragma omp declare target
+  KOKKOS_INLINE_FUNCTION
+  static void join(value_type& dest, const value_type& src) {
+    if (src.val < dest.val) {
+      dest = src;
+    } else if (!(dest.val < src.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type& dest, const volatile value_type& src) {
+    if (src.val < dest.val) {
+      dest = src;
+    } else if (!(dest.val < src.val)) {
+      dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(value_type& val) {
+    val.val = reduction_identity<scalar_type>::min();
+    val.loc = reduction_identity<index_type>::min();
+  }
+#pragma omp end declare target
+};
+
+//
+// specialize for MinMaxFirstLastLoc
+//
+template <class Scalar, class Index, class Space>
+struct OpenMPTargetReducerWrapper<MinMaxFirstLastLoc<Scalar, Index, Space>> {
+ private:
+  using scalar_type = typename std::remove_cv<Scalar>::type;
+  using index_type  = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using value_type = MinMaxLocScalar<scalar_type, index_type>;
+
+// WORKAROUND OPENMPTARGET
+// This pragma omp declare target should not be necessary, but Intel compiler
+// fails without it
+#pragma omp declare target
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  static void join(value_type& dest, const value_type& src) {
+    if (src.min_val < dest.min_val) {
+      dest.min_val = src.min_val;
+      dest.min_loc = src.min_loc;
+    } else if (!(dest.min_val < src.min_val)) {
+      dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc;
+    }
+
+    if (dest.max_val < src.max_val) {
+      dest.max_val = src.max_val;
+      dest.max_loc = src.max_loc;
+    } else if (!(src.max_val < dest.max_val)) {
+      dest.max_loc = (src.max_loc > dest.max_loc) ? src.max_loc : dest.max_loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type& dest, const volatile value_type& src) {
+    if (src.min_val < dest.min_val) {
+      dest.min_val = src.min_val;
+      dest.min_loc = src.min_loc;
+    } else if (!(dest.min_val < src.min_val)) {
+      dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc;
+    }
+
+    if (dest.max_val < src.max_val) {
+      dest.max_val = src.max_val;
+      dest.max_loc = src.max_loc;
+    } else if (!(src.max_val < dest.max_val)) {
+      dest.max_loc = (src.max_loc > dest.max_loc) ? src.max_loc : dest.max_loc;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(value_type& val) {
+    val.max_val = reduction_identity<scalar_type>::max();
+    val.min_val = reduction_identity<scalar_type>::min();
+    val.max_loc = reduction_identity<index_type>::max();
+    val.min_loc = reduction_identity<index_type>::min();
+  }
+#pragma omp end declare target
+};
+
+//
+// specialize for FirstLoc
+//
+template <class Index, class Space>
+struct OpenMPTargetReducerWrapper<FirstLoc<Index, Space>> {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using value_type = FirstLocScalar<index_type>;
+
+// WORKAROUND OPENMPTARGET
+// This pragma omp declare target should not be necessary, but Intel compiler
+// fails without it
+#pragma omp declare target
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  static void join(value_type& dest, const value_type& src) {
+    dest.min_loc_true = (src.min_loc_true < dest.min_loc_true)
+                            ? src.min_loc_true
+                            : dest.min_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type& dest, const volatile value_type& src) {
+    dest.min_loc_true = (src.min_loc_true < dest.min_loc_true)
+                            ? src.min_loc_true
+                            : dest.min_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(value_type& val) {
+    val.min_loc_true = reduction_identity<index_type>::min();
+  }
+#pragma omp end declare target
+};
+
+//
+// specialize for LastLoc
+//
+template <class Index, class Space>
+struct OpenMPTargetReducerWrapper<LastLoc<Index, Space>> {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using value_type = LastLocScalar<index_type>;
+
+// WORKAROUND OPENMPTARGET
+// This pragma omp declare target should not be necessary, but Intel compiler
+// fails without it
+#pragma omp declare target
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  static void join(value_type& dest, const value_type& src) {
+    dest.max_loc_true = (src.max_loc_true > dest.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type& dest, const volatile value_type& src) {
+    dest.max_loc_true = (src.max_loc_true > dest.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(value_type& val) {
+    val.max_loc_true = reduction_identity<index_type>::max();
+  }
+#pragma omp end declare target
+};
+
+//
+// specialize for StdIsPartitioned
+//
+template <class Index, class Space>
+struct OpenMPTargetReducerWrapper<StdIsPartitioned<Index, Space>> {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using value_type = StdIsPartScalar<index_type>;
+
+// WORKAROUND OPENMPTARGET
+// This pragma omp declare target should not be necessary, but Intel compiler
+// fails without it
+#pragma omp declare target
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  static void join(value_type& dest, const value_type& src) {
+    dest.max_loc_true = (dest.max_loc_true < src.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type& dest, const volatile value_type& src) {
+    dest.max_loc_true = (dest.max_loc_true < src.max_loc_true)
+                            ? src.max_loc_true
+                            : dest.max_loc_true;
+
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(value_type& val) {
+    val.max_loc_true  = ::Kokkos::reduction_identity<index_type>::max();
+    val.min_loc_false = ::Kokkos::reduction_identity<index_type>::min();
+  }
+#pragma omp end declare target
+};
+
+//
+// specialize for StdPartitionPoint
+//
+template <class Index, class Space>
+struct OpenMPTargetReducerWrapper<StdPartitionPoint<Index, Space>> {
+ private:
+  using index_type = typename std::remove_cv<Index>::type;
+
+ public:
+  // Required
+  using value_type = StdPartPointScalar<index_type>;
+
+// WORKAROUND OPENMPTARGET
+// This pragma omp declare target should not be necessary, but Intel compiler
+// fails without it
+#pragma omp declare target
+  // Required
+  KOKKOS_INLINE_FUNCTION
+  static void join(value_type& dest, const value_type& src) {
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type& dest, const volatile value_type& src) {
+    dest.min_loc_false = (dest.min_loc_false < src.min_loc_false)
+                             ? dest.min_loc_false
+                             : src.min_loc_false;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(value_type& val) {
+    val.min_loc_false = ::Kokkos::reduction_identity<index_type>::min();
+  }
+#pragma omp end declare target
+};
+
 /*
 template<class ReducerType>
 class OpenMPTargetReducerWrapper {
@@ -835,8 +1145,9 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenMPTarget, Properties...>
         m_chunk_size(p.m_chunk_size) {}
 
   /** \brief  Specify league size, request team size */
-  TeamPolicyInternal(typename traits::execution_space&, int league_size_request,
-                     int team_size_request, int vector_length_request = 1)
+  TeamPolicyInternal(const typename traits::execution_space&,
+                     int league_size_request, int team_size_request,
+                     int vector_length_request = 1)
       : m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
         m_tune_team_size(false),
@@ -845,7 +1156,8 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenMPTarget, Properties...>
     init(league_size_request, team_size_request, vector_length_request);
   }
 
-  TeamPolicyInternal(typename traits::execution_space&, int league_size_request,
+  TeamPolicyInternal(const typename traits::execution_space&,
+                     int league_size_request,
                      const Kokkos::AUTO_t& /* team_size_request */
                      ,
                      int vector_length_request = 1)
@@ -858,7 +1170,8 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenMPTarget, Properties...>
          vector_length_request);
   }
 
-  TeamPolicyInternal(typename traits::execution_space&, int league_size_request,
+  TeamPolicyInternal(const typename traits::execution_space&,
+                     int league_size_request,
                      const Kokkos::AUTO_t& /* team_size_request */
                      ,
                      const Kokkos::AUTO_t& /* vector_length_request */)
@@ -869,8 +1182,8 @@ class TeamPolicyInternal<Kokkos::Experimental::OpenMPTarget, Properties...>
         m_chunk_size(0) {
     init(league_size_request, default_team_size, 1);
   }
-  TeamPolicyInternal(typename traits::execution_space&, int league_size_request,
-                     int team_size_request,
+  TeamPolicyInternal(const typename traits::execution_space&,
+                     int league_size_request, int team_size_request,
                      const Kokkos::AUTO_t& /* vector_length_request */)
       : m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp
index 08a3109408bd88bcfa04f3fe31d09f5a6e1cff2c..ab38dea024caf64d498de18c4ff19790ddf4f74a 100644
--- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp
+++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp
@@ -168,16 +168,22 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>,
     const auto begin = p.begin();
     const auto end   = p.end();
 
-    if (end <= begin) return;
+    ValueType result;
+    OpenMPTargetReducerWrapper<ReducerType>::init(result);
 
-    ValueType result = ValueType();
+    // Initialize and copy back the result even if it is a zero length
+    // reduction.
+    if (end <= begin) {
+      ParReduceCommon::memcpy_result(result_ptr, &result, sizeof(ValueType),
+                                     ptr_on_device);
+      return;
+    }
 
 #pragma omp declare reduction(                                         \
     custom:ValueType                                                   \
     : OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \
     initializer(OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv))
 
-    OpenMPTargetReducerWrapper<ReducerType>::init(result);
 #pragma omp target teams distribute parallel for map(to                    \
                                                      : f) reduction(custom \
                                                                     : result)
@@ -203,12 +209,17 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>,
     const auto begin = p.begin();
     const auto end   = p.end();
 
-    if (end <= begin) return;
-
-    ValueType result = ValueType();
-
     // Enter the loop if the reduction is on a scalar type.
     if constexpr (NumReductions == 1) {
+      ValueType result = ValueType();
+
+      // Initialize and copy back the result even if it is a zero length
+      // reduction.
+      if (end <= begin) {
+        ParReduceCommon::memcpy_result(result_ptr, &result, sizeof(ValueType),
+                                       ptr_on_device);
+        return;
+      }
       // Case where reduction is on a native data type.
       if constexpr (std::is_arithmetic<ValueType>::value) {
 #pragma omp target teams distribute parallel for \
@@ -233,7 +244,20 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>,
             f(TagType(), i, result);
           }
       }
+
+      ParReduceCommon::memcpy_result(result_ptr, &result, sizeof(ValueType),
+                                     ptr_on_device);
     } else {
+      ValueType result[NumReductions] = {};
+
+      // Initialize and copy back the result even if it is a zero length
+      // reduction.
+      if (end <= begin) {
+        ParReduceCommon::memcpy_result(result_ptr, result,
+                                       NumReductions * sizeof(ValueType),
+                                       ptr_on_device);
+        return;
+      }
 #pragma omp target teams distribute parallel for map(to:f) reduction(+:result[:NumReductions])
       for (auto i = begin; i < end; ++i) {
         if constexpr (std::is_same<TagType, void>::value) {
@@ -242,10 +266,10 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>,
           f(TagType(), i, result);
         }
       }
-    }
 
-    ParReduceCommon::memcpy_result(result_ptr, &result, sizeof(ValueType),
-                                   ptr_on_device);
+      ParReduceCommon::memcpy_result(
+          result_ptr, result, NumReductions * sizeof(ValueType), ptr_on_device);
+    }
   }
 
   static void execute_init_join(const FunctorType& f, const PolicyType& p,
@@ -896,8 +920,6 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>,
                                      shmem_size_L0, shmem_size_L1);
     void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr();
 
-    ValueType result = ValueType();
-
     // Maximum active teams possible.
     int max_active_teams = OpenMPTargetExec::MAX_ACTIVE_THREADS / team_size;
     const auto nteams =
@@ -905,6 +927,8 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>,
 
     // Case where the number of reduction items is 1.
     if constexpr (NumReductions == 1) {
+      ValueType result = ValueType();
+
       // Case where reduction is on a native data type.
       if constexpr (std::is_arithmetic<ValueType>::value) {
 #pragma omp target teams num_teams(nteams) thread_limit(team_size) map(to   \
@@ -958,7 +982,12 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>,
             Kokkos::abort("`num_teams` clause was not respected.\n");
         }
       }
+
+      // Copy results back to device if `parallel_reduce` is on a device view.
+      ParReduceCommon::memcpy_result(result_ptr, &result, sizeof(ValueType),
+                                     ptr_on_device);
     } else {
+      ValueType result[NumReductions] = {};
       // Case where the reduction is on an array.
 #pragma omp target teams num_teams(nteams) thread_limit(team_size) map(to   \
                                                                        : f) \
@@ -983,43 +1012,39 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>,
         } else
           Kokkos::abort("`num_teams` clause was not respected.\n");
       }
-    }
 
-    // Copy results back to device if `parallel_reduce` is on a device view.
-    ParReduceCommon::memcpy_result(result_ptr, &result, sizeof(ValueType),
-                                   ptr_on_device);
+      // Copy results back to device if `parallel_reduce` is on a device view.
+      ParReduceCommon::memcpy_result(
+          result_ptr, result, NumReductions * sizeof(ValueType), ptr_on_device);
+    }
   }
 
   // FIXME_OPENMPTARGET : This routine is a copy from `parallel_reduce` over
   // RangePolicy. Need a new implementation.
   static void execute_init_join(const FunctorType& f, const PolicyType& p,
                                 PointerType ptr, const bool ptr_on_device) {
-    const auto begin      = p.begin();
-    const auto end        = p.end();
     constexpr int HasInit = ReduceFunctorHasInit<FunctorType>::value;
 
-    const auto size = end - begin;
-
     const int league_size   = p.league_size();
     const int team_size     = p.team_size();
     const int vector_length = p.impl_vector_length();
 
+    auto begin = 0;
+    auto end   = league_size * team_size + team_size * vector_length;
+
     const size_t shmem_size_L0 = p.scratch_size(0, team_size);
     const size_t shmem_size_L1 = p.scratch_size(1, team_size);
 
     // FIXME_OPENMPTARGET: This would oversubscribe scratch memory since we are
     // already using the available scratch memory to create temporaries for each
     // thread.
-    if constexpr ((shmem_size_L0 + shmem_size_L1) > 0) {
+    if ((shmem_size_L0 + shmem_size_L1) > 0) {
       Kokkos::abort(
           "OpenMPTarget: Scratch memory is not supported in `parallel_reduce` "
           "over functors with init/join.");
     }
 
-    // Maximum active teams possible.
-    int max_active_teams = OpenMPTargetExec::MAX_ACTIVE_THREADS / team_size;
-    const auto nteams =
-        league_size < max_active_teams ? league_size : max_active_teams;
+    const auto nteams = league_size;
 
     // Number of elements in the reduction
     const auto value_count =
@@ -1100,7 +1125,7 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>,
     is_device_ptr(scratch_ptr)
       for (int i = 0; i < nteams - tree_neighbor_offset;
            i += 2 * tree_neighbor_offset) {
-        ValueType* team_scratch = scratch_ptr;
+        ValueType* team_scratch = static_cast<ValueType*>(scratch_ptr);
         const int team_offset   = team_size * value_count;
         ValueJoin::join(
             f, &team_scratch[i * team_offset],
diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
index 0e71a239caf343d77f6ed05ff02bb2e45ca64efd..a46a64ea640f34d811751981b07e44ee71386230 100644
--- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
+++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
@@ -95,8 +95,6 @@ TaskExec<Kokkos::Experimental::OpenMPTarget>::TaskExec(
   Kokkos::memory_fence();
 }
 
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-
 void TaskExec<Kokkos::Experimental::OpenMPTarget>::team_barrier_impl() const {
   if (m_team_exec->scratch_reduce_size() < int(2 * sizeof(int64_t))) {
     Kokkos::abort("TaskQueue<OpenMPTarget> scratch_reduce memory too small");
@@ -125,8 +123,6 @@ void TaskExec<Kokkos::Experimental::OpenMPTarget>::team_barrier_impl() const {
   }
 }
 
-#endif
-
 //----------------------------------------------------------------------------
 
 void TaskQueueSpecialization<Kokkos::Experimental::OpenMPTarget>::execute(
diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp
index c5959a0ad7f50a71074ef03f2def9c4334510eca..f7ba01dffecb950de6320e1028d2c8d36744e089 100644
--- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp
+++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp
@@ -111,27 +111,27 @@ class TaskExec<Kokkos::Experimental::OpenMPTarget> {
   void team_barrier_impl() const;
 
  public:
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  void* team_shared() const {
-    return m_team_exec ? m_team_exec->scratch_thread() : nullptr;
+  KOKKOS_FUNCTION void* team_shared() const {
+    KOKKOS_IF_ON_HOST(
+        (return m_team_exec ? m_team_exec->scratch_thread() : nullptr;))
+
+    KOKKOS_IF_ON_DEVICE((return nullptr;))
   }
 
-  int team_shared_size() const {
-    return m_team_exec ? m_team_exec->scratch_thread_size() : 0;
+  KOKKOS_FUNCTION int team_shared_size() const {
+    KOKKOS_IF_ON_HOST(
+        (return m_team_exec ? m_team_exec->scratch_thread_size() : 0;))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /**\brief  Whole team enters this function call
    *         before any teeam member returns from
    *         this function call.
    */
-  void team_barrier() const {
-    if (1 < m_team_size) team_barrier_impl();
+  KOKKOS_FUNCTION void team_barrier() const {
+    KOKKOS_IF_ON_HOST((if (1 < m_team_size) { team_barrier_impl(); }))
   }
-#else
-  KOKKOS_INLINE_FUNCTION void team_barrier() const {}
-  KOKKOS_INLINE_FUNCTION void* team_shared() const { return 0; }
-  KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0; }
-#endif
 
   KOKKOS_INLINE_FUNCTION
   int team_rank() const { return m_team_rank; }
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp
index 18d33317a29819274037085b6a69e9239797f1a8..48f6b74dc1e280bcf62de6f5cf715acb0643d571 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp
@@ -131,7 +131,7 @@ void SYCL::impl_static_fence(const std::string& name) {
           GlobalDeviceSynchronization,
       [&]() {
         // guard accessing all_queues
-        std::lock_guard<std::mutex> lock(Impl::SYCLInternal::mutex);
+        std::scoped_lock lock(Impl::SYCLInternal::mutex);
         for (auto& queue : Impl::SYCLInternal::all_queues) {
           try {
             (*queue)->wait_and_throw();
@@ -268,9 +268,6 @@ std::ostream& SYCL::impl_sycl_info(std::ostream& os,
             << "\nVendor: " << device.get_info<device::vendor>()
             << "\nProfile: " << device.get_info<device::profile>()
             << "\nVersion: " << device.get_info<device::version>()
-            << "\nExtensions: "
-            << Container<std::vector<std::string>>(
-                   device.get_info<device::extensions>())
             << "\nPrintf Buffer Size: "
             << device.get_info<device::printf_buffer_size>()
             << "\nPreferred Interop User Sync: "
@@ -287,7 +284,11 @@ int g_sycl_space_factory_initialized =
     Kokkos::Impl::initialize_space_factory<SYCLSpaceInitializer>("170_SYCL");
 
 void SYCLSpaceInitializer::initialize(const InitArguments& args) {
-  int use_gpu = Kokkos::Impl::get_gpu(args);
+  // If there are no GPUs return whatever else we can run on if no specific GPU
+  // is requested.
+  const auto num_gpus =
+      sycl::device::get_devices(sycl::info::device_type::gpu).size();
+  int use_gpu = num_gpus == 0 ? args.device_id : Kokkos::Impl::get_gpu(args);
 
   if (std::is_same<Kokkos::Experimental::SYCL,
                    Kokkos::DefaultExecutionSpace>::value ||
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Abort.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Abort.hpp
index 13d6dc1a4a705421a05ce3f86e28f376de0ac41b..e376f012f73acdb7ddd4875bf23e4f7dfcf385ba 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Abort.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Abort.hpp
@@ -47,12 +47,19 @@
 
 #include <Kokkos_Macros.hpp>
 #if defined(KOKKOS_ENABLE_SYCL)
+#include <CL/sycl.hpp>
 
 namespace Kokkos {
 namespace Impl {
 
-inline void sycl_abort(char const *msg) {
+inline void sycl_abort(char const* msg) {
+#ifdef NDEBUG
   KOKKOS_IMPL_DO_NOT_USE_PRINTF("Aborting with message %s.\n", msg);
+#else
+  // Choosing "" here causes problems but a single whitespace character works.
+  const char* empty = " ";
+  __assert_fail(msg, empty, 0, empty);
+#endif
 }
 
 }  // namespace Impl
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp
index 3eeab5636342031955920c81e807b218d662f3b8..160f6068482b0884abdef061a8a8f83fb018b2db 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp
@@ -63,8 +63,8 @@ struct ZeroMemset<Kokkos::Experimental::SYCL, DT, DP...> {
     auto event = exec_space.impl_internal_space_instance()->m_queue->memset(
         dst.data(), 0,
         dst.size() * sizeof(typename View<DT, DP...>::value_type));
-    exec_space.impl_internal_space_instance()->m_queue->submit_barrier(
-        std::vector<sycl::event>{event});
+    exec_space.impl_internal_space_instance()
+        ->m_queue->ext_oneapi_submit_barrier(std::vector<sycl::event>{event});
   }
 
   ZeroMemset(const View<DT, DP...>& dst,
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Conversion.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Conversion.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3adbb16268e35999458d6f5c2db7fbe3e79ea6f3
--- /dev/null
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Conversion.hpp
@@ -0,0 +1,159 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.5
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_SYCL_HALF_HPP_
+#define KOKKOS_SYCL_HALF_HPP_
+
+#ifdef KOKKOS_IMPL_SYCL_HALF_TYPE_DEFINED
+
+#include <Kokkos_Half.hpp>
+#include <Kokkos_NumericTraits.hpp>  // reduction_identity
+
+namespace Kokkos {
+namespace Experimental {
+
+/************************** half conversions **********************************/
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(half_t val) { return val; }
+
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(float val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(double val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(short val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned short val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(int val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned int val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long long val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long long val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(long val) { return half_t::impl_type(val); }
+KOKKOS_INLINE_FUNCTION
+half_t cast_to_half(unsigned long val) { return half_t::impl_type(val); }
+
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, float>::value, T>
+cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, double>::value, T>
+cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T>
+cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned short>::value, T>
+    cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T>
+cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned int>::value, T>
+cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T>
+cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long long>::value, T>
+    cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long>::value, T>
+cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+template <class T>
+KOKKOS_INLINE_FUNCTION
+    std::enable_if_t<std::is_same<T, unsigned long>::value, T>
+    cast_from_half(half_t val) {
+  return half_t::impl_type(val);
+}
+}  // namespace Experimental
+
+template <>
+struct reduction_identity<Kokkos::Experimental::half_t> {
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static Kokkos::Experimental::half_t
+  sum() noexcept {
+    return Kokkos::Experimental::half_t::impl_type(0.0F);
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static Kokkos::Experimental::half_t
+  prod() noexcept {
+    return Kokkos::Experimental::half_t::impl_type(1.0F);
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static Kokkos::Experimental::half_t
+  max() noexcept {
+    return std::numeric_limits<
+        Kokkos::Experimental::half_t::impl_type>::lowest();
+  }
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static Kokkos::Experimental::half_t
+  min() noexcept {
+    return std::numeric_limits<Kokkos::Experimental::half_t::impl_type>::max();
+  }
+};
+
+}  // namespace Kokkos
+#endif  // KOKKOS_IMPL_SYCL_HALF_TYPE_DEFINED
+#endif
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Impl_Type.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Impl_Type.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cf7d513b990e0e82e51939ca6b333b67610e4314
--- /dev/null
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Impl_Type.hpp
@@ -0,0 +1,67 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.5
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_SYCL_HALF_IMPL_TYPE_HPP_
+#define KOKKOS_SYCL_HALF_IMPL_TYPE_HPP_
+
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_SYCL
+
+#include <CL/sycl.hpp>
+
+#ifndef KOKKOS_IMPL_HALF_TYPE_DEFINED
+// Make sure no one else tries to define half_t
+#define KOKKOS_IMPL_HALF_TYPE_DEFINED
+#define KOKKOS_IMPL_SYCL_HALF_TYPE_DEFINED
+
+namespace Kokkos {
+namespace Impl {
+struct half_impl_t {
+  using type = sycl::half;
+};
+}  // namespace Impl
+}  // namespace Kokkos
+#endif  // KOKKOS_IMPL_HALF_TYPE_DEFINED
+#endif  // KOKKOS_ENABLE_SYCL
+#endif
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp
index 816b42038ed0bb1605d005375bd39d2de4e3d69d..0cf5a95d8a6ae31488b5849119f7bb1ef1cb16ad 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp
@@ -51,9 +51,19 @@ namespace Impl {
 std::vector<std::optional<sycl::queue>*> SYCLInternal::all_queues;
 std::mutex SYCLInternal::mutex;
 
+Kokkos::View<uint32_t*, SYCLDeviceUSMSpace> sycl_global_unique_token_locks(
+    bool deallocate) {
+  static Kokkos::View<uint32_t*, SYCLDeviceUSMSpace> locks =
+      Kokkos::View<uint32_t*, SYCLDeviceUSMSpace>();
+  if (!deallocate && locks.extent(0) == 0)
+    locks = Kokkos::View<uint32_t*, SYCLDeviceUSMSpace>(
+        "Kokkos::UniqueToken<SYCL>::m_locks", SYCL().concurrency());
+  if (deallocate) locks = Kokkos::View<uint32_t*, SYCLDeviceUSMSpace>();
+  return locks;
+}
+
 SYCLInternal::~SYCLInternal() {
-  if (!was_finalized || m_scratchSpace || m_scratchFlags ||
-      m_scratchConcurrentBitset) {
+  if (!was_finalized || m_scratchSpace || m_scratchFlags) {
     std::cerr << "Kokkos::Experimental::SYCL ERROR: Failed to call "
                  "Kokkos::Experimental::SYCL::finalize()"
               << std::endl;
@@ -88,7 +98,11 @@ void SYCLInternal::initialize(const sycl::device& d) {
       Kokkos::Impl::throw_runtime_exception(
           "There was an asynchronous SYCL error!\n");
   };
-  initialize(sycl::queue{d, exception_handler});
+  // FIXME_SYCL using an in-order queue here should not be necessary since we
+  // are using submit_barrier for managing kernel dependencies but this seems to
+  // be required as a hot fix for now.
+  initialize(
+      sycl::queue{d, exception_handler, sycl::property::queue::in_order()});
 }
 
 // FIXME_SYCL
@@ -111,7 +125,7 @@ void SYCLInternal::initialize(const sycl::queue& q) {
     m_queue = q;
     // guard pushing to all_queues
     {
-      std::lock_guard<std::mutex> lock(mutex);
+      std::scoped_lock lock(mutex);
       all_queues.push_back(&m_queue);
     }
     const sycl::device& d = m_queue->get_device();
@@ -135,19 +149,15 @@ void SYCLInternal::initialize(const sycl::queue& q) {
                            "Kokkos::Experimental::SYCL::InternalScratchBitset",
                            sizeof(uint32_t) * buffer_bound);
       Record::increment(r);
-      m_scratchConcurrentBitset = reinterpret_cast<uint32_t*>(r->data());
-      auto event                = m_queue->memset(m_scratchConcurrentBitset, 0,
-                                   sizeof(uint32_t) * buffer_bound);
-      fence(event,
-            "Kokkos::Experimental::SYCLInternal::initialize: fence after "
-            "initializing m_scratchConcurrentBitset",
-            m_instance_id);
     }
 
     m_maxShmemPerBlock =
         d.template get_info<sycl::info::device::local_mem_size>();
-    m_indirectKernelMem.reset(*m_queue, m_instance_id);
-    m_indirectReducerMem.reset(*m_queue, m_instance_id);
+
+    for (auto& usm_mem : m_indirectKernelMem) {
+      usm_mem.reset(*m_queue, m_instance_id);
+    }
+
   } else {
     std::ostringstream msg;
     msg << "Kokkos::Experimental::SYCL::initialize(...) FAILED";
@@ -189,6 +199,10 @@ void SYCLInternal::finalize() {
                       m_instance_id);
   was_finalized = true;
 
+  // The global_unique_token_locks array is static and should only be
+  // deallocated once by the defualt instance
+  if (this == &singleton()) Impl::sycl_global_unique_token_locks(true);
+
   using RecordSYCL = Kokkos::Impl::SharedAllocationRecord<SYCLDeviceUSMSpace>;
   if (nullptr != m_scratchSpace)
     RecordSYCL::decrement(RecordSYCL::get_record(m_scratchSpace));
@@ -200,27 +214,22 @@ void SYCLInternal::finalize() {
   m_scratchFlagsCount = 0;
   m_scratchFlags      = nullptr;
 
-  RecordSYCL::decrement(RecordSYCL::get_record(m_scratchConcurrentBitset));
-  m_scratchConcurrentBitset = nullptr;
-
   if (m_team_scratch_current_size > 0)
     Kokkos::kokkos_free<Kokkos::Experimental::SYCLDeviceUSMSpace>(
         m_team_scratch_ptr);
   m_team_scratch_current_size = 0;
   m_team_scratch_ptr          = nullptr;
 
-  m_indirectKernelMem.reset();
-  m_indirectReducerMem.reset();
+  for (auto& usm_mem : m_indirectKernelMem) usm_mem.reset();
   // guard erasing from all_queues
   {
-    std::lock_guard<std::mutex> lock(mutex);
+    std::scoped_lock lock(mutex);
     all_queues.erase(std::find(all_queues.begin(), all_queues.end(), &m_queue));
   }
   m_queue.reset();
 }
 
-void* SYCLInternal::scratch_space(
-    const Kokkos::Experimental::SYCL::size_type size) {
+void* SYCLInternal::scratch_space(const std::size_t size) {
   const size_type sizeScratchGrain =
       sizeof(Kokkos::Experimental::SYCL::size_type);
   if (verify_is_initialized("scratch_space") &&
@@ -246,8 +255,7 @@ void* SYCLInternal::scratch_space(
   return m_scratchSpace;
 }
 
-void* SYCLInternal::scratch_flags(
-    const Kokkos::Experimental::SYCL::size_type size) {
+void* SYCLInternal::scratch_flags(const std::size_t size) {
   const size_type sizeScratchGrain =
       sizeof(Kokkos::Experimental::SYCL::size_type);
   if (verify_is_initialized("scratch_flags") &&
@@ -300,6 +308,17 @@ template void SYCLInternal::fence_helper<sycl::event>(sycl::event&,
                                                       const std::string&,
                                                       uint32_t);
 
+// This function cycles through a pool of USM allocations for functors
+SYCLInternal::IndirectKernelMem& SYCLInternal::get_indirect_kernel_mem() {
+  // Thread safety: atomically increment round robin variable
+  // NB: atomic_fetch_inc_mod returns values in range [0-N], not
+  // [0-N) as might be expected.
+  size_t next_pool = desul::atomic_fetch_inc_mod(
+      &m_pool_next, m_usm_pool_size - 1, desul::MemoryOrderRelaxed(),
+      desul::MemoryScopeDevice());
+  return m_indirectKernelMem[next_pool];
+}
+
 template <sycl::usm::alloc Kind>
 size_t SYCLInternal::USMObjectMem<Kind>::reserve(size_t n) {
   assert(m_q);
@@ -313,7 +332,9 @@ size_t SYCLInternal::USMObjectMem<Kind>::reserve(size_t n) {
         AllocationSpace(*m_q), "Kokkos::Experimental::SYCL::USMObjectMem", n);
     Record::increment(r);
 
-    m_data     = r->data();
+    m_data = r->data();
+    if constexpr (sycl::usm::alloc::device == Kind)
+      m_staging.reset(new char[n]);
     m_capacity = n;
   }
 
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp
index bf4d6c5b459579213866f6dcb99332c6e641c3a1..907e4e9efe1cd4c2921d0721e73b971fa8104d2f 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp
@@ -66,8 +66,8 @@ class SYCLInternal {
   SYCLInternal& operator=(SYCLInternal&&) = delete;
   SYCLInternal(SYCLInternal&&)            = delete;
 
-  void* scratch_space(const size_type size);
-  void* scratch_flags(const size_type size);
+  void* scratch_space(const std::size_t size);
+  void* scratch_flags(const std::size_t size);
   void* resize_team_scratch_space(std::int64_t bytes,
                                   bool force_shrink = false);
 
@@ -78,14 +78,16 @@ class SYCLInternal {
   uint32_t m_maxConcurrency   = 0;
   uint64_t m_maxShmemPerBlock = 0;
 
-  uint32_t* m_scratchConcurrentBitset = nullptr;
-  size_type m_scratchSpaceCount       = 0;
-  size_type* m_scratchSpace           = nullptr;
-  size_type m_scratchFlagsCount       = 0;
-  size_type* m_scratchFlags           = nullptr;
+  std::size_t m_scratchSpaceCount = 0;
+  size_type* m_scratchSpace       = nullptr;
+  std::size_t m_scratchFlagsCount = 0;
+  size_type* m_scratchFlags       = nullptr;
+  // mutex to access shared memory
+  mutable std::mutex m_mutexScratchSpace;
 
   int64_t m_team_scratch_current_size = 0;
   void* m_team_scratch_ptr            = nullptr;
+  mutable std::mutex m_team_scratch_mutex;
 
   uint32_t m_instance_id = Kokkos::Tools::Experimental::Impl::idForInstance<
       Kokkos::Experimental::SYCL>(reinterpret_cast<uintptr_t>(this));
@@ -142,15 +144,13 @@ class SYCLInternal {
     // (otherwise) and returns a reference to the copied object.
     template <typename T>
     T& copy_from(const T& t) {
+      m_mutex.lock();
       fence();
       reserve(sizeof(T));
       if constexpr (sycl::usm::alloc::device == Kind) {
-        sycl::event memcopied =
-            m_q->memcpy(m_data, std::addressof(t), sizeof(T));
-        SYCLInternal::fence(
-            memcopied,
-            "Kokkos::Experimental::SYCLInternal::USMObject fence after copy",
-            m_instance_id);
+        std::memcpy(static_cast<void*>(m_staging.get()), std::addressof(t),
+                    sizeof(T));
+        m_copy_event = m_q->memcpy(m_data, m_staging.get(), sizeof(T));
       } else
         std::memcpy(m_data, std::addressof(t), sizeof(T));
       return *reinterpret_cast<T*>(m_data);
@@ -169,8 +169,11 @@ class SYCLInternal {
                  .get_info<sycl::info::event::command_execution_status>() ==
              sycl::info::event_command_status::complete);
       m_last_event = event;
+      m_mutex.unlock();
     }
 
+    sycl::event get_copy_event() const { return m_copy_event; }
+
    private:
     // USMObjectMem class invariants
     // All four expressions below must evaluate to true:
@@ -182,22 +185,26 @@ class SYCLInternal {
     //  if m_data != nullptr then m_capacity != 0 && m_q != nullopt
     //  if m_data == nullptr then m_capacity == 0
 
+    sycl::event m_copy_event;
+
     std::optional<sycl::queue> m_q;
-    void* m_data      = nullptr;
+    void* m_data = nullptr;
+    std::unique_ptr<char[]> m_staging;
+
     size_t m_capacity = 0;
     sycl::event m_last_event;
 
     uint32_t m_instance_id;
+
+    // mutex to access the underlying memory
+    mutable std::mutex m_mutex;
   };
 
   // An indirect kernel is one where the functor to be executed is explicitly
   // copied to USM memory before being executed, to get around the
   // trivially copyable limitation of SYCL.
-  using IndirectKernelMem = USMObjectMem<sycl::usm::alloc::shared>;
-  IndirectKernelMem m_indirectKernelMem;
-
-  using IndirectReducerMem = USMObjectMem<sycl::usm::alloc::shared>;
-  IndirectReducerMem m_indirectReducerMem;
+  using IndirectKernelMem = USMObjectMem<sycl::usm::alloc::host>;
+  IndirectKernelMem& get_indirect_kernel_mem();
 
   bool was_finalized = false;
 
@@ -220,6 +227,11 @@ class SYCLInternal {
   static void fence_helper(WAT& wat, const std::string& name,
                            uint32_t instance_id);
 
+  const static size_t m_usm_pool_size = 4;
+  std::vector<IndirectKernelMem> m_indirectKernelMem{m_usm_pool_size};
+
+  size_t m_pool_next{0};
+
  public:
   static void fence(sycl::queue& q, const std::string& name,
                     uint32_t instance_id) {
@@ -231,36 +243,92 @@ class SYCLInternal {
   }
 };
 
+// FIXME_SYCL the limit is 2048 bytes for all arguments handed to a kernel,
+// assume for now that the rest doesn't need more than 248 bytes.
+#if defined(SYCL_DEVICE_COPYABLE) && defined(KOKKOS_ARCH_INTEL_GPU)
 template <typename Functor, typename Storage,
-          bool is_memcpyable = std::is_trivially_copyable_v<Functor>>
+          bool ManualCopy = (sizeof(Functor) >= 1800)>
 class SYCLFunctionWrapper;
+#else
+template <typename Functor, typename Storage,
+          bool ManualCopy = (sizeof(Functor) >= 1800 ||
+                             !std::is_trivially_copyable_v<Functor>)>
+class SYCLFunctionWrapper;
+#endif
 
+#if defined(SYCL_DEVICE_COPYABLE) && defined(KOKKOS_ARCH_INTEL_GPU)
 template <typename Functor, typename Storage>
-class SYCLFunctionWrapper<Functor, Storage, true> {
-  const Functor& m_functor;
+class SYCLFunctionWrapper<Functor, Storage, false> {
+  // We need a union here so that we can avoid calling a constructor for m_f
+  // and can controll all the special member functions.
+  union TrivialWrapper {
+    TrivialWrapper(){};
+
+    TrivialWrapper(const Functor& f) { std::memcpy(&m_f, &f, sizeof(m_f)); }
+
+    TrivialWrapper(const TrivialWrapper& other) {
+      std::memcpy(&m_f, &other.m_f, sizeof(m_f));
+    }
+    TrivialWrapper(TrivialWrapper&& other) {
+      std::memcpy(&m_f, &other.m_f, sizeof(m_f));
+    }
+    TrivialWrapper& operator=(const TrivialWrapper& other) {
+      std::memcpy(&m_f, &other.m_f, sizeof(m_f));
+      return *this;
+    }
+    TrivialWrapper& operator=(TrivialWrapper&& other) {
+      std::memcpy(&m_f, &other.m_f, sizeof(m_f));
+      return *this;
+    }
+    ~TrivialWrapper(){};
+
+    Functor m_f;
+  } m_functor;
+
+ public:
+  SYCLFunctionWrapper(const Functor& functor, Storage&) : m_functor(functor) {}
+
+  const Functor& get_functor() const { return m_functor.m_f; }
+
+  sycl::event get_copy_event() const { return {}; }
+
+  static void register_event(sycl::event) {}
+};
+#else
+template <typename Functor, typename Storage>
+class SYCLFunctionWrapper<Functor, Storage, false> {
+  const Functor m_functor;
 
  public:
   SYCLFunctionWrapper(const Functor& functor, Storage&) : m_functor(functor) {}
 
   const Functor& get_functor() const { return m_functor; }
 
-  static void register_event(Storage&, sycl::event){};
+  sycl::event get_copy_event() const { return {}; }
+
+  static void register_event(sycl::event) {}
 };
+#endif
 
 template <typename Functor, typename Storage>
-class SYCLFunctionWrapper<Functor, Storage, false> {
-  const Functor& m_kernelFunctor;
+class SYCLFunctionWrapper<Functor, Storage, true> {
+  std::reference_wrapper<const Functor> m_kernelFunctor;
+  std::reference_wrapper<Storage> m_storage;
 
  public:
   SYCLFunctionWrapper(const Functor& functor, Storage& storage)
-      : m_kernelFunctor(storage.copy_from(functor)) {}
+      : m_kernelFunctor(storage.copy_from(functor)), m_storage(storage) {}
 
   std::reference_wrapper<const Functor> get_functor() const {
-    return {m_kernelFunctor};
+    return m_kernelFunctor;
   }
 
-  static void register_event(Storage& storage, sycl::event event) {
-    storage.register_event(event);
+  sycl::event get_copy_event() const {
+    return m_storage.get().get_copy_event();
+  }
+
+  void register_event(sycl::event event) {
+    m_storage.get().register_event(event);
   }
 };
 
@@ -271,4 +339,17 @@ auto make_sycl_function_wrapper(const Functor& functor, Storage& storage) {
 }  // namespace Impl
 }  // namespace Experimental
 }  // namespace Kokkos
+
+#if defined(SYCL_DEVICE_COPYABLE) && defined(KOKKOS_ARCH_INTEL_GPU)
+template <typename Functor, typename Storage>
+struct sycl::is_device_copyable<
+    Kokkos::Experimental::Impl::SYCLFunctionWrapper<Functor, Storage, false>>
+    : std::true_type {};
+
+template <typename Functor, typename Storage>
+struct sycl::is_device_copyable<
+    const Kokkos::Experimental::Impl::SYCLFunctionWrapper<Functor, Storage,
+                                                          false>>
+    : std::true_type {};
+#endif
 #endif
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp
index dca73683c3d1f06157affec3e8fe00feb7d36fd0..d631c3ba8cb541de259fed51f69d87620f53be5e 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp
@@ -49,6 +49,24 @@
 
 #include <vector>
 
+namespace Kokkos::Impl {
+template <typename FunctorWrapper, typename Policy>
+struct FunctorWrapperRangePolicyParallelFor {
+  using WorkTag = typename Policy::work_tag;
+
+  void operator()(sycl::item<1> item) const {
+    const typename Policy::index_type id = item.get_linear_id() + m_begin;
+    if constexpr (std::is_same<WorkTag, void>::value)
+      m_functor_wrapper.get_functor()(id);
+    else
+      m_functor_wrapper.get_functor()(WorkTag(), id);
+  }
+
+  typename Policy::index_type m_begin;
+  FunctorWrapper m_functor_wrapper;
+};
+}  // namespace Kokkos::Impl
+
 template <class FunctorType, class... Traits>
 class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>,
                                 Kokkos::Experimental::SYCL> {
@@ -65,26 +83,23 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>,
 
   template <typename Functor>
   static sycl::event sycl_direct_launch(const Policy& policy,
-                                        const Functor& functor) {
+                                        const Functor& functor,
+                                        const sycl::event& memcpy_event) {
     // Convenience references
     const Kokkos::Experimental::SYCL& space = policy.space();
     Kokkos::Experimental::Impl::SYCLInternal& instance =
         *space.impl_internal_space_instance();
     sycl::queue& q = *instance.m_queue;
 
-    auto parallel_for_event = q.submit([functor, policy](sycl::handler& cgh) {
+    auto parallel_for_event = q.submit([&](sycl::handler& cgh) {
+      FunctorWrapperRangePolicyParallelFor<Functor, Policy> f{policy.begin(),
+                                                              functor};
       sycl::range<1> range(policy.end() - policy.begin());
-      const auto begin = policy.begin();
-
-      cgh.parallel_for(range, [=](sycl::item<1> item) {
-        const typename Policy::index_type id = item.get_linear_id() + begin;
-        if constexpr (std::is_same<WorkTag, void>::value)
-          functor(id);
-        else
-          functor(WorkTag(), id);
-      });
+      cgh.depends_on(memcpy_event);
+      cgh.parallel_for<FunctorWrapperRangePolicyParallelFor<Functor, Policy>>(
+          range, f);
     });
-    q.submit_barrier(std::vector<sycl::event>{parallel_for_event});
+    q.ext_oneapi_submit_barrier(std::vector<sycl::event>{parallel_for_event});
 
     return parallel_for_event;
   }
@@ -98,13 +113,13 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>,
     Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem&
         indirectKernelMem = m_policy.space()
                                 .impl_internal_space_instance()
-                                ->m_indirectKernelMem;
+                                ->get_indirect_kernel_mem();
 
-    const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_functor, indirectKernelMem);
-    sycl::event event =
-        sycl_direct_launch(m_policy, functor_wrapper.get_functor());
-    functor_wrapper.register_event(indirectKernelMem, event);
+    sycl::event event = sycl_direct_launch(m_policy, functor_wrapper,
+                                           functor_wrapper.get_copy_event());
+    functor_wrapper.register_event(event);
   }
 
   ParallelFor(const ParallelFor&) = delete;
@@ -204,8 +219,9 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
                   "Kokkos::MDRange Error: Exceeded rank bounds with SYCL\n");
   }
 
-  template <typename Functor>
-  sycl::event sycl_direct_launch(const Functor& functor) const {
+  template <typename FunctorWrapper>
+  sycl::event sycl_direct_launch(const FunctorWrapper& functor_wrapper,
+                                 const sycl::event& memcpy_event) const {
     // Convenience references
     Kokkos::Experimental::Impl::SYCLInternal& instance =
         *m_space.impl_internal_space_instance();
@@ -215,36 +231,37 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
 
     const BarePolicy bare_policy(m_policy);
 
-    auto parallel_for_event =
-        q.submit([functor, this, bare_policy](sycl::handler& cgh) {
-          const auto range                  = compute_ranges();
-          const sycl::range<3> global_range = range.get_global_range();
-          const sycl::range<3> local_range  = range.get_local_range();
-          const sycl::nd_range sycl_swapped_range{
-              sycl::range<3>{global_range[2], global_range[1], global_range[0]},
-              sycl::range<3>{local_range[2], local_range[1], local_range[0]}};
-
-          cgh.parallel_for(sycl_swapped_range, [functor, bare_policy](
-                                                   sycl::nd_item<3> item) {
-            // swap back for correct index calculations in DeviceIterateTile
-            const index_type local_x    = item.get_local_id(2);
-            const index_type local_y    = item.get_local_id(1);
-            const index_type local_z    = item.get_local_id(0);
-            const index_type global_x   = item.get_group(2);
-            const index_type global_y   = item.get_group(1);
-            const index_type global_z   = item.get_group(0);
-            const index_type n_global_x = item.get_group_range(2);
-            const index_type n_global_y = item.get_group_range(1);
-            const index_type n_global_z = item.get_group_range(0);
-
-            Kokkos::Impl::DeviceIterateTile<Policy::rank, BarePolicy, Functor,
-                                            typename Policy::work_tag>(
-                bare_policy, functor, {n_global_x, n_global_y, n_global_z},
-                {global_x, global_y, global_z}, {local_x, local_y, local_z})
-                .exec_range();
-          });
-        });
-    q.submit_barrier(std::vector<sycl::event>{parallel_for_event});
+    auto parallel_for_event = q.submit([&](sycl::handler& cgh) {
+      const auto range                  = compute_ranges();
+      const sycl::range<3> global_range = range.get_global_range();
+      const sycl::range<3> local_range  = range.get_local_range();
+      const sycl::nd_range sycl_swapped_range{
+          sycl::range<3>{global_range[2], global_range[1], global_range[0]},
+          sycl::range<3>{local_range[2], local_range[1], local_range[0]}};
+
+      cgh.depends_on(memcpy_event);
+      cgh.parallel_for(sycl_swapped_range, [functor_wrapper, bare_policy](
+                                               sycl::nd_item<3> item) {
+        // swap back for correct index calculations in DeviceIterateTile
+        const index_type local_x    = item.get_local_id(2);
+        const index_type local_y    = item.get_local_id(1);
+        const index_type local_z    = item.get_local_id(0);
+        const index_type global_x   = item.get_group(2);
+        const index_type global_y   = item.get_group(1);
+        const index_type global_z   = item.get_group(0);
+        const index_type n_global_x = item.get_group_range(2);
+        const index_type n_global_y = item.get_group_range(1);
+        const index_type n_global_z = item.get_group_range(0);
+
+        Kokkos::Impl::DeviceIterateTile<Policy::rank, BarePolicy, FunctorType,
+                                        typename Policy::work_tag>(
+            bare_policy, functor_wrapper.get_functor(),
+            {n_global_x, n_global_y, n_global_z},
+            {global_x, global_y, global_z}, {local_x, local_y, local_z})
+            .exec_range();
+      });
+    });
+    q.ext_oneapi_submit_barrier(std::vector<sycl::event>{parallel_for_event});
 
     return parallel_for_event;
   }
@@ -260,12 +277,13 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
   void execute() const {
     Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem&
         indirectKernelMem =
-            m_space.impl_internal_space_instance()->m_indirectKernelMem;
+            m_space.impl_internal_space_instance()->get_indirect_kernel_mem();
 
-    const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_functor, indirectKernelMem);
-    sycl::event event = sycl_direct_launch(functor_wrapper.get_functor());
-    functor_wrapper.register_event(indirectKernelMem, event);
+    sycl::event event =
+        sycl_direct_launch(functor_wrapper, functor_wrapper.get_copy_event());
+    functor_wrapper.register_event(event);
   }
 
   ParallelFor(const ParallelFor&) = delete;
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp
index 75237b4c72a4dbfc1b7ebe201dda240128f62ced..eca6f311114a71e4142484234572dd7a8d4e0721 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp
@@ -58,41 +58,45 @@ namespace Kokkos {
 
 namespace Impl {
 
+template <class FunctorValueTraits>
+inline constexpr bool use_shuffle_based_algorithm =
+    FunctorValueTraits::StaticValueSize > 0;
+
 namespace SYCLReduction {
 template <class ValueJoin, class ValueOps, typename WorkTag, typename ValueType,
           typename ReducerType, typename FunctorType, int dim>
-void workgroup_reduction(sycl::nd_item<dim>& item,
-                         sycl::local_ptr<ValueType> local_mem,
-                         ValueType* results_ptr,
-                         ValueType* device_accessible_result_ptr,
-                         const unsigned int value_count,
-                         const ReducerType& selected_reducer,
-                         const FunctorType& functor, bool final) {
+std::enable_if_t<
+    !use_shuffle_based_algorithm<FunctorValueTraits<ReducerType, WorkTag>>>
+workgroup_reduction(
+    sycl::nd_item<dim>& item, sycl::local_ptr<ValueType> local_mem,
+    ValueType* results_ptr, ValueType* device_accessible_result_ptr,
+    const unsigned int value_count, const ReducerType& selected_reducer,
+    const FunctorType& functor, bool final, unsigned int max_size) {
   const auto local_id = item.get_local_linear_id();
-  // FIXME_SYCL should be item.get_group().get_local_linear_range();
-  size_t wgroup_size = 1;
-  for (unsigned int i = 0; i < dim; ++i) wgroup_size *= item.get_local_range(i);
 
   // Perform the actual workgroup reduction in each subgroup
   // separately.
-  auto sg                = item.get_sub_group();
-  auto* result           = &local_mem[local_id * value_count];
-  const auto id_in_sg    = sg.get_local_id()[0];
-  const auto local_range = std::min(sg.get_local_range()[0], wgroup_size);
+  auto sg             = item.get_sub_group();
+  auto* result        = &local_mem[local_id * value_count];
+  const auto id_in_sg = sg.get_local_id()[0];
+  const auto local_range =
+      std::min<unsigned int>(sg.get_local_range()[0], max_size);
+  const auto upper_stride_bound =
+      std::min(local_range - id_in_sg, max_size - local_id);
   for (unsigned int stride = 1; stride < local_range; stride <<= 1) {
-    if (id_in_sg + stride < local_range)
+    if (stride < upper_stride_bound)
       ValueJoin::join(selected_reducer, result,
                       &local_mem[(local_id + stride) * value_count]);
-    sg.barrier();
+    sycl::group_barrier(sg);
   }
-  item.barrier(sycl::access::fence_space::local_space);
+  sycl::group_barrier(item.get_group());
 
   // Copy the subgroup results into the first positions of the
   // reduction array.
   if (id_in_sg == 0)
     ValueOps::copy(functor, &local_mem[sg.get_group_id()[0] * value_count],
                    result);
-  item.barrier(sycl::access::fence_space::local_space);
+  sycl::group_barrier(item.get_group());
 
   // Do the final reduction only using the first subgroup.
   if (sg.get_group_id()[0] == 0) {
@@ -106,14 +110,14 @@ void workgroup_reduction(sycl::nd_item<dim>& item,
       if (id_in_sg + offset < n_subgroups)
         ValueJoin::join(selected_reducer, result_,
                         &local_mem[(id_in_sg + offset) * value_count]);
-    sg.barrier();
+    sycl::group_barrier(sg);
 
     // Then, we proceed as before.
     for (unsigned int stride = 1; stride < local_range; stride <<= 1) {
       if (id_in_sg + stride < n_subgroups)
         ValueJoin::join(selected_reducer, result_,
                         &local_mem[(id_in_sg + stride) * value_count]);
-      sg.barrier();
+      sycl::group_barrier(sg);
     }
 
     // Finally, we copy the workgroup results back to global memory
@@ -137,6 +141,84 @@ void workgroup_reduction(sycl::nd_item<dim>& item,
   }
 }
 
+template <class ValueJoin, typename WorkTag, typename ValueType,
+          typename ReducerType, typename FunctorType, int dim>
+std::enable_if_t<
+    use_shuffle_based_algorithm<FunctorValueTraits<ReducerType, WorkTag>>>
+workgroup_reduction(sycl::nd_item<dim>& item,
+                    sycl::local_ptr<ValueType> local_mem, ValueType local_value,
+                    ValueType* results_ptr,
+                    ValueType* device_accessible_result_ptr,
+                    const ReducerType& selected_reducer,
+                    const FunctorType& functor, bool final,
+                    unsigned int max_size) {
+  const auto local_id = item.get_local_linear_id();
+
+  // Perform the actual workgroup reduction in each subgroup
+  // separately.
+  auto sg             = item.get_sub_group();
+  const auto id_in_sg = sg.get_local_id()[0];
+  const auto local_range =
+      std::min<unsigned int>(sg.get_local_range()[0], max_size);
+  const auto upper_stride_bound =
+      std::min(local_range - id_in_sg, max_size - local_id);
+  for (unsigned int stride = 1; stride < local_range; stride <<= 1) {
+    auto tmp = sg.shuffle_down(local_value, stride);
+    if (stride < upper_stride_bound)
+      ValueJoin::join(selected_reducer, &local_value, &tmp);
+  }
+
+  // Copy the subgroup results into the first positions of the
+  // reduction array.
+  const auto max_subgroup_size = sg.get_max_local_range()[0];
+  const auto n_active_subgroups =
+      (max_size + max_subgroup_size - 1) / max_subgroup_size;
+  if (id_in_sg == 0 && sg.get_group_id()[0] <= n_active_subgroups)
+    local_mem[sg.get_group_id()[0]] = local_value;
+  item.barrier(sycl::access::fence_space::local_space);
+
+  // Do the final reduction only using the first subgroup.
+  if (sg.get_group_id()[0] == 0) {
+    auto sg_value = local_mem[id_in_sg < n_active_subgroups ? id_in_sg : 0];
+
+    // In case the number of subgroups is larger than the range of
+    // the first subgroup, we first combine the items with a higher
+    // index.
+    if (n_active_subgroups > local_range) {
+      for (unsigned int offset = local_range; offset < n_active_subgroups;
+           offset += local_range)
+        if (id_in_sg + offset < n_active_subgroups) {
+          ValueJoin::join(selected_reducer, &sg_value,
+                          &local_mem[(id_in_sg + offset)]);
+        }
+      sg.barrier();
+    }
+
+    // Then, we proceed as before.
+    for (unsigned int stride = 1; stride < local_range; stride <<= 1) {
+      auto tmp = sg.shuffle_down(sg_value, stride);
+      if (id_in_sg + stride < n_active_subgroups)
+        ValueJoin::join(selected_reducer, &sg_value, &tmp);
+    }
+
+    // Finally, we copy the workgroup results back to global memory
+    // to be used in the next iteration. If this is the last
+    // iteration, i.e., there is only one workgroup also call
+    // final() if necessary.
+    if (id_in_sg == 0) {
+      if (final) {
+        if constexpr (ReduceFunctorHasFinal<FunctorType>::value)
+          FunctorFinal<FunctorType, WorkTag>::final(functor, &sg_value);
+        if (device_accessible_result_ptr != nullptr)
+          device_accessible_result_ptr[0] = sg_value;
+        else
+          results_ptr[0] = sg_value;
+      } else
+        results_ptr[(item.get_group_linear_id())] = sg_value;
+    }
+  }
+}
+
 }  // namespace SYCLReduction
 
 template <class FunctorType, class ReducerType, class... Traits>
@@ -166,7 +248,9 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
         m_result_ptr(v.data()),
         m_result_ptr_device_accessible(
             MemorySpaceAccess<Kokkos::Experimental::SYCLDeviceUSMSpace,
-                              typename V::memory_space>::accessible) {}
+                              typename V::memory_space>::accessible),
+        m_shared_memory_lock(
+            p.space().impl_internal_space_instance()->m_mutexScratchSpace) {}
 
   ParallelReduce(const FunctorType& f, const Policy& p,
                  const ReducerType& reducer)
@@ -177,13 +261,17 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
         m_result_ptr_device_accessible(
             MemorySpaceAccess<Kokkos::Experimental::SYCLDeviceUSMSpace,
                               typename ReducerType::result_view_type::
-                                  memory_space>::accessible) {}
+                                  memory_space>::accessible),
+        m_shared_memory_lock(
+            p.space().impl_internal_space_instance()->m_mutexScratchSpace) {}
 
  private:
-  template <typename PolicyType, typename Functor, typename Reducer>
-  sycl::event sycl_direct_launch(const PolicyType& policy,
-                                 const Functor& functor,
-                                 const Reducer& reducer) const {
+  template <typename PolicyType, typename FunctorWrapper,
+            typename ReducerWrapper>
+  sycl::event sycl_direct_launch(
+      const PolicyType& policy, const FunctorWrapper& functor_wrapper,
+      const ReducerWrapper& reducer_wrapper,
+      const std::vector<sycl::event>& memcpy_events) const {
     using ReducerConditional =
         Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
                            FunctorType, ReducerType>;
@@ -197,8 +285,6 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
         Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>;
     using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>;
 
-    auto selected_reducer = ReducerConditional::select(functor, reducer);
-
     // Convenience references
     const Kokkos::Experimental::SYCL& space = policy.space();
     Kokkos::Experimental::Impl::SYCLInternal& instance =
@@ -215,11 +301,13 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
         1);
     const unsigned int value_count =
         FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count(
-            selected_reducer);
+            ReducerConditional::select(m_functor, m_reducer));
     const auto results_ptr = static_cast<pointer_type>(instance.scratch_space(
         sizeof(value_type) * std::max(value_count, 1u) * init_size));
     value_type* device_accessible_result_ptr =
         m_result_ptr_device_accessible ? m_result_ptr : nullptr;
+    auto scratch_flags = static_cast<unsigned int*>(
+        instance.scratch_flags(sizeof(unsigned int)));
 
     sycl::event last_reduction_event;
 
@@ -229,10 +317,12 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
     if (size <= 1) {
       auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
         const auto begin = policy.begin();
+        cgh.depends_on(memcpy_events);
         cgh.single_task([=]() {
+          const auto& functor          = functor_wrapper.get_functor();
           const auto& selected_reducer = ReducerConditional::select(
               static_cast<const FunctorType&>(functor),
-              static_cast<const ReducerType&>(reducer));
+              static_cast<const ReducerType&>(reducer_wrapper.get_functor()));
           reference_type update =
               ValueInit::init(selected_reducer, results_ptr);
           if (size == 1) {
@@ -249,7 +339,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
                            &results_ptr[0]);
         });
       });
-      q.submit_barrier(std::vector<sycl::event>{parallel_reduce_event});
+      q.ext_oneapi_submit_barrier(
+          std::vector<sycl::event>{parallel_reduce_event});
       last_reduction_event = parallel_reduce_event;
     }
 
@@ -257,8 +348,7 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
     // separately, write the workgroup results back to global memory and recurse
     // until only one workgroup does the reduction and thus gets the final
     // value.
-    bool first_run = true;
-    while (size > 1) {
+    if (size > 1) {
       auto n_wgroups = ((size + values_per_thread - 1) / values_per_thread +
                         wgroup_size - 1) /
                        wgroup_size;
@@ -267,8 +357,14 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
                        sycl::access::target::local>
             local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u),
                       cgh);
+        sycl::accessor<unsigned int, 1, sycl::access::mode::read_write,
+                       sycl::access::target::local>
+            num_teams_done(1, cgh);
+
         const auto begin = policy.begin();
 
+        cgh.depends_on(memcpy_events);
+
         cgh.parallel_for(
             sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size),
             [=](sycl::nd_item<1> item) {
@@ -276,20 +372,19 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
               const auto global_id =
                   wgroup_size * item.get_group_linear_id() * values_per_thread +
                   local_id;
+              const auto& functor          = functor_wrapper.get_functor();
               const auto& selected_reducer = ReducerConditional::select(
                   static_cast<const FunctorType&>(functor),
-                  static_cast<const ReducerType&>(reducer));
-
-              // In the first iteration, we call functor to initialize the local
-              // memory. Otherwise, the local memory is initialized with the
-              // results from the previous iteration that are stored in global
-              // memory. Note that we load values_per_thread values per thread
-              // and immediately combine them to avoid too many threads being
-              // idle in the actual workgroup reduction.
+                  static_cast<const ReducerType&>(
+                      reducer_wrapper.get_functor()));
+
               using index_type       = typename Policy::index_type;
               const auto upper_bound = std::min<index_type>(
                   global_id + values_per_thread * wgroup_size, size);
-              if (first_run) {
+
+              if constexpr (FunctorValueTraits<ReducerTypeFwd,
+                                               WorkTagFwd>::StaticValueSize ==
+                            0) {
                 reference_type update = ValueInit::init(
                     selected_reducer, &local_mem[local_id * value_count]);
                 for (index_type id = global_id; id < upper_bound;
@@ -299,35 +394,97 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
                   else
                     functor(WorkTag(), id + begin, update);
                 }
+                item.barrier(sycl::access::fence_space::local_space);
+
+                SYCLReduction::workgroup_reduction<ValueJoin, ValueOps,
+                                                   WorkTag>(
+                    item, local_mem.get_pointer(), results_ptr,
+                    device_accessible_result_ptr, value_count, selected_reducer,
+                    static_cast<const FunctorType&>(functor), false,
+                    std::min(size, wgroup_size));
+
+                if (local_id == 0) {
+                  sycl::ext::oneapi::atomic_ref<
+                      unsigned, sycl::ext::oneapi::memory_order::relaxed,
+                      sycl::ext::oneapi::memory_scope::device,
+                      sycl::access::address_space::global_space>
+                      scratch_flags_ref(*scratch_flags);
+                  num_teams_done[0] = ++scratch_flags_ref;
+                }
+                item.barrier(sycl::access::fence_space::local_space);
+                if (num_teams_done[0] == n_wgroups) {
+                  if (local_id >= n_wgroups)
+                    ValueInit::init(selected_reducer,
+                                    &local_mem[local_id * value_count]);
+                  else {
+                    ValueOps::copy(functor, &local_mem[local_id * value_count],
+                                   &results_ptr[local_id * value_count]);
+                    for (unsigned int id = local_id + wgroup_size;
+                         id < n_wgroups; id += wgroup_size) {
+                      ValueJoin::join(selected_reducer,
+                                      &local_mem[local_id * value_count],
+                                      &results_ptr[id * value_count]);
+                    }
+                  }
+
+                  SYCLReduction::workgroup_reduction<ValueJoin, ValueOps,
+                                                     WorkTag>(
+                      item, local_mem.get_pointer(), results_ptr,
+                      device_accessible_result_ptr, value_count,
+                      selected_reducer,
+                      static_cast<const FunctorType&>(functor), true,
+                      std::min(n_wgroups, wgroup_size));
+                }
               } else {
-                if (global_id >= size)
-                  ValueInit::init(selected_reducer,
-                                  &local_mem[local_id * value_count]);
-                else {
-                  ValueOps::copy(functor, &local_mem[local_id * value_count],
-                                 &results_ptr[global_id * value_count]);
-                  for (index_type id = global_id + wgroup_size;
-                       id < upper_bound; id += wgroup_size) {
-                    ValueJoin::join(selected_reducer,
-                                    &local_mem[local_id * value_count],
-                                    &results_ptr[id * value_count]);
+                value_type local_value;
+                reference_type update =
+                    ValueInit::init(selected_reducer, &local_value);
+                for (index_type id = global_id; id < upper_bound;
+                     id += wgroup_size) {
+                  if constexpr (std::is_same<WorkTag, void>::value)
+                    functor(id + begin, update);
+                  else
+                    functor(WorkTag(), id + begin, update);
+                }
+
+                SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>(
+                    item, local_mem.get_pointer(), local_value, results_ptr,
+                    device_accessible_result_ptr, selected_reducer,
+                    static_cast<const FunctorType&>(functor), false,
+                    std::min(size, wgroup_size));
+
+                if (local_id == 0) {
+                  sycl::ext::oneapi::atomic_ref<
+                      unsigned, sycl::ext::oneapi::memory_order::relaxed,
+                      sycl::ext::oneapi::memory_scope::device,
+                      sycl::access::address_space::global_space>
+                      scratch_flags_ref(*scratch_flags);
+                  num_teams_done[0] = ++scratch_flags_ref;
+                }
+                item.barrier(sycl::access::fence_space::local_space);
+                if (num_teams_done[0] == n_wgroups) {
+                  if (local_id >= n_wgroups)
+                    ValueInit::init(selected_reducer, &local_value);
+                  else {
+                    local_value = results_ptr[local_id];
+                    for (unsigned int id = local_id + wgroup_size;
+                         id < n_wgroups; id += wgroup_size) {
+                      ValueJoin::join(selected_reducer, &local_value,
+                                      &results_ptr[id]);
+                    }
                   }
+
+                  SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>(
+                      item, local_mem.get_pointer(), local_value, results_ptr,
+                      device_accessible_result_ptr, selected_reducer,
+                      static_cast<const FunctorType&>(functor), true,
+                      std::min(n_wgroups, wgroup_size));
                 }
               }
-              item.barrier(sycl::access::fence_space::local_space);
-
-              SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>(
-                  item, local_mem.get_pointer(), results_ptr,
-                  device_accessible_result_ptr, value_count, selected_reducer,
-                  static_cast<const FunctorType&>(functor), n_wgroups <= 1);
             });
       });
-      q.submit_barrier(std::vector<sycl::event>{parallel_reduce_event});
-
-      last_reduction_event = parallel_reduce_event;
-
-      first_run = false;
-      size      = n_wgroups;
+      last_reduction_event       = q.ext_oneapi_submit_barrier(
+          std::vector<sycl::event>{parallel_reduce_event});
     }
 
     // At this point, the reduced value is written to the entry in results_ptr
@@ -352,18 +509,19 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
         *m_policy.space().impl_internal_space_instance();
     using IndirectKernelMem =
         Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem;
-    IndirectKernelMem& indirectKernelMem  = instance.m_indirectKernelMem;
-    IndirectKernelMem& indirectReducerMem = instance.m_indirectReducerMem;
+    IndirectKernelMem& indirectKernelMem  = instance.get_indirect_kernel_mem();
+    IndirectKernelMem& indirectReducerMem = instance.get_indirect_kernel_mem();
 
-    const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_functor, indirectKernelMem);
-    const auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_reducer, indirectReducerMem);
 
     sycl::event event = sycl_direct_launch(
-        m_policy, functor_wrapper.get_functor(), reducer_wrapper.get_functor());
-    functor_wrapper.register_event(indirectKernelMem, event);
-    reducer_wrapper.register_event(indirectReducerMem, event);
+        m_policy, functor_wrapper, reducer_wrapper,
+        {functor_wrapper.get_copy_event(), reducer_wrapper.get_copy_event()});
+    functor_wrapper.register_event(event);
+    reducer_wrapper.register_event(event);
   }
 
  private:
@@ -372,6 +530,10 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
   const ReducerType m_reducer;
   const pointer_type m_result_ptr;
   const bool m_result_ptr_device_accessible;
+
+  // Only let one Parallel/Scan modify the shared memory. The
+  // constructor acquires the mutex which is released in the destructor.
+  std::scoped_lock<std::mutex> m_shared_memory_lock;
 };
 
 template <class FunctorType, class ReducerType, class... Traits>
@@ -425,7 +587,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
         m_result_ptr(v.data()),
         m_result_ptr_device_accessible(
             MemorySpaceAccess<Kokkos::Experimental::SYCLDeviceUSMSpace,
-                              typename V::memory_space>::accessible) {}
+                              typename V::memory_space>::accessible),
+        m_shared_memory_lock(
+            m_space.impl_internal_space_instance()->m_mutexScratchSpace) {}
 
   ParallelReduce(const FunctorType& f, const Policy& p,
                  const ReducerType& reducer)
@@ -437,13 +601,17 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
         m_result_ptr_device_accessible(
             MemorySpaceAccess<Kokkos::Experimental::SYCLDeviceUSMSpace,
                               typename ReducerType::result_view_type::
-                                  memory_space>::accessible) {}
+                                  memory_space>::accessible),
+        m_shared_memory_lock(
+            m_space.impl_internal_space_instance()->m_mutexScratchSpace) {}
 
  private:
-  template <typename PolicyType, typename Functor, typename Reducer>
-  sycl::event sycl_direct_launch(const PolicyType& policy,
-                                 const Functor& functor,
-                                 const Reducer& reducer) const {
+  template <typename PolicyType, typename FunctorWrapper,
+            typename ReducerWrapper>
+  sycl::event sycl_direct_launch(
+      const PolicyType& policy, const FunctorWrapper& functor_wrapper,
+      const ReducerWrapper& reducer_wrapper,
+      const std::vector<sycl::event>& memcpy_events) const {
     using ReducerConditional =
         Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
                            FunctorType, ReducerType>;
@@ -475,18 +643,16 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
     size_t size              = range.get_global_range().size();
     const auto init_size =
         std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1);
-    const auto& selected_reducer = ReducerConditional::select(functor, reducer);
     const unsigned int value_count =
         FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count(
-            selected_reducer);
+            ReducerConditional::select(m_functor, m_reducer));
     // FIXME_SYCL only use the first half
     const auto results_ptr = static_cast<pointer_type>(instance.scratch_space(
-        sizeof(value_type) * std::max(value_count, 1u) * init_size * 2));
-    // FIXME_SYCL without this we are running into a race condition
-    const auto results_ptr2 =
-        results_ptr + std::max(value_count, 1u) * init_size;
+        sizeof(value_type) * std::max(value_count, 1u) * init_size));
     value_type* device_accessible_result_ptr =
         m_result_ptr_device_accessible ? m_result_ptr : nullptr;
+    auto scratch_flags = static_cast<unsigned int*>(
+        instance.scratch_flags(sizeof(unsigned int)));
 
     sycl::event last_reduction_event;
 
@@ -495,17 +661,19 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
     // m_result_ptr yet.
     if (size <= 1) {
       auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
+        cgh.depends_on(memcpy_events);
         cgh.single_task([=]() {
+          const auto& functor          = functor_wrapper.get_functor();
           const auto& selected_reducer = ReducerConditional::select(
               static_cast<const FunctorType&>(functor),
-              static_cast<const ReducerType&>(reducer));
+              static_cast<const ReducerType&>(reducer_wrapper.get_functor()));
           reference_type update =
               ValueInit::init(selected_reducer, results_ptr);
           if (size == 1) {
             Kokkos::Impl::Reduce::DeviceIterateTile<
-                Policy::rank, BarePolicy, Functor, typename Policy::work_tag,
-                reference_type>(policy, functor, update, {1, 1, 1}, {0, 0, 0},
-                                {0, 0, 0})
+                Policy::rank, BarePolicy, FunctorType,
+                typename Policy::work_tag, reference_type>(
+                policy, functor, update, {1, 1, 1}, {0, 0, 0}, {0, 0, 0})
                 .exec_range();
           }
           if constexpr (ReduceFunctorHasFinal<FunctorType>::value)
@@ -516,7 +684,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
                            &results_ptr[0]);
         });
       });
-      q.submit_barrier(std::vector<sycl::event>{parallel_reduce_event});
+      q.ext_oneapi_submit_barrier(
+          std::vector<sycl::event>{parallel_reduce_event});
       last_reduction_event = parallel_reduce_event;
     }
 
@@ -524,89 +693,146 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
     // separately, write the workgroup results back to global memory and recurse
     // until only one workgroup does the reduction and thus gets the final
     // value.
-    bool first_run = true;
-    while (size > 1) {
+    if (size > 1) {
       auto n_wgroups             = (size + wgroup_size - 1) / wgroup_size;
       auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
         sycl::accessor<value_type, 1, sycl::access::mode::read_write,
                        sycl::access::target::local>
             local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u),
                       cgh);
+        sycl::accessor<unsigned int, 1, sycl::access::mode::read_write,
+                       sycl::access::target::local>
+            num_teams_done(1, cgh);
 
         const BarePolicy bare_policy = m_policy;
 
+        cgh.depends_on(memcpy_events);
+
         cgh.parallel_for(range, [=](sycl::nd_item<1> item) {
-          const auto local_id = item.get_local_linear_id();
-          const auto global_id =
-              wgroup_size * item.get_group_linear_id() + local_id;
+          const auto local_id          = item.get_local_linear_id();
+          const auto& functor          = functor_wrapper.get_functor();
           const auto& selected_reducer = ReducerConditional::select(
               static_cast<const FunctorType&>(functor),
-              static_cast<const ReducerType&>(reducer));
+              static_cast<const ReducerType&>(reducer_wrapper.get_functor()));
 
           // In the first iteration, we call functor to initialize the local
           // memory. Otherwise, the local memory is initialized with the
           // results from the previous iteration that are stored in global
           // memory.
           using index_type = typename Policy::index_type;
-          const auto upper_bound =
-              std::min<index_type>(global_id + wgroup_size, size);
-          if (first_run) {
+
+          // SWAPPED here to be conforming with CUDA implementation
+          const index_type local_x    = 0;
+          const index_type local_y    = item.get_local_id(0);
+          const index_type local_z    = 0;
+          const index_type global_x   = item.get_group(0);
+          const index_type global_y   = 0;
+          const index_type global_z   = 0;
+          const index_type n_global_x = item.get_group_range(0);
+          const index_type n_global_y = 1;
+          const index_type n_global_z = 1;
+
+          if constexpr (FunctorValueTraits<ReducerTypeFwd,
+                                           WorkTagFwd>::StaticValueSize == 0) {
             reference_type update = ValueInit::init(
                 selected_reducer, &local_mem[local_id * value_count]);
 
-            // SWAPPED here to be conforming with CUDA implementation
-            const index_type local_x    = 0;
-            const index_type local_y    = item.get_local_id(0);
-            const index_type local_z    = 0;
-            const index_type global_x   = item.get_group(0);
-            const index_type global_y   = 0;
-            const index_type global_z   = 0;
-            const index_type n_global_x = item.get_group_range(0);
-            const index_type n_global_y = 1;
-            const index_type n_global_z = 1;
-
             Kokkos::Impl::Reduce::DeviceIterateTile<
-                Policy::rank, BarePolicy, Functor, typename Policy::work_tag,
-                reference_type>(bare_policy, functor, update,
-                                {n_global_x, n_global_y, n_global_z},
-                                {global_x, global_y, global_z},
-                                {local_x, local_y, local_z})
+                Policy::rank, BarePolicy, FunctorType,
+                typename Policy::work_tag, reference_type>(
+                bare_policy, functor, update,
+                {n_global_x, n_global_y, n_global_z},
+                {global_x, global_y, global_z}, {local_x, local_y, local_z})
                 .exec_range();
+            item.barrier(sycl::access::fence_space::local_space);
+
+            SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>(
+                item, local_mem.get_pointer(), results_ptr,
+                device_accessible_result_ptr, value_count, selected_reducer,
+                static_cast<const FunctorType&>(functor), false,
+                std::min(size, wgroup_size));
+
+            if (local_id == 0) {
+              sycl::ext::oneapi::atomic_ref<
+                  unsigned, sycl::ext::oneapi::memory_order::relaxed,
+                  sycl::ext::oneapi::memory_scope::device,
+                  sycl::access::address_space::global_space>
+                  scratch_flags_ref(*scratch_flags);
+              num_teams_done[0] = ++scratch_flags_ref;
+            }
+            item.barrier(sycl::access::fence_space::local_space);
+            if (num_teams_done[0] == n_wgroups) {
+              if (local_id >= n_wgroups)
+                ValueInit::init(selected_reducer,
+                                &local_mem[local_id * value_count]);
+              else {
+                ValueOps::copy(functor, &local_mem[local_id * value_count],
+                               &results_ptr[local_id * value_count]);
+                for (unsigned int id = local_id + wgroup_size; id < n_wgroups;
+                     id += wgroup_size) {
+                  ValueJoin::join(selected_reducer,
+                                  &local_mem[local_id * value_count],
+                                  &results_ptr[id * value_count]);
+                }
+              }
+
+              SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>(
+                  item, local_mem.get_pointer(), results_ptr,
+                  device_accessible_result_ptr, value_count, selected_reducer,
+                  static_cast<const FunctorType&>(functor), true,
+                  std::min(n_wgroups, wgroup_size));
+            }
           } else {
-            if (global_id >= size)
-              ValueInit::init(selected_reducer,
-                              &local_mem[local_id * value_count]);
-            else {
-              ValueOps::copy(functor, &local_mem[local_id * value_count],
-                             &results_ptr[global_id * value_count]);
-              for (index_type id = global_id + wgroup_size; id < upper_bound;
-                   id += wgroup_size) {
-                ValueJoin::join(selected_reducer,
-                                &local_mem[local_id * value_count],
-                                &results_ptr[id * value_count]);
+            value_type local_value;
+            reference_type update =
+                ValueInit::init(selected_reducer, &local_value);
+
+            Kokkos::Impl::Reduce::DeviceIterateTile<
+                Policy::rank, BarePolicy, FunctorType,
+                typename Policy::work_tag, reference_type>(
+                bare_policy, functor, update,
+                {n_global_x, n_global_y, n_global_z},
+                {global_x, global_y, global_z}, {local_x, local_y, local_z})
+                .exec_range();
+
+            SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>(
+                item, local_mem.get_pointer(), local_value, results_ptr,
+                device_accessible_result_ptr, selected_reducer,
+                static_cast<const FunctorType&>(functor), false,
+                std::min(size, wgroup_size));
+
+            if (local_id == 0) {
+              sycl::ext::oneapi::atomic_ref<
+                  unsigned, sycl::ext::oneapi::memory_order::relaxed,
+                  sycl::ext::oneapi::memory_scope::device,
+                  sycl::access::address_space::global_space>
+                  scratch_flags_ref(*scratch_flags);
+              num_teams_done[0] = ++scratch_flags_ref;
+            }
+            item.barrier(sycl::access::fence_space::local_space);
+            if (num_teams_done[0] == n_wgroups) {
+              if (local_id >= n_wgroups)
+                ValueInit::init(selected_reducer, &local_value);
+              else {
+                local_value = results_ptr[local_id];
+                for (unsigned int id = local_id + wgroup_size; id < n_wgroups;
+                     id += wgroup_size) {
+                  ValueJoin::join(selected_reducer, &local_value,
+                                  &results_ptr[id]);
+                }
               }
+
+              SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>(
+                  item, local_mem.get_pointer(), local_value, results_ptr,
+                  device_accessible_result_ptr, selected_reducer,
+                  static_cast<const FunctorType&>(functor), true,
+                  std::min(n_wgroups, wgroup_size));
             }
           }
-          item.barrier(sycl::access::fence_space::local_space);
-
-          SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>(
-              item, local_mem.get_pointer(), results_ptr2,
-              device_accessible_result_ptr, value_count, selected_reducer,
-              static_cast<const FunctorType&>(functor),
-              n_wgroups <= 1 && item.get_group_linear_id() == 0);
         });
       });
-      q.submit_barrier(std::vector<sycl::event>{parallel_reduce_event});
-
-      // FIXME_SYCL this is likely not necessary, see above
-      auto deep_copy_event =
-          q.memcpy(results_ptr, results_ptr2,
-                   sizeof(*m_result_ptr) * value_count * n_wgroups);
-      q.submit_barrier(std::vector<sycl::event>{deep_copy_event});
-      last_reduction_event = deep_copy_event;
-
-      first_run = false;
-      size      = n_wgroups;
+      last_reduction_event       = q.ext_oneapi_submit_barrier(
+          std::vector<sycl::event>{parallel_reduce_event});
     }
 
     // At this point, the reduced value is written to the entry in results_ptr
@@ -636,18 +862,19 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
         *m_space.impl_internal_space_instance();
     using IndirectKernelMem =
         Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem;
-    IndirectKernelMem& indirectKernelMem  = instance.m_indirectKernelMem;
-    IndirectKernelMem& indirectReducerMem = instance.m_indirectReducerMem;
+    IndirectKernelMem& indirectKernelMem  = instance.get_indirect_kernel_mem();
+    IndirectKernelMem& indirectReducerMem = instance.get_indirect_kernel_mem();
 
-    const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_functor, indirectKernelMem);
-    const auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_reducer, indirectReducerMem);
 
     sycl::event event = sycl_direct_launch(
-        m_policy, functor_wrapper.get_functor(), reducer_wrapper.get_functor());
-    functor_wrapper.register_event(indirectKernelMem, event);
-    reducer_wrapper.register_event(indirectReducerMem, event);
+        m_policy, functor_wrapper, reducer_wrapper,
+        {functor_wrapper.get_copy_event(), reducer_wrapper.get_copy_event()});
+    functor_wrapper.register_event(event);
+    reducer_wrapper.register_event(event);
   }
 
  private:
@@ -657,6 +884,10 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
   const ReducerType m_reducer;
   const pointer_type m_result_ptr;
   const bool m_result_ptr_device_accessible;
+
+  // Only let one Parallel/Scan modify the shared memory. The
+  // constructor acquires the mutex which is released in the destructor.
+  std::scoped_lock<std::mutex> m_shared_memory_lock;
 };
 
 }  // namespace Impl
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp
index d5611c2159bbc4bf0bd6a29fb89a941f7560650a..e5992956267f96f816d558621614ecaa6864089e 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp
@@ -53,6 +53,71 @@
 namespace Kokkos {
 namespace Impl {
 
+// Perform a scan over a workgroup.
+// At the end of this function, the subgroup scans are stored in the local array
+// such that the last value (at position n_active_subgroups-1) contains the
+// total sum.
+template <class ValueJoin, class ValueInit, int dim, typename ValueType,
+          typename FunctorType>
+void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& functor,
+                    sycl::local_ptr<ValueType> local_mem,
+                    ValueType& local_value, unsigned int global_range) {
+  // subgroup scans
+  auto sg                = item.get_sub_group();
+  const auto sg_group_id = sg.get_group_id()[0];
+  const auto id_in_sg    = sg.get_local_id()[0];
+  for (unsigned int stride = 1; stride < global_range; stride <<= 1) {
+    auto tmp = sg.shuffle_up(local_value, stride);
+    if (id_in_sg >= stride) ValueJoin::join(functor, &local_value, &tmp);
+  }
+
+  const auto max_subgroup_size = sg.get_max_local_range()[0];
+  const auto n_active_subgroups =
+      (global_range + max_subgroup_size - 1) / max_subgroup_size;
+
+  const auto local_range = sg.get_local_range()[0];
+  if (id_in_sg == local_range - 1 && sg_group_id < n_active_subgroups)
+    local_mem[sg_group_id] = local_value;
+  local_value = sg.shuffle_up(local_value, 1);
+  if (id_in_sg == 0) ValueInit::init(functor, &local_value);
+  sycl::group_barrier(item.get_group());
+
+  // scan subgroup results using the first subgroup
+  if (n_active_subgroups > 1) {
+    if (sg_group_id == 0) {
+      const auto n_rounds =
+          (n_active_subgroups + local_range - 1) / local_range;
+      for (unsigned int round = 0; round < n_rounds; ++round) {
+        const unsigned int idx = id_in_sg + round * local_range;
+        const auto upper_bound =
+            std::min(local_range, n_active_subgroups - round * local_range);
+        auto local_sg_value = local_mem[idx < n_active_subgroups ? idx : 0];
+        for (unsigned int stride = 1; stride < upper_bound; stride <<= 1) {
+          auto tmp = sg.shuffle_up(local_sg_value, stride);
+          if (id_in_sg >= stride) {
+            if (idx < n_active_subgroups)
+              ValueJoin::join(functor, &local_sg_value, &tmp);
+            else
+              local_sg_value = tmp;
+          }
+        }
+        if (idx < n_active_subgroups) {
+          local_mem[idx] = local_sg_value;
+          if (round > 0)
+            ValueJoin::join(functor, &local_mem[idx],
+                            &local_mem[round * local_range - 1]);
+        }
+        if (round + 1 < n_rounds) sycl::group_barrier(sg);
+      }
+    }
+    sycl::group_barrier(item.get_group());
+  }
+
+  // add results to all subgroups
+  if (sg_group_id > 0)
+    ValueJoin::join(functor, &local_value, &local_mem[sg_group_id - 1]);
+}
+
 template <class FunctorType, class... Traits>
 class ParallelScanSYCLBase {
  public:
@@ -67,7 +132,6 @@ class ParallelScanSYCLBase {
   using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>;
   using ValueInit   = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>;
   using ValueJoin   = Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>;
-  using ValueOps    = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>;
 
  public:
   using pointer_type   = typename ValueTraits::pointer_type;
@@ -82,9 +146,13 @@ class ParallelScanSYCLBase {
   const Policy m_policy;
   pointer_type m_scratch_space = nullptr;
 
+  // Only let one Parallel/Scan modify the shared memory. The
+  // constructor acquires the mutex which is released in the destructor.
+  std::scoped_lock<std::mutex> m_shared_memory_lock;
+
  private:
-  template <typename Functor>
-  void scan_internal(sycl::queue& q, const Functor& functor,
+  template <typename FunctorWrapper>
+  void scan_internal(sycl::queue& q, const FunctorWrapper& functor_wrapper,
                      pointer_type global_mem, std::size_t size) const {
     // FIXME_SYCL optimize
     constexpr size_t wgroup_size = 128;
@@ -92,94 +160,65 @@ class ParallelScanSYCLBase {
     pointer_type group_results   = global_mem + n_wgroups * wgroup_size;
 
     auto local_scans = q.submit([&](sycl::handler& cgh) {
+      // Store subgroup totals
+      const auto min_subgroup_size =
+          q.get_device()
+              .template get_info<sycl::info::device::sub_group_sizes>()
+              .front();
       sycl::accessor<value_type, 1, sycl::access::mode::read_write,
                      sycl::access::target::local>
-          local_mem(sycl::range<1>(wgroup_size), cgh);
+          local_mem(sycl::range<1>((wgroup_size + min_subgroup_size - 1) /
+                                   min_subgroup_size),
+                    cgh);
 
       cgh.parallel_for(
           sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size),
           [=](sycl::nd_item<1> item) {
-            const auto local_id      = item.get_local_linear_id();
-            const auto global_id     = item.get_global_linear_id();
-            const auto global_offset = global_id - local_id;
+            const auto local_id  = item.get_local_linear_id();
+            const auto global_id = item.get_global_linear_id();
 
             // Initialize local memory
+            value_type local_value;
             if (global_id < size)
-              local_mem[local_id] = global_mem[global_id];
+              local_value = global_mem[global_id];
             else
-              ValueInit::init(functor, &local_mem[local_id]);
-            item.barrier(sycl::access::fence_space::local_space);
-
-            // subgroup scans
-            auto sg                = item.get_sub_group();
-            const auto sg_group_id = sg.get_group_id()[0];
-            const int id_in_sg     = sg.get_local_id()[0];
-            for (int stride = wgroup_size / 2; stride > 0; stride >>= 1) {
-              auto tmp = sg.shuffle_up(local_mem[local_id], stride);
-              if (id_in_sg >= stride)
-                ValueJoin::join(functor, &local_mem[local_id], &tmp);
-            }
-
-            const int local_range = sg.get_local_range()[0];
-            if (id_in_sg == local_range - 1)
-              global_mem[sg_group_id + global_offset] = local_mem[local_id];
-            local_mem[local_id] = sg.shuffle_up(local_mem[local_id], 1);
-            if (id_in_sg == 0) ValueInit::init(functor, &local_mem[local_id]);
-            item.barrier(sycl::access::fence_space::local_space);
-
-            // scan subgroup results using the first subgroup
-            if (sg_group_id == 0) {
-              const int n_subgroups = sg.get_group_range()[0];
-              if (local_range < n_subgroups) Kokkos::abort("Not implemented!");
-
-              for (int stride = n_subgroups / 2; stride > 0; stride >>= 1) {
-                auto tmp =
-                    sg.shuffle_up(global_mem[id_in_sg + global_offset], stride);
-                if (id_in_sg >= stride) {
-                  if (id_in_sg < n_subgroups)
-                    ValueJoin::join(
-                        functor, &global_mem[id_in_sg + global_offset], &tmp);
-                  else
-                    global_mem[id_in_sg + global_offset] = tmp;
-                }
-              }
-            }
-            item.barrier(sycl::access::fence_space::local_space);
-
-            // add results to all subgroups
-            if (sg_group_id > 0)
-              ValueJoin::join(functor, &local_mem[local_id],
-                              &global_mem[sg_group_id - 1 + global_offset]);
-            item.barrier(sycl::access::fence_space::local_space);
+              ValueInit::init(functor_wrapper.get_functor(), &local_value);
+
+            workgroup_scan<ValueJoin, ValueInit>(
+                item, functor_wrapper.get_functor(), local_mem.get_pointer(),
+                local_value, wgroup_size);
+
             if (n_wgroups > 1 && local_id == wgroup_size - 1)
               group_results[item.get_group_linear_id()] =
-                  global_mem[sg_group_id + global_offset];
-            item.barrier(sycl::access::fence_space::local_space);
+                  local_mem[item.get_sub_group().get_group_range()[0] - 1];
 
             // Write results to global memory
-            if (global_id < size) global_mem[global_id] = local_mem[local_id];
+            if (global_id < size) global_mem[global_id] = local_value;
           });
     });
-    q.submit_barrier(std::vector<sycl::event>{local_scans});
+    q.ext_oneapi_submit_barrier(std::vector<sycl::event>{local_scans});
 
     if (n_wgroups > 1) {
-      scan_internal(q, functor, group_results, n_wgroups);
+      scan_internal(q, functor_wrapper, group_results, n_wgroups);
       auto update_with_group_results = q.submit([&](sycl::handler& cgh) {
         cgh.parallel_for(
             sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size),
             [=](sycl::nd_item<1> item) {
               const auto global_id = item.get_global_linear_id();
               if (global_id < size)
-                ValueJoin::join(functor, &global_mem[global_id],
+                ValueJoin::join(functor_wrapper.get_functor(),
+                                &global_mem[global_id],
                                 &group_results[item.get_group_linear_id()]);
             });
       });
-      q.submit_barrier(std::vector<sycl::event>{update_with_group_results});
+      q.ext_oneapi_submit_barrier(
+          std::vector<sycl::event>{update_with_group_results});
     }
   }
 
-  template <typename Functor>
-  sycl::event sycl_direct_launch(const Functor& functor) const {
+  template <typename FunctorWrapper>
+  sycl::event sycl_direct_launch(const FunctorWrapper& functor_wrapper,
+                                 sycl::event memcpy_event) const {
     // Convenience references
     const Kokkos::Experimental::SYCL& space = m_policy.space();
     Kokkos::Experimental::Impl::SYCLInternal& instance =
@@ -192,22 +231,25 @@ class ParallelScanSYCLBase {
     auto initialize_global_memory = q.submit([&](sycl::handler& cgh) {
       auto global_mem = m_scratch_space;
       auto begin      = m_policy.begin();
+
+      cgh.depends_on(memcpy_event);
       cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
         const typename Policy::index_type id =
             static_cast<typename Policy::index_type>(item.get_id()) + begin;
         value_type update{};
-        ValueInit::init(functor, &update);
+        ValueInit::init(functor_wrapper.get_functor(), &update);
         if constexpr (std::is_same<WorkTag, void>::value)
-          functor(id, update, false);
+          functor_wrapper.get_functor()(id, update, false);
         else
-          functor(WorkTag(), id, update, false);
+          functor_wrapper.get_functor()(WorkTag(), id, update, false);
         global_mem[id] = update;
       });
     });
-    q.submit_barrier(std::vector<sycl::event>{initialize_global_memory});
+    q.ext_oneapi_submit_barrier(
+        std::vector<sycl::event>{initialize_global_memory});
 
     // Perform the actual exclusive scan
-    scan_internal(q, functor, m_scratch_space, len);
+    scan_internal(q, functor_wrapper, m_scratch_space, len);
 
     // Write results to global memory
     auto update_global_results = q.submit([&](sycl::handler& cgh) {
@@ -217,13 +259,14 @@ class ParallelScanSYCLBase {
 
         value_type update = global_mem[global_id];
         if constexpr (std::is_same<WorkTag, void>::value)
-          functor(global_id, update, true);
+          functor_wrapper.get_functor()(global_id, update, true);
         else
-          functor(WorkTag(), global_id, update, true);
+          functor_wrapper.get_functor()(WorkTag(), global_id, update, true);
         global_mem[global_id] = update;
       });
     });
-    q.submit_barrier(std::vector<sycl::event>{update_global_results});
+    q.ext_oneapi_submit_barrier(
+        std::vector<sycl::event>{update_global_results});
     return update_global_results;
   }
 
@@ -258,18 +301,23 @@ class ParallelScanSYCLBase {
         static_cast<pointer_type>(instance.scratch_space(total_memory));
 
     Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem&
-        indirectKernelMem = instance.m_indirectKernelMem;
+        indirectKernelMem = instance.get_indirect_kernel_mem();
 
-    const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_functor, indirectKernelMem);
 
-    sycl::event event = sycl_direct_launch(functor_wrapper.get_functor());
-    functor_wrapper.register_event(indirectKernelMem, event);
+    sycl::event event =
+        sycl_direct_launch(functor_wrapper, functor_wrapper.get_copy_event());
+    functor_wrapper.register_event(event);
     post_functor();
   }
 
   ParallelScanSYCLBase(const FunctorType& arg_functor, const Policy& arg_policy)
-      : m_functor(arg_functor), m_policy(arg_policy) {}
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_shared_memory_lock(m_policy.space()
+                                 .impl_internal_space_instance()
+                                 ->m_mutexScratchSpace) {}
 };
 
 template <class FunctorType, class... Traits>
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp
index 9538bf708077cc50404e66e19e048d4341a19761..bf37dcb26f3b99b92ab989168c8c0200a29d809b 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp
@@ -113,7 +113,7 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
 
   template <typename FunctorType>
   int team_size_recommended(FunctorType const& f, ParallelForTag const&) const {
-    return internal_team_size_max_for(f);
+    return internal_team_size_recommended_for(f);
   }
 
   template <typename FunctorType>
@@ -193,7 +193,7 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
         m_vector_length(0),
         m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
-        m_chunk_size(0),
+        m_chunk_size(vector_length_max()),
         m_tune_team_size(false),
         m_tune_vector_length(false) {}
 
@@ -209,7 +209,7 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
                 : (verify_requested_vector_length(1))),
         m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
-        m_chunk_size(0),
+        m_chunk_size(vector_length_max()),
         m_tune_team_size(bool(team_size_request <= 0)),
         m_tune_vector_length(bool(vector_length_request <= 0)) {
     // FIXME_SYCL Check that league size is permissible,
@@ -330,9 +330,16 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
         (space().impl_internal_space_instance()->m_maxShmemPerBlock -
          2 * sizeof(double) - m_team_scratch_size[0]) /
         (sizeof(double) + m_thread_scratch_size[0]);
-    return std::min<int>(
-               m_space.impl_internal_space_instance()->m_maxWorkgroupSize,
-               max_threads_for_memory) /
+    return std::min({
+             int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize),
+      // FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs.
+#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \
+    defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) ||   \
+    defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE)
+                 256,
+#endif
+                 max_threads_for_memory
+           }) /
            impl_vector_length();
   }
 
@@ -355,22 +362,29 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
          2 * sizeof(double) - m_team_scratch_size[0]) /
         (sizeof(double) + sizeof(value_type) * value_count +
          m_thread_scratch_size[0]);
-    return std::min<int>(
-               m_space.impl_internal_space_instance()->m_maxWorkgroupSize,
-               max_threads_for_memory) /
+    return std::min<int>({
+             int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize),
+      // FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs.
+#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \
+    defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) ||   \
+    defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE)
+                 256,
+#endif
+                 max_threads_for_memory
+           }) /
            impl_vector_length();
   }
 
   template <class FunctorType>
   int internal_team_size_recommended_for(const FunctorType& f) const {
     // FIXME_SYCL improve
-    return internal_team_size_max_for(f);
+    return 1 << Kokkos::Impl::int_log2(internal_team_size_max_for(f));
   }
 
   template <class FunctorType>
   int internal_team_size_recommended_reduce(const FunctorType& f) const {
     // FIXME_SYCL improve
-    return internal_team_size_max_reduce(f);
+    return 1 << Kokkos::Impl::int_log2(internal_team_size_max_reduce(f));
   }
 };
 
@@ -394,12 +408,16 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
   size_type const m_vector_size;
   int m_shmem_begin;
   int m_shmem_size;
-  void* m_scratch_ptr[2];
+  char* m_scratch_ptr[2];
   int m_scratch_size[2];
+  // Only let one ParallelFor/Reduce modify the team scratch memory. The
+  // constructor acquires the mutex which is released in the destructor.
+  std::scoped_lock<std::mutex> m_scratch_lock;
 
-  template <typename Functor>
+  template <typename FunctorWrapper>
   sycl::event sycl_direct_launch(const Policy& policy,
-                                 const Functor& functor) const {
+                                 const FunctorWrapper& functor_wrapper,
+                                 const sycl::event& memcpy_events) const {
     // Convenience references
     const Kokkos::Experimental::SYCL& space = policy.space();
     Kokkos::Experimental::Impl::SYCLInternal& instance =
@@ -418,34 +436,45 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
       // Avoid capturing *this since it might not be trivially copyable
       const auto shmem_begin     = m_shmem_begin;
       const int scratch_size[2]  = {m_scratch_size[0], m_scratch_size[1]};
-      void* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]};
-
+      char* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]};
+
+      auto lambda = [=](sycl::nd_item<2> item) {
+        const member_type team_member(
+            team_scratch_memory_L0.get_pointer(), shmem_begin, scratch_size[0],
+            scratch_ptr[1] + item.get_group(1) * scratch_size[1],
+            scratch_size[1], item);
+        if constexpr (std::is_same<work_tag, void>::value)
+          functor_wrapper.get_functor()(team_member);
+        else
+          functor_wrapper.get_functor()(work_tag(), team_member);
+      };
+
+      static sycl::kernel kernel = [&] {
+        sycl::kernel_id functor_kernel_id =
+            sycl::get_kernel_id<decltype(lambda)>();
+        auto kernel_bundle =
+            sycl::get_kernel_bundle<sycl::bundle_state::executable>(
+                q.get_context(), std::vector{functor_kernel_id});
+        return kernel_bundle.get_kernel(functor_kernel_id);
+      }();
+      auto max_sg_size =
+          kernel
+              .get_info<sycl::info::kernel_device_specific::max_sub_group_size>(
+                  q.get_device(),
+                  sycl::range<3>(m_team_size, m_vector_size, 1));
+      auto final_vector_size = std::min<int>(m_vector_size, max_sg_size);
+      // FIXME_SYCL For some reason, explicitly enforcing the kernel bundle to
+      // be used gives a runtime error.
+      // cgh.use_kernel_bundle(kernel_bundle);
+
+      cgh.depends_on(memcpy_events);
       cgh.parallel_for(
           sycl::nd_range<2>(
-              sycl::range<2>(m_team_size, m_league_size * m_vector_size),
-              sycl::range<2>(m_team_size, m_vector_size)),
-          [=](sycl::nd_item<2> item) {
-#ifdef KOKKOS_ENABLE_DEBUG
-            if (item.get_sub_group().get_local_range() %
-                    item.get_local_range(1) !=
-                0)
-              Kokkos::abort(
-                  "The sub_group size is not divisible by the vector_size. "
-                  "Choose a smaller vector_size!");
-#endif
-            const member_type team_member(
-                team_scratch_memory_L0.get_pointer(), shmem_begin,
-                scratch_size[0],
-                static_cast<char*>(scratch_ptr[1]) +
-                    item.get_group(1) * scratch_size[1],
-                scratch_size[1], item);
-            if constexpr (std::is_same<work_tag, void>::value)
-              functor(team_member);
-            else
-              functor(work_tag(), team_member);
-          });
+              sycl::range<2>(m_team_size, m_league_size * final_vector_size),
+              sycl::range<2>(m_team_size, final_vector_size)),
+          lambda);
     });
-    q.submit_barrier(std::vector<sycl::event>{parallel_for_event});
+    q.ext_oneapi_submit_barrier(std::vector<sycl::event>{parallel_for_event});
     return parallel_for_event;
   }
 
@@ -456,14 +485,14 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
     Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem&
         indirectKernelMem = m_policy.space()
                                 .impl_internal_space_instance()
-                                ->m_indirectKernelMem;
+                                ->get_indirect_kernel_mem();
 
-    const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_functor, indirectKernelMem);
 
-    sycl::event event =
-        sycl_direct_launch(m_policy, functor_wrapper.get_functor());
-    functor_wrapper.register_event(indirectKernelMem, event);
+    sycl::event event = sycl_direct_launch(m_policy, functor_wrapper,
+                                           functor_wrapper.get_copy_event());
+    functor_wrapper.register_event(event);
   }
 
   ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy)
@@ -471,9 +500,14 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
         m_policy(arg_policy),
         m_league_size(arg_policy.league_size()),
         m_team_size(arg_policy.team_size()),
-        m_vector_size(arg_policy.impl_vector_length()) {
+        m_vector_size(arg_policy.impl_vector_length()),
+        m_scratch_lock(arg_policy.space()
+                           .impl_internal_space_instance()
+                           ->m_team_scratch_mutex) {
     // FIXME_SYCL optimize
-    if (m_team_size < 0) m_team_size = 32;
+    if (m_team_size < 0)
+      m_team_size =
+          m_policy.team_size_recommended(arg_functor, ParallelForTag{});
 
     m_shmem_begin = (sizeof(double) * (m_team_size + 2));
     m_shmem_size =
@@ -487,8 +521,8 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
     // upon team size.
     auto& space      = *m_policy.space().impl_internal_space_instance();
     m_scratch_ptr[0] = nullptr;
-    m_scratch_ptr[1] = space.resize_team_scratch_space(
-        static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size);
+    m_scratch_ptr[1] = static_cast<char*>(space.resize_team_scratch_space(
+        static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size));
 
     if (static_cast<int>(space.m_maxShmemPerBlock) <
         m_shmem_size - m_shmem_begin) {
@@ -546,16 +580,21 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
     size_type m_team_begin;*/
   size_type m_shmem_begin;
   size_type m_shmem_size;
-  void* m_scratch_ptr[2];
+  char* m_scratch_ptr[2];
   int m_scratch_size[2];
   const size_type m_league_size;
   int m_team_size;
   const size_type m_vector_size;
-
-  template <typename PolicyType, typename Functor, typename Reducer>
-  sycl::event sycl_direct_launch(const PolicyType& policy,
-                                 const Functor& functor,
-                                 const Reducer& reducer) const {
+  // Only let one ParallelFor/Reduce modify the team scratch memory. The
+  // constructor acquires the mutex which is released in the destructor.
+  std::scoped_lock<std::mutex> m_scratch_lock;
+
+  template <typename PolicyType, typename FunctorWrapper,
+            typename ReducerWrapper>
+  sycl::event sycl_direct_launch(
+      const PolicyType& policy, const FunctorWrapper& functor_wrapper,
+      const ReducerWrapper& reducer_wrapper,
+      const std::vector<sycl::event>& memcpy_events) const {
     using ReducerConditional =
         Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
                            FunctorType, ReducerType>;
@@ -569,26 +608,17 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>;
     using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>;
 
-    auto selected_reducer = ReducerConditional::select(functor, reducer);
-
     // Convenience references
     const Kokkos::Experimental::SYCL& space = policy.space();
     Kokkos::Experimental::Impl::SYCLInternal& instance =
         *space.impl_internal_space_instance();
     sycl::queue& q = *instance.m_queue;
 
-    // FIXME_SYCL optimize
-    const size_t wgroup_size = m_team_size * m_vector_size;
-    std::size_t size         = m_league_size * m_team_size * m_vector_size;
-    const auto init_size =
-        std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1);
     const unsigned int value_count =
         FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count(
-            selected_reducer);
-    const auto results_ptr = static_cast<pointer_type>(instance.scratch_space(
-        sizeof(value_type) * std::max(value_count, 1u) * init_size));
-    value_type* device_accessible_result_ptr =
-        m_result_ptr_device_accessible ? m_result_ptr : nullptr;
+            ReducerConditional::select(m_functor, m_reducer));
+    std::size_t size = std::size_t(m_league_size) * m_team_size * m_vector_size;
+    value_type* results_ptr = nullptr;
 
     sycl::event last_reduction_event;
 
@@ -596,6 +626,11 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
     // working with the global scratch memory but don't copy back to
     // m_result_ptr yet.
     if (size <= 1) {
+      results_ptr = static_cast<pointer_type>(instance.scratch_space(
+          sizeof(value_type) * std::max(value_count, 1u)));
+      value_type* device_accessible_result_ptr =
+          m_result_ptr_device_accessible ? m_result_ptr : nullptr;
+
       auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
         // FIXME_SYCL accessors seem to need a size greater than zero at least
         // for host queues
@@ -608,21 +643,23 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         // Avoid capturing *this since it might not be trivially copyable
         const auto shmem_begin     = m_shmem_begin;
         const int scratch_size[2]  = {m_scratch_size[0], m_scratch_size[1]};
-        void* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]};
+        char* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]};
 
+        cgh.depends_on(memcpy_events);
         cgh.parallel_for(
             sycl::nd_range<2>(sycl::range<2>(1, 1), sycl::range<2>(1, 1)),
             [=](sycl::nd_item<2> item) {
+              const auto& functor          = functor_wrapper.get_functor();
               const auto& selected_reducer = ReducerConditional::select(
                   static_cast<const FunctorType&>(functor),
-                  static_cast<const ReducerType&>(reducer));
+                  static_cast<const ReducerType&>(
+                      reducer_wrapper.get_functor()));
               reference_type update =
                   ValueInit::init(selected_reducer, results_ptr);
               if (size == 1) {
                 const member_type team_member(
                     team_scratch_memory_L0.get_pointer(), shmem_begin,
-                    scratch_size[0], static_cast<char*>(scratch_ptr[1]),
-                    scratch_size[1], item);
+                    scratch_size[0], scratch_ptr[1], scratch_size[1], item);
                 if constexpr (std::is_same<WorkTag, void>::value)
                   functor(team_member, update);
                 else
@@ -636,22 +673,18 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
                                &results_ptr[0]);
             });
       });
-      q.submit_barrier(std::vector<sycl::event>{parallel_reduce_event});
+      q.ext_oneapi_submit_barrier(
+          std::vector<sycl::event>{parallel_reduce_event});
       last_reduction_event = parallel_reduce_event;
-    }
-
-    // Otherwise, we perform a reduction on the values in all workgroups
-    // separately, write the workgroup results back to global memory and recurse
-    // until only one workgroup does the reduction and thus gets the final
-    // value.
-    bool first_run = true;
-    while (size > 1) {
-      auto n_wgroups             = (size + wgroup_size - 1) / wgroup_size;
+    } else {
+      // Otherwise, (if the total range has more than one element) we perform a
+      // reduction on the values in all workgroups separately, write the
+      // workgroup results back to global memory and recurse until only one
+      // workgroup does the reduction and thus gets the final value.
       auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
-        sycl::accessor<value_type, 1, sycl::access::mode::read_write,
-                       sycl::access::target::local>
-            local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u),
-                      cgh);
+        auto scratch_flags = static_cast<unsigned int*>(
+            instance.scratch_flags(sizeof(unsigned int)));
+
         // FIXME_SYCL accessors seem to need a size greater than zero at least
         // for host queues
         sycl::accessor<char, 1, sycl::access::mode::read_write,
@@ -663,71 +696,185 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         // Avoid capturing *this since it might not be trivially copyable
         const auto shmem_begin     = m_shmem_begin;
         const int scratch_size[2]  = {m_scratch_size[0], m_scratch_size[1]};
-        void* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]};
+        char* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]};
+
+        auto team_reduction_factory = [&](sycl::accessor<
+                                              value_type, 1,
+                                              sycl::access::mode::read_write,
+                                              sycl::access::target::local>
+                                              local_mem,
+                                          value_type* results_ptr) mutable {
+          value_type* device_accessible_result_ptr =
+              m_result_ptr_device_accessible ? m_result_ptr : nullptr;
+          auto lambda = [=](sycl::nd_item<2> item) {
+            auto n_wgroups =
+                item.get_group_range()[0] * item.get_group_range()[1];
+            auto wgroup_size =
+                item.get_local_range()[0] * item.get_local_range()[1];
+            auto size = n_wgroups * wgroup_size;
+
+            auto& num_teams_done = reinterpret_cast<unsigned int&>(
+                local_mem[wgroup_size * std::max(value_count, 1u)]);
+            const auto local_id          = item.get_local_linear_id();
+            const auto& functor          = functor_wrapper.get_functor();
+            const auto& selected_reducer = ReducerConditional::select(
+                static_cast<const FunctorType&>(functor),
+                static_cast<const ReducerType&>(reducer_wrapper.get_functor()));
+
+            if constexpr (FunctorValueTraits<ReducerTypeFwd,
+                                             WorkTagFwd>::StaticValueSize ==
+                          0) {
+              reference_type update = ValueInit::init(
+                  selected_reducer, &local_mem[local_id * value_count]);
+              const member_type team_member(
+                  team_scratch_memory_L0.get_pointer(), shmem_begin,
+                  scratch_size[0],
+                  scratch_ptr[1] + item.get_group(1) * scratch_size[1],
+                  scratch_size[1], item);
+              if constexpr (std::is_same<WorkTag, void>::value)
+                functor(team_member, update);
+              else
+                functor(WorkTag(), team_member, update);
+              item.barrier(sycl::access::fence_space::local_space);
 
-        cgh.parallel_for(
-            sycl::nd_range<2>(
-                sycl::range<2>(m_team_size, m_league_size * m_vector_size),
-                sycl::range<2>(m_team_size, m_vector_size)),
-            [=](sycl::nd_item<2> item) {
-#ifdef KOKKOS_ENABLE_DEBUG
-              if (first_run && item.get_sub_group().get_local_range() %
-                                       item.get_local_range(1) !=
-                                   0)
-                Kokkos::abort(
-                    "The sub_group size is not divisible by the vector_size. "
-                    "Choose a smaller vector_size!");
-#endif
-              const auto local_id = item.get_local_linear_id();
-              const auto global_id =
-                  wgroup_size * item.get_group_linear_id() + local_id;
-              const auto& selected_reducer = ReducerConditional::select(
-                  static_cast<const FunctorType&>(functor),
-                  static_cast<const ReducerType&>(reducer));
-
-              // In the first iteration, we call functor to initialize the local
-              // memory. Otherwise, the local memory is initialized with the
-              // results from the previous iteration that are stored in global
-              // memory.
-              if (first_run) {
-                reference_type update = ValueInit::init(
-                    selected_reducer, &local_mem[local_id * value_count]);
-                const member_type team_member(
-                    team_scratch_memory_L0.get_pointer(), shmem_begin,
-                    scratch_size[0],
-                    static_cast<char*>(scratch_ptr[1]) +
-                        item.get_group(1) * scratch_size[1],
-                    scratch_size[1], item);
-                if constexpr (std::is_same<WorkTag, void>::value)
-                  functor(team_member, update);
-                else
-                  functor(WorkTag(), team_member, update);
-              } else {
-                if (global_id >= size)
+              SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>(
+                  item, local_mem.get_pointer(), results_ptr,
+                  device_accessible_result_ptr, value_count, selected_reducer,
+                  static_cast<const FunctorType&>(functor), false,
+                  std::min<std::size_t>(size, item.get_local_range()[0] *
+                                                  item.get_local_range()[1]));
+
+              if (local_id == 0) {
+                sycl::ext::oneapi::atomic_ref<
+                    unsigned, sycl::ext::oneapi::memory_order::relaxed,
+                    sycl::ext::oneapi::memory_scope::device,
+                    sycl::access::address_space::global_space>
+                    scratch_flags_ref(*scratch_flags);
+                num_teams_done = ++scratch_flags_ref;
+              }
+              sycl::group_barrier(item.get_group());
+              if (num_teams_done == n_wgroups) {
+                if (local_id >= n_wgroups)
                   ValueInit::init(selected_reducer,
                                   &local_mem[local_id * value_count]);
                 else {
                   ValueOps::copy(functor, &local_mem[local_id * value_count],
-                                 &results_ptr[global_id * value_count]);
+                                 &results_ptr[local_id * value_count]);
+                  for (unsigned int id = local_id + wgroup_size; id < n_wgroups;
+                       id += wgroup_size) {
+                    ValueJoin::join(selected_reducer,
+                                    &local_mem[local_id * value_count],
+                                    &results_ptr[id * value_count]);
+                  }
                 }
+
+                SYCLReduction::workgroup_reduction<ValueJoin, ValueOps,
+                                                   WorkTag>(
+                    item, local_mem.get_pointer(), results_ptr,
+                    device_accessible_result_ptr, value_count, selected_reducer,
+                    static_cast<const FunctorType&>(functor), true,
+                    std::min(n_wgroups, item.get_local_range()[0] *
+                                            item.get_local_range()[1]));
+              }
+            } else {
+              value_type local_value;
+              reference_type update =
+                  ValueInit::init(selected_reducer, &local_value);
+              const member_type team_member(
+                  team_scratch_memory_L0.get_pointer(), shmem_begin,
+                  scratch_size[0],
+                  scratch_ptr[1] + item.get_group(1) * scratch_size[1],
+                  scratch_size[1], item);
+              if constexpr (std::is_same<WorkTag, void>::value)
+                functor(team_member, update);
+              else
+                functor(WorkTag(), team_member, update);
+
+              SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>(
+                  item, local_mem.get_pointer(), local_value, results_ptr,
+                  device_accessible_result_ptr, selected_reducer,
+                  static_cast<const FunctorType&>(functor), false,
+                  std::min<std::size_t>(size, item.get_local_range()[0] *
+                                                  item.get_local_range()[1]));
+
+              if (local_id == 0) {
+                sycl::ext::oneapi::atomic_ref<
+                    unsigned, sycl::ext::oneapi::memory_order::relaxed,
+                    sycl::ext::oneapi::memory_scope::device,
+                    sycl::access::address_space::global_space>
+                    scratch_flags_ref(*scratch_flags);
+                num_teams_done = ++scratch_flags_ref;
               }
               item.barrier(sycl::access::fence_space::local_space);
+              if (num_teams_done == n_wgroups) {
+                if (local_id >= n_wgroups)
+                  ValueInit::init(selected_reducer, &local_value);
+                else {
+                  local_value = results_ptr[local_id];
+                  for (unsigned int id = local_id + wgroup_size; id < n_wgroups;
+                       id += wgroup_size) {
+                    ValueJoin::join(selected_reducer, &local_value,
+                                    &results_ptr[id]);
+                  }
+                }
 
-              SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>(
-                  item, local_mem.get_pointer(), results_ptr,
-                  device_accessible_result_ptr, value_count, selected_reducer,
-                  static_cast<const FunctorType&>(functor),
-                  n_wgroups <= 1 && item.get_group_linear_id() == 0);
+                SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>(
+                    item, local_mem.get_pointer(), local_value, results_ptr,
+                    device_accessible_result_ptr, selected_reducer,
+                    static_cast<const FunctorType&>(functor), true,
+                    std::min(n_wgroups, item.get_local_range()[0] *
+                                            item.get_local_range()[1]));
+              }
+            }
+          };
+          return lambda;
+        };
+
+        auto dummy_reduction_lambda = team_reduction_factory({1, cgh}, nullptr);
+
+        static sycl::kernel kernel = [&] {
+          sycl::kernel_id functor_kernel_id =
+              sycl::get_kernel_id<decltype(dummy_reduction_lambda)>();
+          auto kernel_bundle =
+              sycl::get_kernel_bundle<sycl::bundle_state::executable>(
+                  q.get_context(), std::vector{functor_kernel_id});
+          return kernel_bundle.get_kernel(functor_kernel_id);
+        }();
+        auto max_sg_size = kernel.get_info<
+            sycl::info::kernel_device_specific::max_sub_group_size>(
+            q.get_device(), sycl::range<3>(m_team_size, m_vector_size, 1));
+        auto final_vector_size = std::min<int>(m_vector_size, max_sg_size);
+        // FIXME_SYCL For some reason, explicitly enforcing the kernel bundle to
+        // be used gives a runtime error.
+
+        //     cgh.use_kernel_bundle(kernel_bundle);
+
+        auto wgroup_size = m_team_size * final_vector_size;
+        std::size_t size = std::size_t(m_league_size) * wgroup_size;
+        sycl::accessor<value_type, 1, sycl::access::mode::read_write,
+                       sycl::access::target::local>
+            local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u) +
+                          (sizeof(unsigned int) + sizeof(value_type) - 1) /
+                              sizeof(value_type),
+                      cgh);
 
-              // FIXME_SYCL not quite sure why this is necessary
-              item.barrier(sycl::access::fence_space::global_space);
-            });
-      });
-      q.submit_barrier(std::vector<sycl::event>{parallel_reduce_event});
-      last_reduction_event = parallel_reduce_event;
+        const auto init_size =
+            std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1);
+        results_ptr = static_cast<pointer_type>(instance.scratch_space(
+            sizeof(value_type) * std::max(value_count, 1u) * init_size));
 
-      first_run = false;
-      size      = n_wgroups;
+        auto reduction_lambda = team_reduction_factory(local_mem, results_ptr);
+
+        cgh.depends_on(memcpy_events);
+
+        cgh.parallel_for(
+            sycl::nd_range<2>(
+                sycl::range<2>(m_team_size, m_league_size * m_vector_size),
+                sycl::range<2>(m_team_size, m_vector_size)),
+            reduction_lambda);
+      });
+      last_reduction_event       = q.ext_oneapi_submit_barrier(
+          std::vector<sycl::event>{parallel_reduce_event});
     }
 
     // At this point, the reduced value is written to the entry in results_ptr
@@ -752,24 +899,27 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
         *m_policy.space().impl_internal_space_instance();
     using IndirectKernelMem =
         Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem;
-    IndirectKernelMem& indirectKernelMem  = instance.m_indirectKernelMem;
-    IndirectKernelMem& indirectReducerMem = instance.m_indirectReducerMem;
+    IndirectKernelMem& indirectKernelMem  = instance.get_indirect_kernel_mem();
+    IndirectKernelMem& indirectReducerMem = instance.get_indirect_kernel_mem();
 
-    const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_functor, indirectKernelMem);
-    const auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper(
+    auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper(
         m_reducer, indirectReducerMem);
 
     sycl::event event = sycl_direct_launch(
-        m_policy, functor_wrapper.get_functor(), reducer_wrapper.get_functor());
-    functor_wrapper.register_event(indirectKernelMem, event);
-    reducer_wrapper.register_event(indirectReducerMem, event);
+        m_policy, functor_wrapper, reducer_wrapper,
+        {functor_wrapper.get_copy_event(), reducer_wrapper.get_copy_event()});
+    functor_wrapper.register_event(event);
+    reducer_wrapper.register_event(event);
   }
 
  private:
   void initialize() {
     // FIXME_SYCL optimize
-    if (m_team_size < 0) m_team_size = 32;
+    if (m_team_size < 0)
+      m_team_size =
+          m_policy.team_size_recommended(m_functor, ParallelReduceTag{});
     // Must be a power of two greater than two, get the one not bigger than the
     // requested one.
     if ((m_team_size & m_team_size - 1) || m_team_size < 2) {
@@ -790,8 +940,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
     // upon team size.
     auto& space      = *m_policy.space().impl_internal_space_instance();
     m_scratch_ptr[0] = nullptr;
-    m_scratch_ptr[1] = space.resize_team_scratch_space(
-        static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size);
+    m_scratch_ptr[1] = static_cast<char*>(space.resize_team_scratch_space(
+        static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size));
 
     if (static_cast<int>(space.m_maxShmemPerBlock) <
         m_shmem_size - m_shmem_begin) {
@@ -803,7 +953,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
       Kokkos::Impl::throw_runtime_exception(out.str());
     }
 
-    if (m_team_size > m_policy.team_size_max(m_functor, ParallelForTag{}))
+    if (m_team_size > m_policy.team_size_max(m_functor, ParallelReduceTag{}))
       Kokkos::Impl::throw_runtime_exception(
           "Kokkos::Impl::ParallelFor<SYCL> requested too large team size.");
   }
@@ -823,7 +973,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
                               typename ViewType::memory_space>::accessible),
         m_league_size(arg_policy.league_size()),
         m_team_size(arg_policy.team_size()),
-        m_vector_size(arg_policy.impl_vector_length()) {
+        m_vector_size(arg_policy.impl_vector_length()),
+        m_scratch_lock(arg_policy.space()
+                           .impl_internal_space_instance()
+                           ->m_team_scratch_mutex) {
     initialize();
   }
 
@@ -839,7 +992,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
                                   memory_space>::accessible),
         m_league_size(arg_policy.league_size()),
         m_team_size(arg_policy.team_size()),
-        m_vector_size(arg_policy.impl_vector_length()) {
+        m_vector_size(arg_policy.impl_vector_length()),
+        m_scratch_lock(arg_policy.space()
+                           .impl_internal_space_instance()
+                           ->m_team_scratch_mutex) {
     initialize();
   }
 };
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp
index 6ec6204e711586b0d88d6882955d21bf830a5327..f9b34dca2fa28f4aa3cc59aad720924b380486e8 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp
@@ -58,14 +58,15 @@ namespace Kokkos {
 namespace Impl {
 
 void DeepCopySYCL(void* dst, const void* src, size_t n) {
-  Experimental::SYCL().fence("Kokkos::Impl::DeepCopySYCL: fence before memcpy");
   Experimental::Impl::SYCLInternal::singleton().m_queue->memcpy(dst, src, n);
-  Experimental::SYCL().fence("Kokkos::Impl::DeepCopySYCL: fence after memcpy");
 }
 
 void DeepCopyAsyncSYCL(const Kokkos::Experimental::SYCL& instance, void* dst,
                        const void* src, size_t n) {
-  instance.impl_internal_space_instance()->m_queue->memcpy(dst, src, n);
+  auto event =
+      instance.impl_internal_space_instance()->m_queue->memcpy(dst, src, n);
+  instance.impl_internal_space_instance()->m_queue->ext_oneapi_submit_barrier(
+      std::vector<sycl::event>{event});
 }
 
 void DeepCopyAsyncSYCL(void* dst, const void* src, size_t n) {
@@ -244,15 +245,20 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>::
                                   void>::s_root_record,
 #endif
           Kokkos::Impl::checked_allocation_with_header(space, label, size),
-          sizeof(SharedAllocationHeader) + size, dealloc),
+          sizeof(SharedAllocationHeader) + size, dealloc, label),
       m_space(space) {
   SharedAllocationHeader header;
 
   this->base_t::_fill_host_accessible_header_info(header, label);
 
   // Copy to device memory
+  Kokkos::Experimental::SYCL exec;
   Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, HostSpace>(
-      RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
+      exec, RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
+  exec.fence(
+      "SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, "
+      "void>::SharedAllocationRecord(): fence after copying header from "
+      "HostSpace");
 }
 
 SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void>::
@@ -269,7 +275,8 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void>::
 #endif
           Impl::checked_allocation_with_header(arg_space, arg_label,
                                                arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
 
   this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
@@ -290,7 +297,8 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLHostUSMSpace, void>::
 #endif
           Impl::checked_allocation_with_header(arg_space, arg_label,
                                                arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
 
   this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
@@ -308,38 +316,25 @@ namespace Impl {
 
 SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace,
                        void>::~SharedAllocationRecord() {
-  const char* label = nullptr;
-  if (Kokkos::Profiling::profileLibraryLoaded()) {
-    SharedAllocationHeader header;
-    Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace,
-                           Kokkos::HostSpace>(&header, RecordBase::m_alloc_ptr,
-                                              sizeof(SharedAllocationHeader));
-    label = header.label();
-  }
   const auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size;
-  m_space.deallocate(label, SharedAllocationRecord<void, void>::m_alloc_ptr,
+  m_space.deallocate(m_label.c_str(),
+                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                      alloc_size, alloc_size - sizeof(SharedAllocationHeader));
 }
 
 SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace,
                        void>::~SharedAllocationRecord() {
-  const char* label = nullptr;
-  if (Kokkos::Profiling::profileLibraryLoaded()) {
-    label = RecordBase::m_alloc_ptr->m_label;
-  }
   const auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size;
-  m_space.deallocate(label, SharedAllocationRecord<void, void>::m_alloc_ptr,
+  m_space.deallocate(m_label.c_str(),
+                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                      alloc_size, alloc_size - sizeof(SharedAllocationHeader));
 }
 
 SharedAllocationRecord<Kokkos::Experimental::SYCLHostUSMSpace,
                        void>::~SharedAllocationRecord() {
-  const char* label = nullptr;
-  if (Kokkos::Profiling::profileLibraryLoaded()) {
-    label = RecordBase::m_alloc_ptr->m_label;
-  }
   const auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size;
-  m_space.deallocate(label, SharedAllocationRecord<void, void>::m_alloc_ptr,
+  m_space.deallocate(m_label.c_str(),
+                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                      alloc_size, alloc_size - sizeof(SharedAllocationHeader));
 }
 
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp
index c405ad31a5fb6d9bb7abee273b9ff10c474b134c..bda2dfd0aa9384e51462012418336cb3c629acb5 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp
@@ -100,7 +100,9 @@ class SYCLTeamMember {
   KOKKOS_INLINE_FUNCTION int team_size() const {
     return m_item.get_local_range(0);
   }
-  KOKKOS_INLINE_FUNCTION void team_barrier() const { m_item.barrier(); }
+  KOKKOS_INLINE_FUNCTION void team_barrier() const {
+    sycl::group_barrier(m_item.get_group());
+  }
 
   KOKKOS_INLINE_FUNCTION const sycl::nd_item<2>& item() const { return m_item; }
 
@@ -119,13 +121,13 @@ class SYCLTeamMember {
   KOKKOS_INLINE_FUNCTION std::enable_if_t<!std::is_arithmetic_v<ValueType>>
   team_broadcast(ValueType& val, const int thread_id) const {
     // Wait for shared data write until all threads arrive here
-    m_item.barrier(sycl::access::fence_space::local_space);
+    sycl::group_barrier(m_item.get_group());
     if (m_item.get_local_id(1) == 0 &&
         static_cast<int>(m_item.get_local_id(0)) == thread_id) {
       *static_cast<ValueType*>(m_team_reduce) = val;
     }
     // Wait for shared data read until root thread writes
-    m_item.barrier(sycl::access::fence_space::local_space);
+    sycl::group_barrier(m_item.get_group());
     val = *(static_cast<ValueType*>(m_team_reduce));
   }
 
@@ -153,78 +155,73 @@ class SYCLTeamMember {
                   typename ReducerType::value_type& value) const noexcept {
     using value_type = typename ReducerType::value_type;
 
+    auto sg                       = m_item.get_sub_group();
+    const auto sub_group_range    = sg.get_local_range()[0];
+    const auto vector_range       = m_item.get_local_range(1);
+    const unsigned int team_size_ = team_size();
+    const unsigned int team_rank_ = team_rank();
+
+    // First combine the values in the same subgroup
+    for (unsigned int shift = 1; vector_range * shift < sub_group_range;
+         shift <<= 1) {
+      const value_type tmp = sg.shuffle_down(value, vector_range * shift);
+      if (team_rank_ + shift < team_size_) reducer.join(value, tmp);
+    }
+    value = sg.shuffle(value, 0);
+
     // We need to chunk up the whole reduction because we might not have
     // allocated enough memory.
-    const int maximum_work_range =
-        std::min<int>(m_team_reduce_size / sizeof(value_type), team_size());
-
-    int smaller_power_of_two = 1;
-    while ((smaller_power_of_two << 1) < maximum_work_range)
-      smaller_power_of_two <<= 1;
+    const auto n_subgroups = sg.get_group_range()[0];
+    const unsigned int maximum_work_range =
+        std::min<int>(m_team_reduce_size / sizeof(value_type), n_subgroups);
 
-    const int idx        = team_rank();
+    const auto id_in_sg  = sg.get_local_id()[0];
     auto reduction_array = static_cast<value_type*>(m_team_reduce);
 
     // Load values into the first maximum_work_range values of the reduction
-    // array in chunks. This means that only threads with an id in the
-    // corresponding chunk load values and the reduction is always done by the
-    // first smaller_power_of_two threads.
-    if (idx < maximum_work_range) reduction_array[idx] = value;
-    m_item.barrier(sycl::access::fence_space::local_space);
-
-    for (int start = maximum_work_range; start < team_size();
+    // array in chunks. This means that only sub groups with an id in the
+    // corresponding chunk load values.
+    const auto group_id = sg.get_group_id()[0];
+    if (id_in_sg == 0 && group_id < maximum_work_range)
+      reduction_array[group_id] = value;
+    sycl::group_barrier(m_item.get_group());
+
+    for (unsigned int start = maximum_work_range; start < n_subgroups;
          start += maximum_work_range) {
-      if (idx >= start &&
-          idx < std::min(start + maximum_work_range, team_size()))
-        reducer.join(reduction_array[idx - start], value);
-      m_item.barrier(sycl::access::fence_space::local_space);
-    }
-
-    for (int stride = smaller_power_of_two; stride > 0; stride >>= 1) {
-      if (idx < stride && idx + stride < maximum_work_range)
-        reducer.join(reduction_array[idx], reduction_array[idx + stride]);
-      m_item.barrier(sycl::access::fence_space::local_space);
-    }
-    reducer.reference() = reduction_array[0];
-    m_item.barrier(sycl::access::fence_space::local_space);
-  }
-
-  // FIXME_SYCL move somewhere else and combine with other places that do
-  // parallel_scan
-  // Exclusive scan returning the total sum.
-  // n is required to be a power of two and
-  // temp must point to an array containing the data to be processed
-  // The accumulated value is returned.
-  template <typename Type>
-  static Type prescan(sycl::nd_item<2> m_item, Type* temp, int n) {
-    int thid = m_item.get_local_id(0);
-
-    // First do a reduction saving intermediate results
-    for (int stride = 1; stride < n; stride <<= 1) {
-      auto idx = 2 * stride * (thid + 1) - 1;
-      if (idx < n) temp[idx] += temp[idx - stride];
-      m_item.barrier(sycl::access::fence_space::local_space);
+      if (id_in_sg == 0 && group_id >= start &&
+          group_id <
+              std::min<unsigned int>(start + maximum_work_range, n_subgroups))
+        reducer.join(reduction_array[group_id - start], value);
+      sycl::group_barrier(m_item.get_group());
     }
 
-    Type total_sum = temp[n - 1];
-    m_item.barrier(sycl::access::fence_space::local_space);
-
-    // clear the last element so we get an exclusive scan
-    if (thid == 0) temp[n - 1] = Type{};
-    m_item.barrier(sycl::access::fence_space::local_space);
-
-    // Now add the intermediate results to the remaining items again
-    for (int stride = n / 2; stride > 0; stride >>= 1) {
-      auto idx = 2 * stride * (thid + 1) - 1;
-      if (idx < n) {
-        Type dummy         = temp[idx - stride];
-        temp[idx - stride] = temp[idx];
-        temp[idx] += dummy;
+    // Let the first subgroup do the final reduction
+    if (group_id == 0) {
+      const auto local_range = sg.get_local_range()[0];
+      auto result =
+          reduction_array[id_in_sg < maximum_work_range ? id_in_sg : 0];
+      // In case the maximum_work_range is larger than the range of the first
+      // subgroup, we first combine the items with a higher index.
+      for (unsigned int offset = local_range; offset < maximum_work_range;
+           offset += local_range)
+        if (id_in_sg + offset < maximum_work_range)
+          reducer.join(result, reduction_array[id_in_sg + offset]);
+      sycl::group_barrier(sg);
+
+      // Now do the actual subgroup reduction.
+      const auto min_range =
+          std::min<unsigned int>(maximum_work_range, local_range);
+      for (unsigned int stride = 1; stride < min_range; stride <<= 1) {
+        const auto tmp = sg.shuffle_down(result, stride);
+        if (id_in_sg + stride < min_range) reducer.join(result, tmp);
       }
-      m_item.barrier(sycl::access::fence_space::local_space);
+      if (id_in_sg == 0) reduction_array[0] = result;
     }
+    sycl::group_barrier(m_item.get_group());
 
-    return total_sum;
+    reducer.reference() = reduction_array[0];
+    // Make sure that the reduction array hasn't been modified in the meantime.
+    m_item.barrier(sycl::access::fence_space::local_space);
   }
 
   //--------------------------------------------------------------------------
@@ -238,52 +235,75 @@ class SYCLTeamMember {
    *  non-deterministic.
    */
   template <typename Type>
-  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
+  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& input_value,
                                         Type* const global_accum) const {
-    // We need to chunk up the whole reduction because we might not have
-    // allocated enough memory.
-    const int maximum_work_range =
-        std::min<int>(m_team_reduce_size / sizeof(Type), team_size());
-
-    int not_greater_power_of_two = 1;
-    while ((not_greater_power_of_two << 1) < maximum_work_range + 1)
-      not_greater_power_of_two <<= 1;
-
-    Type intermediate;
-    Type total{};
-
-    const int idx        = team_rank();
-    const auto base_data = static_cast<Type*>(m_team_reduce);
-
-    // Load values into the first not_greater_power_of_two values of the
-    // reduction array in chunks. This means that only threads with an id in the
-    // corresponding chunk load values and the reduction is always done by the
-    // first not_greater_power_of_two threads.
-    for (int start = 0; start < team_size();
-         start += not_greater_power_of_two) {
-      m_item.barrier(sycl::access::fence_space::local_space);
-      if (idx >= start && idx < start + not_greater_power_of_two) {
-        base_data[idx - start] = value;
+    Type value                 = input_value;
+    auto sg                    = m_item.get_sub_group();
+    const auto sub_group_range = sg.get_local_range()[0];
+    const auto vector_range    = m_item.get_local_range(1);
+    const auto id_in_sg        = sg.get_local_id()[0];
+
+    // First combine the values in the same subgroup
+    for (unsigned int stride = 1; vector_range * stride < sub_group_range;
+         stride <<= 1) {
+      auto tmp = sg.shuffle_up(value, vector_range * stride);
+      if (id_in_sg >= vector_range * stride) value += tmp;
+    }
+
+    const auto n_active_subgroups = sg.get_group_range()[0];
+    const auto base_data          = static_cast<Type*>(m_team_reduce);
+    if (static_cast<int>(n_active_subgroups * sizeof(Type)) >
+        m_team_reduce_size)
+      Kokkos::abort("Not implemented!");
+
+    const auto group_id = sg.get_group_id()[0];
+    if (id_in_sg == sub_group_range - 1) base_data[group_id] = value;
+    sycl::group_barrier(m_item.get_group());
+
+    // scan subgroup results using the first subgroup
+    if (n_active_subgroups > 1) {
+      if (group_id == 0) {
+        const auto n_rounds =
+            (n_active_subgroups + sub_group_range - 1) / sub_group_range;
+        for (unsigned int round = 0; round < n_rounds; ++round) {
+          const auto idx         = id_in_sg + round * sub_group_range;
+          const auto upper_bound = std::min(
+              sub_group_range, n_active_subgroups - round * sub_group_range);
+          auto local_value = base_data[idx];
+          for (unsigned int stride = 1; stride < upper_bound; stride <<= 1) {
+            auto tmp = sg.shuffle_up(local_value, stride);
+            if (id_in_sg >= stride) {
+              if (idx < n_active_subgroups)
+                local_value += tmp;
+              else
+                local_value = tmp;
+            }
+          }
+          base_data[idx] = local_value;
+          if (round > 0)
+            base_data[idx] += base_data[round * sub_group_range - 1];
+          if (round + 1 < n_rounds) sycl::group_barrier(sg);
+        }
       }
-      m_item.barrier(sycl::access::fence_space::local_space);
-
-      const Type partial_total =
-          prescan(m_item, base_data, not_greater_power_of_two);
-      if (idx >= start && idx < start + not_greater_power_of_two)
-        intermediate = base_data[idx - start] + total;
-      if (start == 0)
-        total = partial_total;
-      else
-        total += partial_total;
+      sycl::group_barrier(m_item.get_group());
     }
+    auto total = base_data[n_active_subgroups - 1];
+
+    const auto update = sg.shuffle_up(value, vector_range);
+    Type intermediate = (group_id > 0 ? base_data[group_id - 1] : 0) +
+                        (id_in_sg >= vector_range ? update : 0);
 
     if (global_accum) {
-      if (team_size() == idx + 1) {
-        base_data[team_size()] = atomic_fetch_add(global_accum, total);
+      if (id_in_sg == sub_group_range - 1 &&
+          group_id == n_active_subgroups - 1) {
+        base_data[n_active_subgroups - 1] =
+            atomic_fetch_add(global_accum, total);
       }
-      m_item.barrier();  // Wait for atomic
-      intermediate += base_data[team_size()];
+      sycl::group_barrier(m_item.get_group());  // Wait for atomic
+      intermediate += base_data[n_active_subgroups - 1];
     }
+    // Make sure that the reduction array hasn't been modified in the meantime.
+    m_item.barrier(sycl::access::fence_space::local_space);
 
     return intermediate;
   }
@@ -701,7 +721,13 @@ KOKKOS_INLINE_FUNCTION void parallel_for(
        i += grange1)
     closure(i);
 
-  loop_boundaries.member.item().get_sub_group().barrier();
+  // FIXME_SYCL We only should fence active threads here but this not yet
+  // available in the compiler. We need https://github.com/intel/llvm/pull/4904
+  // or https://github.com/intel/llvm/pull/4903 for that. The current
+  // implementation leads to a deadlock only for SYCL+CUDA if not all threads in
+  // a subgroup see this barrier. For SYCL on Intel GPUs, the subgroup barrier
+  // is essentially a no-op (only a memory fence), though.
+  sycl::group_barrier(loop_boundaries.member.item().get_sub_group());
 }
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp
index d2820b3b3a34cdb933c4615260a73e1b82e7de34..82bfae46f590165823dfe74b5487dd32ea3a54f7 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp
@@ -52,20 +52,25 @@
 namespace Kokkos {
 namespace Experimental {
 
+namespace Impl {
+Kokkos::View<uint32_t*, SYCLDeviceUSMSpace> sycl_global_unique_token_locks(
+    bool deallocate = false);
+}
+
 // both global and instance Unique Tokens are implemented in the same way
+// the global version has one shared static lock array underneath
+// but it can't be a static member variable since we need to acces it on device
+// and we share the implementation with the instance version
 template <>
 class UniqueToken<SYCL, UniqueTokenScope::Global> {
- protected:
-  uint32_t volatile* m_buffer;
-  uint32_t m_count;
+  Kokkos::View<uint32_t*, SYCLDeviceUSMSpace> m_locks;
 
  public:
   using execution_space = SYCL;
   using size_type       = int32_t;
 
   explicit UniqueToken(execution_space const& = execution_space())
-      : m_buffer(Impl::SYCLInternal::singleton().m_scratchConcurrentBitset),
-        m_count(SYCL::concurrency()) {}
+      : m_locks(Impl::sycl_global_unique_token_locks()) {}
 
   KOKKOS_DEFAULTED_FUNCTION
   UniqueToken(const UniqueToken&) = default;
@@ -81,51 +86,89 @@ class UniqueToken<SYCL, UniqueTokenScope::Global> {
 
   /// \brief upper bound for acquired values, i.e. 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
-  size_type size() const noexcept { return m_count; }
+  size_type size() const noexcept { return m_locks.extent(0); }
+
+ protected:
+  // Constructors for the Instance version
+  UniqueToken(size_type max_size)
+      : m_locks(Kokkos::View<uint32_t*, SYCLDeviceUSMSpace>(
+            "Kokkos::UniqueToken::m_locks", max_size)) {}
+
+  UniqueToken(size_type max_size, execution_space const& arg)
+      : m_locks(Kokkos::View<uint32_t*, SYCLDeviceUSMSpace>(
+            Kokkos::view_alloc(arg, "Kokkos::UniqueToken::m_locks"),
+            max_size)) {}
 
+ private:
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
-  size_type acquire() const {
-    const Kokkos::pair<int, int> result =
-        Kokkos::Impl::concurrent_bitset::acquire_bounded(
-            m_buffer, m_count
-#ifdef KOKKOS_ARCH_INTEL_GPU
-            ,
-            Kokkos::Impl::clock_tic() % m_count
-#endif
-        );
-
-    if (result.first < 0) {
-      Kokkos::abort(
-          "UniqueToken<SYCL> failure to acquire tokens, no tokens available");
+  size_type impl_acquire() const {
+    auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
+    std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
+                                item.get_local_id(0)};
+    std::size_t blockIdx[3]  = {item.get_group(2), item.get_group(1),
+                               item.get_group(0)};
+    std::size_t blockDim[3] = {item.get_local_range(2), item.get_local_range(1),
+                               item.get_local_range(0)};
+
+    int idx = blockIdx[0] * (blockDim[0] * blockDim[1]) +
+              threadIdx[1] * blockDim[0] + threadIdx[0];
+    idx %= size();
+
+    while (Kokkos::atomic_compare_exchange(&m_locks(idx), 0, 1) == 1) {
+      idx += blockDim[1] * blockDim[0] + 1;
+      idx %= size();
     }
 
-    return result.first;
+    // Make sure that all writes in the previous lock owner are visible to me
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_thread_fence(desul::MemoryOrderAcquire(),
+                               desul::MemoryScopeDevice());
+#else
+    Kokkos::memory_fence();
+#endif
+    return idx;
+  }
+
+ public:
+  /// \brief acquire value such that 0 <= value < size()
+  KOKKOS_INLINE_FUNCTION
+  size_type acquire() const {
+    KOKKOS_IF_ON_DEVICE(return impl_acquire();)
+    KOKKOS_IF_ON_HOST(return 0;)
   }
 
   /// \brief release an acquired value
   KOKKOS_INLINE_FUNCTION
-  void release(size_type i) const noexcept {
-    Kokkos::Impl::concurrent_bitset::release(m_buffer, i);
+  void release(size_type idx) const noexcept {
+    // Make sure my writes are visible to the next lock owner
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_thread_fence(desul::MemoryOrderRelease(),
+                               desul::MemoryScopeDevice());
+#else
+    Kokkos::memory_fence();
+#endif
+    (void)Kokkos::atomic_exchange(&m_locks(idx), 0);
   }
 };
 
 template <>
 class UniqueToken<SYCL, UniqueTokenScope::Instance>
     : public UniqueToken<SYCL, UniqueTokenScope::Global> {
-  View<uint32_t*, SYCLDeviceUSMSpace> m_buffer_view;
-
  public:
-  explicit UniqueToken(execution_space const& arg = execution_space())
-      : UniqueToken<SYCL, UniqueTokenScope::Global>(arg) {}
-
-  UniqueToken(size_type max_size, execution_space const& = execution_space())
-      : m_buffer_view(
-            "UniqueToken::m_buffer_view",
-            ::Kokkos::Impl::concurrent_bitset::buffer_bound(max_size)) {
-    m_buffer = m_buffer_view.data();
-    m_count  = max_size;
-  }
+  UniqueToken()
+      : UniqueToken<SYCL, UniqueTokenScope::Global>(
+            Kokkos::Experimental::SYCL().concurrency()) {}
+
+  explicit UniqueToken(execution_space const& arg)
+      : UniqueToken<SYCL, UniqueTokenScope::Global>(
+            Kokkos::Experimental::SYCL().concurrency(), arg) {}
+
+  explicit UniqueToken(size_type max_size)
+      : UniqueToken<SYCL, UniqueTokenScope::Global>(max_size) {}
+
+  UniqueToken(size_type max_size, execution_space const& arg)
+      : UniqueToken<SYCL, UniqueTokenScope::Global>(max_size, arg) {}
 };
 
 }  // namespace Experimental
diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
index 18ef97ae4650ff50e4ea4a51b74ab53c88970ca4..8a7c49871bc0e3bed33de6fe75eaa3f207739c65 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
@@ -50,6 +50,7 @@
 #include <utility>
 #include <iostream>
 #include <sstream>
+#include <thread>
 
 #include <Kokkos_Core.hpp>
 
@@ -66,7 +67,7 @@ namespace {
 
 ThreadsExec s_threads_process;
 ThreadsExec *s_threads_exec[ThreadsExec::MAX_THREAD_COUNT] = {nullptr};
-pthread_t s_threads_pid[ThreadsExec::MAX_THREAD_COUNT]     = {0};
+std::thread::id s_threads_pid[ThreadsExec::MAX_THREAD_COUNT];
 std::pair<unsigned, unsigned> s_threads_coord[ThreadsExec::MAX_THREAD_COUNT];
 
 int s_thread_pool_size[3] = {0, 0, 0};
@@ -164,7 +165,7 @@ ThreadsExec::ThreadsExec()
       m_pool_fan_size  = fan_size(m_pool_rank, m_pool_size);
       m_pool_state     = ThreadsExec::Active;
 
-      s_threads_pid[m_pool_rank] = pthread_self();
+      s_threads_pid[m_pool_rank] = std::this_thread::get_id();
 
       // Inform spawning process that the threads_exec entry has been set.
       s_threads_process.m_pool_state = ThreadsExec::Active;
@@ -178,7 +179,7 @@ ThreadsExec::ThreadsExec()
     m_pool_size  = 1;
     m_pool_state = ThreadsExec::Inactive;
 
-    s_threads_pid[m_pool_rank] = pthread_self();
+    s_threads_pid[m_pool_rank] = std::this_thread::get_id();
   }
 }
 
@@ -398,38 +399,6 @@ bool ThreadsExec::wake() {
 
 //----------------------------------------------------------------------------
 
-void ThreadsExec::execute_serial(void (*func)(ThreadsExec &, const void *)) {
-  s_current_function     = func;
-  s_current_function_arg = &s_threads_process;
-
-  // Make sure function and arguments are written before activating threads.
-  memory_fence();
-
-  const unsigned begin = s_threads_process.m_pool_base ? 1 : 0;
-
-  for (unsigned i = s_thread_pool_size[0]; begin < i;) {
-    ThreadsExec &th = *s_threads_exec[--i];
-
-    th.m_pool_state = ThreadsExec::Active;
-
-    wait_yield(th.m_pool_state, ThreadsExec::Active);
-  }
-
-  if (s_threads_process.m_pool_base) {
-    s_threads_process.m_pool_state = ThreadsExec::Active;
-    (*func)(s_threads_process, nullptr);
-    s_threads_process.m_pool_state = ThreadsExec::Inactive;
-  }
-
-  s_current_function_arg = nullptr;
-  s_current_function     = nullptr;
-
-  // Make sure function and arguments are cleared before proceeding.
-  memory_fence();
-}
-
-//----------------------------------------------------------------------------
-
 void *ThreadsExec::root_reduce_scratch() {
   return s_threads_process.reduce_memory();
 }
@@ -492,7 +461,7 @@ void *ThreadsExec::resize_scratch(size_t reduce_size, size_t thread_size) {
     s_threads_process.m_scratch_reduce_end = reduce_size;
     s_threads_process.m_scratch_thread_end = reduce_size + thread_size;
 
-    execute_serial(&execute_resize_scratch);
+    execute_resize_scratch(s_threads_process, nullptr);
 
     s_threads_process.m_scratch = s_threads_exec[0]->m_scratch;
   }
@@ -637,9 +606,8 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count,
       // Wait until spawned thread has attempted to initialize.
       // If spawning and initialization is successful then
       // an entry in 's_threads_exec' will be assigned.
-      if (ThreadsExec::spawn()) {
-        wait_yield(s_threads_process.m_pool_state, ThreadsExec::Inactive);
-      }
+      ThreadsExec::spawn();
+      wait_yield(s_threads_process.m_pool_state, ThreadsExec::Inactive);
       if (s_threads_process.m_pool_state == ThreadsExec::Terminating) break;
     }
 
@@ -681,7 +649,8 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count,
         s_threads_process.m_pool_size     = thread_count;
         s_threads_process.m_pool_fan_size = fan_size(
             s_threads_process.m_pool_rank, s_threads_process.m_pool_size);
-        s_threads_pid[s_threads_process.m_pool_rank] = pthread_self();
+        s_threads_pid[s_threads_process.m_pool_rank] =
+            std::this_thread::get_id();
       } else {
         s_threads_process.m_pool_base     = nullptr;
         s_threads_process.m_pool_rank     = 0;
@@ -755,7 +724,7 @@ void ThreadsExec::finalize() {
       s_threads_process.m_pool_state = ThreadsExec::Inactive;
     }
 
-    s_threads_pid[i] = 0;
+    s_threads_pid[i] = std::thread::id();
   }
 
   if (s_threads_process.m_pool_base) {
@@ -806,20 +775,18 @@ Threads &Threads::impl_instance(int) {
   return t;
 }
 
-int Threads::impl_thread_pool_size(int depth) {
-  return Impl::s_thread_pool_size[depth];
-}
-
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-int Threads::impl_thread_pool_rank() {
-  const pthread_t pid = pthread_self();
-  int i               = 0;
+int Threads::impl_thread_pool_rank_host() {
+  const std::thread::id pid = std::this_thread::get_id();
+  int i                     = 0;
   while ((i < Impl::s_thread_pool_size[0]) && (pid != Impl::s_threads_pid[i])) {
     ++i;
   }
   return i;
 }
-#endif
+
+int Threads::impl_thread_pool_size(int depth) {
+  return Impl::s_thread_pool_size[depth];
+}
 
 const char *Threads::name() { return "Threads"; }
 
@@ -843,10 +810,11 @@ void ThreadsSpaceInitializer::initialize(const InitArguments &args) {
     } else {
       Kokkos::Threads::impl_initialize();
     }
-    // std::cout << "Kokkos::initialize() fyi: Pthread enabled and initialized"
+    // std::cout << "Kokkos::initialize() fyi: CppThread enabled and
+    // initialized"
     // << std::endl ;
   } else {
-    // std::cout << "Kokkos::initialize() fyi: Pthread enabled but not
+    // std::cout << "Kokkos::initialize() fyi: CppThread enabled but not
     // initialized" << std::endl ;
   }
 }
diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp
index 4d9a72a03467977ed21867a90a84563c7254bba7..561b1ce292eb387a45891ef2de8c678a3bdda2e1 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp
@@ -121,7 +121,7 @@ class ThreadsExec {
 
   static void global_lock();
   static void global_unlock();
-  static bool spawn();
+  static void spawn();
 
   static void execute_resize_scratch(ThreadsExec &, const void *);
   static void execute_sleep(ThreadsExec &, const void *);
@@ -129,8 +129,6 @@ class ThreadsExec {
   ThreadsExec(const ThreadsExec &);
   ThreadsExec &operator=(const ThreadsExec &);
 
-  static void execute_serial(void (*)(ThreadsExec &, const void *));
-
  public:
   KOKKOS_INLINE_FUNCTION int pool_size() const { return m_pool_size; }
   KOKKOS_INLINE_FUNCTION int pool_rank() const { return m_pool_rank; }
@@ -693,36 +691,33 @@ class UniqueToken<Threads, UniqueTokenScope::Instance> {
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int acquire() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (m_buffer == nullptr) {
-      return Threads::impl_thread_pool_rank();
-    } else {
-      const ::Kokkos::pair<int, int> result =
-          ::Kokkos::Impl::concurrent_bitset::acquire_bounded(
-              m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count);
-
-      if (result.first < 0) {
-        ::Kokkos::abort(
-            "UniqueToken<Threads> failure to acquire tokens, no tokens "
-            "available");
-      }
-      return result.first;
-    }
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((
+        if (m_buffer == nullptr) {
+          return Threads::impl_thread_pool_rank();
+        } else {
+          const ::Kokkos::pair<int, int> result =
+              ::Kokkos::Impl::concurrent_bitset::acquire_bounded(
+                  m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count);
+
+          if (result.first < 0) {
+            ::Kokkos::abort(
+                "UniqueToken<Threads> failure to acquire tokens, no tokens "
+                "available");
+          }
+          return result.first;
+        }))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief release a value acquired by generate
   KOKKOS_INLINE_FUNCTION
   void release(int i) const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (m_buffer != nullptr) {
+    KOKKOS_IF_ON_HOST((if (m_buffer != nullptr) {
       ::Kokkos::Impl::concurrent_bitset::release(m_buffer, i);
-    }
-#else
-    (void)i;
-#endif
+    }))
+
+    KOKKOS_IF_ON_DEVICE(((void)i;))
   }
 };
 
@@ -740,21 +735,17 @@ class UniqueToken<Threads, UniqueTokenScope::Global> {
   /// \brief upper bound for acquired values, i.e. 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int size() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return Threads::impl_thread_pool_size();
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((return Threads::impl_thread_pool_size();))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief acquire value such that 0 <= value < size()
   KOKKOS_INLINE_FUNCTION
   int acquire() const noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return Threads::impl_thread_pool_rank();
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((return Threads::impl_thread_pool_rank();))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
   /// \brief release a value acquired by generate
diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp
index e4eaeac78163efe48a2ddbd6d39920900b035c29..bd29a0b18ae3951c30d54b1a897d42cff0805397 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp
@@ -46,11 +46,6 @@
 #if defined(KOKKOS_ENABLE_THREADS)
 
 #include <Kokkos_Core_fwd.hpp>
-/* Standard 'C' Linux libraries */
-
-#include <pthread.h>
-#include <sched.h>
-#include <errno.h>
 
 /* Standard C++ libraries */
 
@@ -58,6 +53,8 @@
 #include <string>
 #include <iostream>
 #include <stdexcept>
+#include <thread>
+#include <mutex>
 
 #include <Kokkos_Threads.hpp>
 
@@ -67,14 +64,14 @@ namespace Kokkos {
 namespace Impl {
 namespace {
 
-pthread_mutex_t host_internal_pthread_mutex = PTHREAD_MUTEX_INITIALIZER;
+std::mutex host_internal_cppthread_mutex;
 
-// Pthreads compatible driver.
+// std::thread compatible driver.
 // Recovery from an exception would require constant intra-thread health
 // verification; which would negatively impact runtime.  As such simply
 // abort the process.
 
-void* internal_pthread_driver(void*) {
+void internal_cppthread_driver() {
   try {
     ThreadsExec::driver();
   } catch (const std::exception& x) {
@@ -87,7 +84,6 @@ void* internal_pthread_driver(void*) {
     std::cerr.flush();
     std::abort();
   }
-  return nullptr;
 }
 
 }  // namespace
@@ -95,141 +91,28 @@ void* internal_pthread_driver(void*) {
 //----------------------------------------------------------------------------
 // Spawn a thread
 
-bool ThreadsExec::spawn() {
-  bool result = false;
-
-  pthread_attr_t attr;
-
-  if (0 == pthread_attr_init(&attr) &&
-      0 == pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) &&
-      0 == pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
-    pthread_t pt;
-
-    result = 0 == pthread_create(&pt, &attr, internal_pthread_driver, nullptr);
-  }
-
-  pthread_attr_destroy(&attr);
-
-  return result;
+void ThreadsExec::spawn() {
+  std::thread t(internal_cppthread_driver);
+  t.detach();
 }
 
 //----------------------------------------------------------------------------
 
 bool ThreadsExec::is_process() {
-  static const pthread_t master_pid = pthread_self();
+  static const std::thread::id master_pid = std::this_thread::get_id();
 
-  return pthread_equal(master_pid, pthread_self());
+  return master_pid == std::this_thread::get_id();
 }
 
-void ThreadsExec::global_lock() {
-  pthread_mutex_lock(&host_internal_pthread_mutex);
-}
+void ThreadsExec::global_lock() { host_internal_cppthread_mutex.lock(); }
 
-void ThreadsExec::global_unlock() {
-  pthread_mutex_unlock(&host_internal_pthread_mutex);
-}
+void ThreadsExec::global_unlock() { host_internal_cppthread_mutex.unlock(); }
 
 //----------------------------------------------------------------------------
 
 void ThreadsExec::wait_yield(volatile int& flag, const int value) {
   while (value == flag) {
-    sched_yield();
-  }
-}
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-/* end #if defined( KOKKOS_ENABLE_THREADS ) */
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#elif defined(KOKKOS_ENABLE_WINTHREAD)
-
-#include <Kokkos_Core_fwd.hpp>
-
-/* Windows libraries */
-#include <winsock2.h>
-#include <windows.h>
-#include <process.h>
-
-/* Standard C++ libraries */
-
-#include <cstdlib>
-#include <string>
-#include <iostream>
-#include <stdexcept>
-
-#include <Kokkos_Threads.hpp>
-
-//----------------------------------------------------------------------------
-// Driver for each created pthread
-
-namespace Kokkos {
-namespace Impl {
-namespace {
-
-unsigned WINAPI internal_winthread_driver(void* arg) {
-  ThreadsExec::driver();
-
-  return 0;
-}
-
-class ThreadLockWindows {
- private:
-  CRITICAL_SECTION m_handle;
-
-  ~ThreadLockWindows() { DeleteCriticalSection(&m_handle); }
-
-  ThreadLockWindows();
-  { InitializeCriticalSection(&m_handle); }
-
-  ThreadLockWindows(const ThreadLockWindows&);
-  ThreadLockWindows& operator=(const ThreadLockWindows&);
-
- public:
-  static ThreadLockWindows& singleton();
-
-  void lock() { EnterCriticalSection(&m_handle); }
-
-  void unlock() { LeaveCriticalSection(&m_handle); }
-};
-
-ThreadLockWindows& ThreadLockWindows::singleton() {
-  static ThreadLockWindows self;
-  return self;
-}
-
-}  // namespace
-}  // namespace Impl
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-// Spawn this thread
-
-bool ThreadsExec::spawn() {
-  unsigned Win32ThreadID = 0;
-
-  HANDLE handle =
-      _beginthreadex(0, 0, internal_winthread_driver, 0, 0, &Win32ThreadID);
-
-  return !handle;
-}
-
-bool ThreadsExec::is_process() { return true; }
-
-void ThreadsExec::global_lock() { ThreadLockWindows::singleton().lock(); }
-
-void ThreadsExec::global_unlock() { ThreadLockWindows::singleton().unlock(); }
-
-void ThreadsExec::wait_yield(volatile int& flag, const int value){} {
-  while (value == flag) {
-    Sleep(0);
+    std::this_thread::yield();
   }
 }
 
@@ -238,4 +121,4 @@ void ThreadsExec::wait_yield(volatile int& flag, const int value){} {
 
 #else
 void KOKKOS_CORE_SRC_THREADS_EXEC_BASE_PREVENT_LINK_ERROR() {}
-#endif /* end #elif defined( KOKKOS_ENABLE_WINTHREAD ) */
+#endif /* end #if defined( KOKKOS_ENABLE_THREADS ) */
diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
index e0ae43dd87ec337d24f659e3da74a662f31dfb84..36d6a25b0ddfa6fae9c579485627965180e62daf 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
@@ -168,90 +168,76 @@ class ThreadsExecTeamMember {
   template <class ValueType>
   KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& value,
                                              const int& thread_id) const {
-#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    {
-      (void)value;
-      (void)thread_id;
-    }
-#else
-    // Make sure there is enough scratch space:
-    using type = typename if_c<sizeof(ValueType) < TEAM_REDUCE_SIZE, ValueType,
-                               void>::type;
-
-    if (m_team_base) {
-      type* const local_value = ((type*)m_team_base[0]->scratch_memory());
-      memory_fence();
-      team_barrier();
-      if (team_rank() == thread_id) *local_value = value;
-      memory_fence();
-      team_barrier();
-      value = *local_value;
-    }
-#endif
+    KOKKOS_IF_ON_DEVICE(((void)value; (void)thread_id;))
+
+    KOKKOS_IF_ON_HOST((
+        // Make sure there is enough scratch space:
+        using type = typename if_c<sizeof(ValueType) < TEAM_REDUCE_SIZE,
+                                   ValueType, void>::type;
+
+        if (m_team_base) {
+          type* const local_value = ((type*)m_team_base[0]->scratch_memory());
+          memory_fence();
+          team_barrier();
+          if (team_rank() == thread_id) *local_value = value;
+          memory_fence();
+          team_barrier();
+          value = *local_value;
+        }))
   }
 
   template <class Closure, class ValueType>
   KOKKOS_INLINE_FUNCTION void team_broadcast(Closure const& f, ValueType& value,
                                              const int& thread_id) const {
-#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    {
-      (void)f;
-      (void)value;
-      (void)thread_id;
-    }
-#else
-    // Make sure there is enough scratch space:
-    using type = typename if_c<sizeof(ValueType) < TEAM_REDUCE_SIZE, ValueType,
-                               void>::type;
-    f(value);
-    if (m_team_base) {
-      type* const local_value = ((type*)m_team_base[0]->scratch_memory());
-      memory_fence();
-      team_barrier();
-      if (team_rank() == thread_id) *local_value = value;
-      memory_fence();
-      team_barrier();
-      value = *local_value;
-    }
-#endif
+    KOKKOS_IF_ON_DEVICE(((void)f; (void)value; (void)thread_id;))
+
+    KOKKOS_IF_ON_HOST((
+        // Make sure there is enough scratch space:
+        using type = typename if_c<sizeof(ValueType) < TEAM_REDUCE_SIZE,
+                                   ValueType, void>::type;
+        f(value); if (m_team_base) {
+          type* const local_value = ((type*)m_team_base[0]->scratch_memory());
+          memory_fence();
+          team_barrier();
+          if (team_rank() == thread_id) *local_value = value;
+          memory_fence();
+          team_barrier();
+          value = *local_value;
+        }))
   }
 
   template <typename Type>
   KOKKOS_INLINE_FUNCTION
       typename std::enable_if<!Kokkos::is_reducer<Type>::value, Type>::type
-      team_reduce(const Type& value) const
-#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  {
-    return value;
-  }
-#else
-  {
-    // Make sure there is enough scratch space:
-    using type =
-        typename if_c<sizeof(Type) < TEAM_REDUCE_SIZE, Type, void>::type;
+      team_reduce(const Type& value) const {
+    KOKKOS_IF_ON_DEVICE((return value;))
 
-    if (nullptr == m_exec) return value;
+    KOKKOS_IF_ON_HOST((
+        // Make sure there is enough scratch space:
+        using type =
+            typename if_c<sizeof(Type) < TEAM_REDUCE_SIZE, Type, void>::type;
 
-    if (team_rank() != team_size() - 1)
-      *((volatile type*)m_exec->scratch_memory()) = value;
+        if (nullptr == m_exec) return value;
 
-    memory_fence();
+        if (team_rank() != team_size() - 1) *
+            ((volatile type*)m_exec->scratch_memory()) = value;
 
-    type& accum = *((type*)m_team_base[0]->scratch_memory());
+        memory_fence();
 
-    if (team_fan_in()) {
-      accum = value;
-      for (int i = 1; i < m_team_size; ++i) {
-        accum += *((type*)m_team_base[i]->scratch_memory());
-      }
-      memory_fence();
-    }
+        type& accum = *((type*)m_team_base[0]->scratch_memory());
 
-    team_fan_out();
+        if (team_fan_in()) {
+          accum = value;
+          for (int i = 1; i < m_team_size; ++i) {
+            accum += *((type*)m_team_base[i]->scratch_memory());
+          }
+          memory_fence();
+        }
 
-    return accum;
+        team_fan_out();
+
+        return accum;))
   }
-#endif
 
   template <typename ReducerType>
   KOKKOS_INLINE_FUNCTION
@@ -263,55 +249,53 @@ class ThreadsExecTeamMember {
   template <typename ReducerType>
   KOKKOS_INLINE_FUNCTION
       typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type
-#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-      team_reduce(const ReducerType&,
-                  const typename ReducerType::value_type) const {
-  }
-#else
       team_reduce(const ReducerType& reducer,
                   const typename ReducerType::value_type contribution) const {
-    using value_type = typename ReducerType::value_type;
-    // Make sure there is enough scratch space:
-    using type = typename if_c<sizeof(value_type) < TEAM_REDUCE_SIZE,
-                               value_type, void>::type;
+    KOKKOS_IF_ON_DEVICE(((void)reducer; (void)contribution;))
 
-    if (nullptr == m_exec) return;
+    KOKKOS_IF_ON_HOST(
+        (using value_type = typename ReducerType::value_type;
+         // Make sure there is enough scratch space:
+         using type = typename if_c<sizeof(value_type) < TEAM_REDUCE_SIZE,
+                                    value_type, void>::type;
 
-    type* const local_value = ((type*)m_exec->scratch_memory());
+         if (nullptr == m_exec) return;
 
-    // Set this thread's contribution
-    if (team_rank() != team_size() - 1) *local_value = contribution;
+         type* const local_value = ((type*)m_exec->scratch_memory());
 
-    // Fence to make sure the base team member has access:
-    memory_fence();
+         // Set this thread's contribution
+         if (team_rank() != team_size() - 1)* local_value = contribution;
 
-    if (team_fan_in()) {
-      // The last thread to synchronize returns true, all other threads wait for
-      // team_fan_out()
-      type* const team_value = ((type*)m_team_base[0]->scratch_memory());
+         // Fence to make sure the base team member has access:
+         memory_fence();
 
-      *team_value = contribution;
-      // Join to the team value:
-      for (int i = 1; i < m_team_size; ++i) {
-        reducer.join(*team_value, *((type*)m_team_base[i]->scratch_memory()));
-      }
+         if (team_fan_in()) {
+           // The last thread to synchronize returns true, all other threads
+           // wait for team_fan_out()
+           type* const team_value = ((type*)m_team_base[0]->scratch_memory());
 
-      // Team base thread may "lap" member threads so copy out to their local
-      // value.
-      for (int i = 1; i < m_team_size; ++i) {
-        *((type*)m_team_base[i]->scratch_memory()) = *team_value;
-      }
+           *team_value = contribution;
+           // Join to the team value:
+           for (int i = 1; i < m_team_size; ++i) {
+             reducer.join(*team_value,
+                          *((type*)m_team_base[i]->scratch_memory()));
+           }
 
-      // Fence to make sure all team members have access
-      memory_fence();
-    }
+           // Team base thread may "lap" member threads so copy out to their
+           // local value.
+           for (int i = 1; i < m_team_size; ++i) {
+             *((type*)m_team_base[i]->scratch_memory()) = *team_value;
+           }
 
-    team_fan_out();
+           // Fence to make sure all team members have access
+           memory_fence();
+         }
 
-    // Value was changed by the team base
-    reducer.reference() = *((type volatile const*)local_value);
+         team_fan_out();
+
+         // Value was changed by the team base
+         reducer.reference() = *((type volatile const*)local_value);))
   }
-#endif
 
   /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
    *          with intra-team non-deterministic ordering accumulation.
@@ -324,59 +308,54 @@ class ThreadsExecTeamMember {
    */
   template <typename ArgType>
   KOKKOS_INLINE_FUNCTION ArgType team_scan(const ArgType& value,
-                                           ArgType* const global_accum) const
-#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  {
-    (void)global_accum;
-    return value;
-  }
-#else
-  {
-    // Make sure there is enough scratch space:
-    using type =
-        typename if_c<sizeof(ArgType) < TEAM_REDUCE_SIZE, ArgType, void>::type;
+                                           ArgType* const global_accum) const {
+    KOKKOS_IF_ON_DEVICE(((void)global_accum; return value;))
 
-    if (nullptr == m_exec) return type(0);
+    KOKKOS_IF_ON_HOST((  // Make sure there is enough scratch space:
+        using type = typename if_c<sizeof(ArgType) < TEAM_REDUCE_SIZE, ArgType,
+                                   void>::type;
 
-    volatile type* const work_value = ((type*)m_exec->scratch_memory());
+        if (nullptr == m_exec) return type(0);
 
-    *work_value = value;
+        volatile type* const work_value = ((type*)m_exec->scratch_memory());
 
-    memory_fence();
+        *work_value = value;
 
-    if (team_fan_in()) {
-      // The last thread to synchronize returns true, all other threads wait for
-      // team_fan_out() m_team_base[0]                 == highest ranking team
-      // member m_team_base[ m_team_size - 1 ] == lowest ranking team member
-      //
-      // 1) copy from lower to higher rank, initialize lowest rank to zero
-      // 2) prefix sum from lowest to highest rank, skipping lowest rank
+        memory_fence();
 
-      type accum = 0;
+        if (team_fan_in()) {
+          // The last thread to synchronize returns true, all other threads wait
+          // for team_fan_out() m_team_base[0]                 == highest
+          // ranking team member m_team_base[ m_team_size - 1 ] == lowest
+          // ranking team member
+          //
+          // 1) copy from lower to higher rank, initialize lowest rank to zero
+          // 2) prefix sum from lowest to highest rank, skipping lowest rank
 
-      if (global_accum) {
-        for (int i = m_team_size; i--;) {
-          type& val = *((type*)m_team_base[i]->scratch_memory());
-          accum += val;
-        }
-        accum = atomic_fetch_add(global_accum, accum);
-      }
+          type accum = 0;
 
-      for (int i = m_team_size; i--;) {
-        type& val = *((type*)m_team_base[i]->scratch_memory());
-        const type offset = accum;
-        accum += val;
-        val = offset;
-      }
+          if (global_accum) {
+            for (int i = m_team_size; i--;) {
+              type& val = *((type*)m_team_base[i]->scratch_memory());
+              accum += val;
+            }
+            accum = atomic_fetch_add(global_accum, accum);
+          }
 
-      memory_fence();
-    }
+          for (int i = m_team_size; i--;) {
+            type& val         = *((type*)m_team_base[i]->scratch_memory());
+            const type offset = accum;
+            accum += val;
+            val = offset;
+          }
 
-    team_fan_out();
+          memory_fence();
+        }
 
-    return *work_value;
+        team_fan_out();
+
+        return *work_value;))
   }
-#endif
 
   /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
    *
diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
index c08615188f68be6de52a2c66e9c717fffb012606..88dc670fa43ff5a1e969857808bd38cafd1544c4 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
@@ -253,6 +253,16 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
       : m_functor(arg_functor),
         m_mdr_policy(arg_policy),
         m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {}
+
+  template <typename Policy, typename Functor>
+  static int max_tile_size_product(const Policy &, const Functor &) {
+    /**
+     * 1024 here is just our guess for a reasonable max tile size,
+     * it isn't a hardware constraint. If people see a use for larger
+     * tile size products, we're happy to change this.
+     */
+    return 1024;
+  }
 };
 
 //----------------------------------------------------------------------------
@@ -693,6 +703,16 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
       , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace"
       );*/
   }
+
+  template <typename Policy, typename Functor>
+  static int max_tile_size_product(const Policy &, const Functor &) {
+    /**
+     * 1024 here is just our guess for a reasonable max tile size,
+     * it isn't a hardware constraint. If people see a use for larger
+     * tile size products, we're happy to change this.
+     */
+    return 1024;
+  }
 };
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp
index df09e9e7215310e26d72009cc32f7e5339dfdc5b..2a06cb65e56cc800867d88663ac7e1ad9e58d801 100644
--- a/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp
+++ b/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp
@@ -47,6 +47,8 @@
 
 #if defined(KOKKOS_ENABLE_CUDA)
 #include <Kokkos_Cuda.hpp>
+#include <Cuda/Kokkos_Cuda_Half_Impl_Type.hpp>
+#include <Cuda/Kokkos_Cuda_Half_Conversion.hpp>
 #include <Cuda/Kokkos_Cuda_Parallel.hpp>
 #include <Cuda/Kokkos_Cuda_KernelLaunch.hpp>
 #include <Cuda/Kokkos_Cuda_Instance.hpp>
diff --git a/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp
index 92cd85bcae8b9e8c65d37b9308033a0748c8d3aa..72a26b27f4e70dfa42a2449252932b255a762ed0 100644
--- a/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp
+++ b/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp
@@ -47,6 +47,8 @@
 
 #if defined(KOKKOS_ENABLE_SYCL)
 #include <Kokkos_SYCL.hpp>
+#include <SYCL/Kokkos_SYCL_Half_Impl_Type.hpp>
+#include <SYCL/Kokkos_SYCL_Half_Conversion.hpp>
 #include <SYCL/Kokkos_SYCL_DeepCopy.hpp>
 #include <SYCL/Kokkos_SYCL_MDRangePolicy.hpp>
 #include <SYCL/Kokkos_SYCL_Parallel_Range.hpp>
diff --git a/packages/kokkos/core/src/desul/.clang-format b/packages/kokkos/core/src/desul/.clang-format
index 9d159247d518108410702980b90b13c2cfb4b84f..7968b43c1d6548729fd65c4308046f0276b08bd5 100644
--- a/packages/kokkos/core/src/desul/.clang-format
+++ b/packages/kokkos/core/src/desul/.clang-format
@@ -1,2 +1,3 @@
 DisableFormat: true
 SortIncludes: false
+
diff --git a/packages/kokkos/core/src/desul/atomics/CUDA.hpp b/packages/kokkos/core/src/desul/atomics/CUDA.hpp
index 32873a59776b07dea770c193e0034c1e82387246..be308a23228e3054b0d9d038c076657a2a478491 100644
--- a/packages/kokkos/core/src/desul/atomics/CUDA.hpp
+++ b/packages/kokkos/core/src/desul/atomics/CUDA.hpp
@@ -9,20 +9,17 @@ SPDX-License-Identifier: (BSD-3-Clause)
 #define DESUL_ATOMICS_CUDA_HPP_
 
 #ifdef DESUL_HAVE_CUDA_ATOMICS
-// When building with clang we need to include the device functions always
-// since clang must see a consistent overload set in both device and host compilation
-// but that means we need to know on the host what to make visible, i.e. we need
-// a host side compile knowledge of architecture.
-// We simply can say DESUL proper doesn't support clang CUDA build pre Volta,
-// Kokkos has that knowledge and so I use it here, allowing in Kokkos to use
-// clang with pre Volta as CUDA compiler
-#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__>=700)) || \
-    (!defined(__NVCC__) && !defined(KOKKOS_ARCH_KEPLER) && !defined(KOKKOS_ARCH_MAXWELL) && !defined(KOKKOS_ARCH_PASCAL))
+// When building with Clang we need to include the device functions always since Clang
+// must see a consistent overload set in both device and host compilation, but that
+// means we need to know on the host what to make visible, i.e. we need a host side
+// compile knowledge of architecture.
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700)) || \
+    (!defined(__NVCC__) && !defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA))
 #define DESUL_HAVE_CUDA_ATOMICS_ASM
 #include <desul/atomics/cuda/CUDA_asm.hpp>
 #endif
 
-#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__<700)) || \
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700)) || \
     (!defined(__NVCC__) && !defined(DESUL_HAVE_CUDA_ATOMICS_ASM))
 namespace desul {
 namespace Impl {
@@ -75,7 +72,7 @@ atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeCore) {
 }
 
 
-// Atomic Sub
+// Atomic Sub 
 template<class T>
 __device__ inline
 typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value,T>::type
@@ -100,49 +97,111 @@ atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeCore) {
   return atomic_fetch_sub(dest,val,MemoryOrder(),MemoryScopeDevice());
 }
 
+// Wrap around atomic add
+__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest,
+                                                    unsigned int val,
+                                                    MemoryOrderRelaxed,
+                                                    MemoryScopeDevice) {
+  return atomicInc(dest, val);
+}
+
+template <typename MemoryOrder>
+__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest,
+                                                    unsigned int val,
+                                                    MemoryOrder,
+                                                    MemoryScopeDevice) {
+  __threadfence();
+  unsigned int return_val = atomicInc(dest, val);
+  __threadfence();
+  return return_val;
+}
+
+template <typename MemoryOrder>
+__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest,
+                                                    unsigned int val,
+                                                    MemoryOrder,
+                                                    MemoryScopeCore) {
+  return atomic_fetch_inc_mod(dest, val, MemoryOrder(), MemoryScopeDevice());
+}
+
+// Wrap around atomic sub
+__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest,
+                                                    unsigned int val,
+                                                    MemoryOrderRelaxed,
+                                                    MemoryScopeDevice) {
+  return atomicDec(dest, val);
+}
+
+template <typename MemoryOrder>
+__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest,
+                                                    unsigned int val,
+                                                    MemoryOrder,
+                                                    MemoryScopeDevice) {
+  __threadfence();
+  unsigned int return_val = atomicDec(dest, val);
+  __threadfence();
+  return return_val;
+}
+
+template <typename MemoryOrder>
+__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest,
+                                                    unsigned int val,
+                                                    MemoryOrder,
+                                                    MemoryScopeCore) {
+  return atomic_fetch_dec_mod(dest, val, MemoryOrder(), MemoryScopeDevice());
+}
+
 // Atomic Inc
+template <typename T>
 __device__ inline
-unsigned int atomic_fetch_inc(unsigned int* dest, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicInc(dest,val);
+    typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value, T>::type
+    atomic_fetch_inc(T* dest, MemoryOrderRelaxed, MemoryScopeDevice) {
+  return atomicAdd(dest, T(1));
 }
 
-template<class MemoryOrder>
+template <typename T, typename MemoryOrder>
 __device__ inline
-unsigned int atomic_fetch_inc(unsigned int* dest, unsigned int val, MemoryOrder, MemoryScopeDevice) {
+    typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value, T>::type
+    atomic_fetch_inc(T* dest, MemoryOrder, MemoryScopeDevice) {
   __threadfence();
-  unsigned int return_val = atomicInc(dest,val);
+  T return_val = atomicAdd(dest, T(1));
   __threadfence();
+
   return return_val;
 }
 
-template<class MemoryOrder>
+template <typename T, typename MemoryOrder>
 __device__ inline
-unsigned int atomic_fetch_inc(unsigned int* dest, unsigned int val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_inc(dest,val,MemoryOrder(),MemoryScopeDevice());
+    typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value, T>::type
+    atomic_fetch_inc(T* dest, MemoryOrder, MemoryScopeCore) {
+  return atomic_fetch_add(dest, T(1), MemoryOrder(), MemoryScopeDevice());
 }
 
-// Atomic Inc
+// Atomic Dec
+template <typename T>
 __device__ inline
-unsigned int atomic_fetch_dec(unsigned int* dest, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicDec(dest,val);
+    typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value, T>::type
+    atomic_fetch_dec(T* dest, MemoryOrderRelaxed, MemoryScopeDevice) {
+  return atomicSub(dest, T(1));
 }
 
-template<class MemoryOrder>
+template <typename T, typename MemoryOrder>
 __device__ inline
-unsigned int atomic_fetch_dec(unsigned int* dest, unsigned int val, MemoryOrder, MemoryScopeDevice) {
+    typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value, T>::type
+    atomic_fetch_dec(T* dest, MemoryOrder, MemoryScopeDevice) {
   __threadfence();
-  unsigned int return_val = atomicDec(dest,val);
+  T return_val = atomicSub(dest, T(1));
   __threadfence();
   return return_val;
 }
 
-template<class MemoryOrder>
+template <typename T, typename MemoryOrder>
 __device__ inline
-unsigned int atomic_fetch_dec(unsigned int* dest, unsigned int val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_dec(dest,val,MemoryOrder(),MemoryScopeDevice());
+    typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value, T>::type
+    atomic_fetch_dec(T* dest, MemoryOrder, MemoryScopeCore) {
+  return atomic_fetch_sub(dest, T(1), MemoryOrder(), MemoryScopeDevice());
 }
 
-
 // Atomic Max
 template<class T>
 __device__ inline
@@ -307,8 +366,36 @@ namespace desul {
     (void) atomic_fetch_dec(dest, order, scope); \
   }
   DESUL_IMPL_CUDA_HOST_ATOMIC_DEC(unsigned,MemoryOrderRelaxed,MemoryScopeDevice); // only for ASM?
+
 #endif // DESUL_HAVE_CUDA_ATOMICS_ASM
 
+#define DESUL_IMPL_CUDA_HOST_ATOMIC_INC_MOD(TYPE,ORDER,SCOPE) \
+  inline TYPE atomic_fetch_inc_mod(TYPE* dest, TYPE val, ORDER order, SCOPE scope) { \
+  using cas_t = typename Impl::atomic_compare_exchange_type<sizeof(TYPE)>::type; \
+  cas_t oldval = reinterpret_cast<cas_t&>(*dest); \
+  cas_t assume = oldval; \
+  do { \
+    assume = oldval; \
+    TYPE newval = (reinterpret_cast<TYPE&>(assume) >= val) ? static_cast<TYPE>(0) : reinterpret_cast<TYPE&>(assume) + static_cast<TYPE>(1); \
+    oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest), assume, reinterpret_cast<cas_t&>(newval), order, scope); \
+  } while (assume != oldval); \
+  return reinterpret_cast<TYPE&>(oldval); \
+}
+DESUL_IMPL_CUDA_HOST_ATOMIC_INC_MOD(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice);
+#define DESUL_IMPL_CUDA_HOST_ATOMIC_DEC_MOD(TYPE,ORDER,SCOPE) \
+    inline TYPE atomic_fetch_dec_mod(TYPE* dest, TYPE val, ORDER order, SCOPE scope) { \
+    using cas_t = typename Impl::atomic_compare_exchange_type<sizeof(TYPE)>::type; \
+    cas_t oldval = reinterpret_cast<cas_t&>(*dest); \
+    cas_t assume = oldval; \
+    do { \
+      assume = oldval; \
+      TYPE newval = ((reinterpret_cast<TYPE&>(assume) == static_cast<TYPE>(0)) | (reinterpret_cast<TYPE&>(assume) > val)) ? val : reinterpret_cast<TYPE&>(assume) - static_cast<TYPE>(1); \
+      oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest), assume, reinterpret_cast<cas_t&>(newval), order, scope); \
+    } while (assume != oldval); \
+    return reinterpret_cast<TYPE&>(oldval); \
+  }
+  DESUL_IMPL_CUDA_HOST_ATOMIC_DEC_MOD(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice);
+
   #define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_ADD(TYPE,ORDER,SCOPE) \
     inline TYPE atomic_fetch_add(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \
       return Impl::atomic_fetch_oper(Impl::AddOper<TYPE, const TYPE>(),dest, val, order, scope); \
@@ -345,7 +432,7 @@ namespace desul {
 //  DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(unsigned long long,MemoryOrderRelaxed,MemoryScopeDevice);
 //  inline void atomic_fetch_max(int32_t* const dest, int32_t val, MemoryOrderRelaxed order, MemoryScopeDevice scope) {
 
-}
+}  // namespace desul
 
 // Functions defined int the GCC overload set but not in the device overload set
 namespace desul {
@@ -447,7 +534,8 @@ namespace desul {
   long atomic_and_fetch(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) {
     return Impl::atomic_oper_fetch(Impl::AndOper<long, const long>(), dest, val, order, scope);
   }
-}
+}  // namespace desul
 #endif
+
 #endif  // DESUL_HAVE_CUDA_ATOMICS
 #endif
diff --git a/packages/kokkos/core/src/desul/atomics/Common.hpp b/packages/kokkos/core/src/desul/atomics/Common.hpp
index f1dccc6c52318f58b6fb1ed792ed614a8351458c..1b8dc9f58e947042c5a20c2a3d5154d780bece78 100644
--- a/packages/kokkos/core/src/desul/atomics/Common.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Common.hpp
@@ -52,6 +52,8 @@ struct MemoryScopeNode {};
 struct MemoryScopeDevice {};
 // Core scoped (i.e. a shared Level 1 cache)
 struct MemoryScopeCore {};
+// Caller scoped (i.e. NOT atomic!)
+struct MemoryScopeCaller {};
 }  // namespace desul
 
 #ifndef __ATOMIC_RELAXED
diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange.hpp b/packages/kokkos/core/src/desul/atomics/Compare_Exchange.hpp
index 7b8289d75b8e70a1097207418a5a0f435913cded..81c90db79e8b42159fab8e7e632d9315f2920475 100644
--- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Compare_Exchange.hpp
@@ -11,6 +11,8 @@ SPDX-License-Identifier: (BSD-3-Clause)
 
 #include "desul/atomics/Macros.hpp"
 
+#include "desul/atomics/Compare_Exchange_ScopeCaller.hpp"
+
 #ifdef DESUL_HAVE_GCC_ATOMICS
 #include "desul/atomics/Compare_Exchange_GCC.hpp"
 #endif
diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_CUDA.hpp b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_CUDA.hpp
index aab0d943eb659f9c0f860fef1293753bcf5c52be..abe566c7ba91bd2ce7ffb2e8db96bce43090f414 100644
--- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_CUDA.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_CUDA.hpp
@@ -60,8 +60,9 @@ __device__ inline void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeNode) {
 // Compare Exchange for PRE Volta, not supported with CLANG as CUDA compiler, since we do NOT have a way
 // of having the code included for clang only when the CC is smaller than 700
 // But on Clang the device side symbol list must be independent of __CUDA_ARCH__
+// FIXME temporary fix for https://github.com/kokkos/kokkos/issues/4390
 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700) || \
-(!defined(__NVCC__) && (defined(KOKKOS_ENABLE_KEPLER) || defined(KOKKOS_ENABLE_MAXWELL) || defined(KOKKOS_ENABLE_PASCAL)))
+(!defined(__NVCC__) && defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA) && 0)
 namespace desul {
 template <typename T, class MemoryScope>
 __device__ typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange(
@@ -162,7 +163,7 @@ __device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type at
 // Kokkos has that knowledge and so I use it here, allowing in Kokkos to use
 // clang with pre Volta as CUDA compiler
 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__>=700)) || \
-     (!defined(__NVCC__) && !defined(KOKKOS_ARCH_KEPLER) && !defined(KOKKOS_ARCH_MAXWELL) && !defined(KOKKOS_ARCH_PASCAL))
+     (!defined(__NVCC__) && !defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA))
 #include <desul/atomics/cuda/CUDA_asm_exchange.hpp>
 #endif
 
diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_MSVC.hpp b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_MSVC.hpp
index c96cb031714f63b5039ade535077c7511838ffbd..edf72f13db8d2ea80af5cb35b64230e362290372 100644
--- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_MSVC.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_MSVC.hpp
@@ -1,4 +1,4 @@
-/* 
+/*
 Copyright (c) 2019, Lawrence Livermore National Security, LLC
 and DESUL project contributors. See the COPYRIGHT file for details.
 Source: https://github.com/desul/desul
@@ -8,8 +8,9 @@ SPDX-License-Identifier: (BSD-3-Clause)
 
 #ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_MSVC_HPP_
 #define DESUL_ATOMICS_COMPARE_EXCHANGE_MSVC_HPP_
-#include "desul/atomics/Common.hpp"
 #include <type_traits>
+
+#include "desul/atomics/Common.hpp"
 #ifdef DESUL_HAVE_MSVC_ATOMICS
 
 #ifndef DESUL_HAVE_16BYTE_COMPARE_AND_SWAP
@@ -18,79 +19,111 @@ SPDX-License-Identifier: (BSD-3-Clause)
 
 namespace desul {
 
-template<class T, class MemoryOrder, class MemoryScope>
-T atomic_exchange(T* const, T val, MemoryOrder, MemoryScope) { return val;}
+// Forward declare these functions. They use compare_exchange themselves
+// so the actual header file with them comes after this file is included.
+namespace Impl {
+template <typename MemoryScope>
+inline bool lock_address(void* ptr, MemoryScope ms);
 
+template <typename MemoryScope>
+void unlock_address(void* ptr, MemoryScope ms);
+}  // namespace Impl
 
-template<class MemoryOrder, class MemoryScope>
+template <class MemoryOrder, class MemoryScope>
 void atomic_thread_fence(MemoryOrder, MemoryScope) {
   std::atomic_thread_fence(CXXMemoryOrder<MemoryOrder>::value);
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 1, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderRelaxed, MemoryScope) {
-  char return_val =
-      _InterlockedExchange8((char*)dest, *((char*)&val));
+typename std::enable_if<sizeof(T) == 1, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderRelaxed,
+                                                                 MemoryScope) {
+  char return_val = _InterlockedExchange8((char*)dest, *((char*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 2, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderRelaxed, MemoryScope) {
-  short return_val =
-      _InterlockedExchange16((short*)dest, *((short*)&val));
+typename std::enable_if<sizeof(T) == 2, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderRelaxed,
+                                                                 MemoryScope) {
+  short return_val = _InterlockedExchange16((short*)dest, *((short*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderRelaxed, MemoryScope) {
-  long return_val =
-      _InterlockedExchange((long*)dest, *((long*)&val));
+typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderRelaxed,
+                                                                 MemoryScope) {
+  long return_val = _InterlockedExchange((long*)dest, *((long*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderRelaxed, MemoryScope) {
-  __int64 return_val = _InterlockedExchange64(
-      (__int64*)dest, *((__int64*)&val));
+typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderRelaxed,
+                                                                 MemoryScope) {
+  __int64 return_val = _InterlockedExchange64((__int64*)dest, *((__int64*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 1, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderSeqCst, MemoryScope) {
-  char return_val =
-      _InterlockedExchange8((char*)dest, *((char*)&val));
+typename std::enable_if<sizeof(T) == 1, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderSeqCst,
+                                                                 MemoryScope) {
+  char return_val = _InterlockedExchange8((char*)dest, *((char*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 2, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderSeqCst, MemoryScope) {
-  short return_val =
-      _InterlockedExchange16((short*)dest, *((short*)&val));
+typename std::enable_if<sizeof(T) == 2, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderSeqCst,
+                                                                 MemoryScope) {
+  short return_val = _InterlockedExchange16((short*)dest, *((short*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderSeqCst, MemoryScope) {
-  long return_val =
-      _InterlockedExchange((long*)dest, *((long*)&val));
+typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderSeqCst,
+                                                                 MemoryScope) {
+  long return_val = _InterlockedExchange((long*)dest, *((long*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
 template <typename T, class MemoryScope>
-typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(
-    T* const dest, T val, MemoryOrderSeqCst, MemoryScope) {
-  __int64 return_val = _InterlockedExchange64(
-      (__int64*)dest, *((__int64*)&val));
+typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(T* const dest,
+                                                                 T val,
+                                                                 MemoryOrderSeqCst,
+                                                                 MemoryScope) {
+  __int64 return_val = _InterlockedExchange64((__int64*)dest, *((__int64*)&val));
   return *(reinterpret_cast<T*>(&return_val));
 }
 
+template <typename T, class MemoryOrder, class MemoryScope>
+typename std::enable_if<(sizeof(T) != 1 && sizeof(T) != 2 && sizeof(T) != 4 &&
+                         sizeof(T) != 8),
+                        T>::type
+atomic_exchange(T* const dest, T val, MemoryOrder, MemoryScope scope) {
+  while (!Impl::lock_address((void*)dest, scope)) {
+  }
+  if (std::is_same<MemoryOrder, MemoryOrderSeqCst>::value)
+    atomic_thread_fence(MemoryOrderRelease(), scope);
+  atomic_thread_fence(MemoryOrderAcquire(), scope);
+  T return_val = *dest;
+  *dest = val;
+  atomic_thread_fence(MemoryOrderRelease(), scope);
+
+  Impl::unlock_address((void*)dest, scope);
+  return return_val;
+}
+
 template <typename T, class MemoryScope>
 typename std::enable_if<sizeof(T) == 1, T>::type atomic_compare_exchange(
     T* const dest, T compare, T val, MemoryOrderRelaxed, MemoryScope) {
@@ -177,18 +210,21 @@ typename std::enable_if<sizeof(T) == 16, T>::type atomic_compare_exchange(
   return compare;
 }
 
-
 template <typename T, class MemoryOrder, class MemoryScope>
-typename std::enable_if<(sizeof(T) != 1 && sizeof(T) != 4 && sizeof(T) != 8 && sizeof(T) != 16), T>::type atomic_compare_exchange(
-     T* const dest, T compare, T val, MemoryOrder, MemoryScope scope) {
-  while (!Impl::lock_address((void*)dest, scope)) {}
+typename std::enable_if<(sizeof(T) != 1 && sizeof(T) != 2 && sizeof(T) != 4 &&
+                         sizeof(T) != 8 && sizeof(T) != 16),
+                        T>::type
+atomic_compare_exchange(
+    T* const dest, T compare, T val, MemoryOrder, MemoryScope scope) {
+  while (!Impl::lock_address((void*)dest, scope)) {
+  }
   if (std::is_same<MemoryOrder, MemoryOrderSeqCst>::value)
-          atomic_thread_fence(MemoryOrderRelease(), scope);
-  atomic_thread_fence(MemoryOrderAcquire(),scope);
+    atomic_thread_fence(MemoryOrderRelease(), scope);
+  atomic_thread_fence(MemoryOrderAcquire(), scope);
   T return_val = *dest;
-  if(return_val == compare) {
+  if (return_val == compare) {
     *dest = val;
-    atomic_thread_fence(MemoryOrderRelease(),scope);
+    atomic_thread_fence(MemoryOrderRelease(), scope);
   }
 
   Impl::unlock_address((void*)dest, scope);
diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_OpenMP.hpp b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_OpenMP.hpp
index a1d1c9124991d01640ca70243e9033e4c528e6cf..ded401f52d265db6f712e81bad67095b901079f8 100644
--- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_OpenMP.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_OpenMP.hpp
@@ -11,22 +11,6 @@ SPDX-License-Identifier: (BSD-3-Clause)
 #include <cstdio>
 #include <omp.h>
 
-namespace desul
-{
-namespace Impl
-{
-static constexpr bool omp_on_host() { return true; }
-
-#pragma omp begin declare variant match(device = {kind(host)})
-static constexpr bool omp_on_host() { return true; }
-#pragma omp end declare variant
-
-#pragma omp begin declare variant match(device = {kind(nohost)})
-static constexpr bool omp_on_host() { return false; }
-#pragma omp end declare variant
-} // namespace Impl
-} // namespace desul
-
 #ifdef DESUL_HAVE_OPENMP_ATOMICS
 namespace desul {
 
@@ -114,8 +98,6 @@ std::enable_if_t<Impl::atomic_always_lock_free(sizeof(T)),T> atomic_compare_exch
      reinterpret_cast<cas_t&>(value));
   return reinterpret_cast<T&>(retval);
 }
-// Make 16 byte cas work on host at least (is_initial_device check, note this requires C++17)
-#if __cplusplus>=201703L
 
 #if defined(__clang__) && (__clang_major__>=7)
 // Disable warning for large atomics on clang 7 and up (checked with godbolt)
@@ -124,21 +106,33 @@ std::enable_if_t<Impl::atomic_always_lock_free(sizeof(T)),T> atomic_compare_exch
 #pragma GCC diagnostic ignored "-Watomic-alignment"
 #endif
 
+// Make 16 byte cas work on host at least
+#pragma omp begin declare variant match(device = {kind(host)})
 template <typename T, class MemoryOrder, class MemoryScope>
-std::enable_if_t<!Impl::atomic_always_lock_free(sizeof(T)) && (sizeof(T)==16),T> atomic_compare_exchange(
-    T* dest, T compare, T value, MemoryOrder, MemoryScope) {
-  if constexpr (desul::Impl::omp_on_host()) {
-    (void)__atomic_compare_exchange(
-      dest, &compare, &value, false, GCCMemoryOrder<MemoryOrder>::value, GCCMemoryOrder<MemoryOrder>::value);
-    return compare;
-  } else {
-    return value;
-  }
+std::enable_if_t<!Impl::atomic_always_lock_free(sizeof(T)) && (sizeof(T) == 16), T>
+atomic_compare_exchange(T* dest, T compare, T value, MemoryOrder, MemoryScope) {
+  (void)__atomic_compare_exchange(dest,
+                                  &compare,
+                                  &value,
+                                  false,
+                                  GCCMemoryOrder<MemoryOrder>::value,
+                                  GCCMemoryOrder<MemoryOrder>::value);
+  return compare;
 }
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(nohost)})
+template <typename T, class MemoryOrder, class MemoryScope>
+std::enable_if_t<!Impl::atomic_always_lock_free(sizeof(T)) && (sizeof(T) == 16), T>
+atomic_compare_exchange(T* /*dest*/, T /*compare*/, T value, MemoryOrder, MemoryScope) {
+  // FIXME make sure this never gets called
+  return value;
+}
+#pragma omp end declare variant
+
 #if defined(__clang__) && (__clang_major__>=7)
 #pragma GCC diagnostic pop
 #endif
-#endif
 
 }  // namespace desul
 #endif
diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_SYCL.hpp b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_SYCL.hpp
index a8fd2ebbe2beef39e4cd8dff5797b722e8d17582..14e0ab4cff97e0060582a55bafb7bbffa50d78c7 100644
--- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_SYCL.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_SYCL.hpp
@@ -1,4 +1,4 @@
-/* 
+/*
 Copyright (c) 2019, Lawrence Livermore National Security, LLC
 and DESUL project contributors. See the COPYRIGHT file for details.
 Source: https://github.com/desul/desul
@@ -8,44 +8,163 @@ SPDX-License-Identifier: (BSD-3-Clause)
 
 #ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_SYCL_HPP_
 #define DESUL_ATOMICS_COMPARE_EXCHANGE_SYCL_HPP_
-#include "desul/atomics/Common.hpp"
+
+// clang-format off
 #include "desul/atomics/SYCLConversions.hpp"
-#include <CL/sycl.hpp>
+#include "desul/atomics/Common.hpp"
 
+#include <CL/sycl.hpp>
+// clang-format on
 
 #ifdef DESUL_HAVE_SYCL_ATOMICS
 
 namespace desul {
 
-template<class MemoryOrder, class MemoryScope>
+template <class MemoryOrder, class MemoryScope>
 inline void atomic_thread_fence(MemoryOrder, MemoryScope) {
-  DESUL_SYCL_NAMESPACE::atomic_fence(DesulToSYCLMemoryOrder<MemoryOrder>::value,
-                                     DesulToSYCLMemoryScope<MemoryScope>::value);
+  sycl::atomic_fence(
+      Impl::DesulToSYCLMemoryOrder<MemoryOrder, /*extended namespace*/ false>::value,
+      Impl::DesulToSYCLMemoryScope<MemoryScope, /*extended namespace*/ false>::value);
 }
 
+// FIXME_SYCL We need to either use generic_space or figure out how to check for the
+// correct adress space in a SYCL-portable way.
+#ifndef __NVPTX__
 template <typename T, class MemoryOrder, class MemoryScope>
 typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange(
     T* const dest, T compare, T value, MemoryOrder, MemoryScope) {
-  static_assert(sizeof(unsigned int) == 4, "this function assumes an unsigned int is 32-bit");
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    unsigned int, 
-    DesulToSYCLMemoryOrder<MemoryOrder>::value, 
-    DesulToSYCLMemoryScope<MemoryScope>::value, 
-    sycl::access::address_space::global_device_space> 
+  static_assert(sizeof(unsigned int) == 4,
+                "this function assumes an unsigned int is 32-bit");
+  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned int>(dest);
+  if (l) {
+    Impl::sycl_atomic_ref<unsigned int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::local_space>
+    dest_ref(*reinterpret_cast<unsigned int*>(dest));
+    dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare),
+                                     *reinterpret_cast<unsigned int*>(&value));
+    return compare;
+  } else {
+    Impl::sycl_atomic_ref<unsigned int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::global_space>
+    dest_ref(*reinterpret_cast<unsigned int*>(dest));
+    dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare),
+                                     *reinterpret_cast<unsigned int*>(&value));
+    return compare;
+  }
+}
+template <typename T, class MemoryOrder, class MemoryScope>
+typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange(
+    T* const dest, T compare, T value, MemoryOrder, MemoryScope) {
+  static_assert(sizeof(unsigned long long int) == 8,
+                "this function assumes an unsigned long long is 64-bit");
+  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned long long int>(dest);
+  if (l) {
+    Impl::sycl_atomic_ref<unsigned long long int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::local_space>
+    dest_ref(*reinterpret_cast<unsigned long long int*>(dest));
+    dest_ref.compare_exchange_strong(
+        *reinterpret_cast<unsigned long long int*>(&compare),
+        *reinterpret_cast<unsigned long long int*>(&value));
+    return compare;
+  } else {
+    Impl::sycl_atomic_ref<unsigned long long int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::global_space>
+    dest_ref(*reinterpret_cast<unsigned long long int*>(dest));
+    dest_ref.compare_exchange_strong(
+        *reinterpret_cast<unsigned long long int*>(&compare),
+        *reinterpret_cast<unsigned long long int*>(&value));
+    return compare;
+  }
+}
+
+template <typename T, class MemoryOrder, class MemoryScope>
+typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(T* const dest,
+                                                                 T value,
+                                                                 MemoryOrder,
+                                                                 MemoryScope) {
+  static_assert(sizeof(unsigned int) == 4,
+                "this function assumes an unsigned int is 32-bit");
+  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned int>(dest);
+  if (l) {
+    Impl::sycl_atomic_ref<unsigned int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::local_space>
+    dest_ref(*reinterpret_cast<unsigned int*>(dest));
+    unsigned int return_val =
+        dest_ref.exchange(*reinterpret_cast<unsigned int*>(&value));
+    return reinterpret_cast<T&>(return_val);
+  } else {
+    Impl::sycl_atomic_ref<unsigned int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::global_space>
+    dest_ref(*reinterpret_cast<unsigned int*>(dest));
+    unsigned int return_val =
+        dest_ref.exchange(*reinterpret_cast<unsigned int*>(&value));
+    return reinterpret_cast<T&>(return_val);
+  }
+}
+template <typename T, class MemoryOrder, class MemoryScope>
+typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(T* const dest,
+                                                                 T value,
+                                                                 MemoryOrder,
+                                                                 MemoryScope) {
+  static_assert(sizeof(unsigned long long int) == 8,
+                "this function assumes an unsigned long long is 64-bit");
+  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned long long int>(dest);
+  if (l) {
+    Impl::sycl_atomic_ref<unsigned long long int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::local_space>
+    dest_ref(*reinterpret_cast<unsigned long long int*>(dest));
+    unsigned long long int return_val =
+        dest_ref.exchange(*reinterpret_cast<unsigned long long int*>(&value));
+    return reinterpret_cast<T&>(return_val);
+  } else {
+    Impl::sycl_atomic_ref<unsigned long long int,
+                          MemoryOrder,
+                          MemoryScopeDevice,
+                          sycl::access::address_space::global_space>
+    dest_ref(*reinterpret_cast<unsigned long long int*>(dest));
+    unsigned long long int return_val =
+        dest_ref.exchange(*reinterpret_cast<unsigned long long int*>(&value));
+    return reinterpret_cast<T&>(return_val);
+  }
+}
+#else
+template <typename T, class MemoryOrder, class MemoryScope>
+typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange(
+    T* const dest, T compare, T value, MemoryOrder, MemoryScope) {
+  static_assert(sizeof(unsigned int) == 4,
+                "this function assumes an unsigned int is 32-bit");
+  Impl::sycl_atomic_ref<unsigned int,
+                        MemoryOrder,
+                        MemoryScope,
+                        sycl::access::address_space::global_space>
   dest_ref(*reinterpret_cast<unsigned int*>(dest));
-  dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare), 
+  dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare),
                                    *reinterpret_cast<unsigned int*>(&value));
   return compare;
 }
 template <typename T, class MemoryOrder, class MemoryScope>
 typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange(
     T* const dest, T compare, T value, MemoryOrder, MemoryScope) {
-  static_assert(sizeof(unsigned long long int) == 8, "this function assumes an unsigned long long  is 64-bit");
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    unsigned long long int, 
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScope>::value, 
-    sycl::access::address_space::global_device_space> 
+  static_assert(sizeof(unsigned long long int) == 8,
+                "this function assumes an unsigned long long is 64-bit");
+  Impl::sycl_atomic_ref<unsigned long long int,
+                        MemoryOrder,
+                        MemoryScope,
+                        sycl::access::address_space::global_space>
   dest_ref(*reinterpret_cast<unsigned long long int*>(dest));
   dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned long long int*>(&compare),
                                    *reinterpret_cast<unsigned long long int*>(&value));
@@ -53,39 +172,45 @@ typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange(
 }
 
 template <typename T, class MemoryOrder, class MemoryScope>
-typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(
-    T* const dest, T value, MemoryOrder, MemoryScope) {
-  static_assert(sizeof(unsigned int) == 4, "this function assumes an unsigned int is 32-bit");
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    unsigned int, 
-    DesulToSYCLMemoryOrder<MemoryOrder>::value, 
-    DesulToSYCLMemoryScope<MemoryScope>::value,  
-    sycl::access::address_space::global_device_space> 
+typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(T* const dest,
+                                                                 T value,
+                                                                 MemoryOrder,
+                                                                 MemoryScope) {
+  static_assert(sizeof(unsigned int) == 4,
+                "this function assumes an unsigned int is 32-bit");
+  Impl::sycl_atomic_ref<unsigned int,
+                        MemoryOrder,
+                        MemoryScope,
+                        sycl::access::address_space::global_space>
   dest_ref(*reinterpret_cast<unsigned int*>(dest));
   unsigned int return_val = dest_ref.exchange(*reinterpret_cast<unsigned int*>(&value));
   return reinterpret_cast<T&>(return_val);
 }
 template <typename T, class MemoryOrder, class MemoryScope>
-typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(
-    T* const dest, T value, MemoryOrder, MemoryScope) {
-  static_assert(sizeof(unsigned long long int) == 8, "this function assumes an unsigned long long  is 64-bit");
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    unsigned long long int,
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScope>::value,
-    sycl::access::address_space::global_device_space>
+typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(T* const dest,
+                                                                 T value,
+                                                                 MemoryOrder,
+                                                                 MemoryScope) {
+  static_assert(sizeof(unsigned long long int) == 8,
+                "this function assumes an unsigned long long is 64-bit");
+  Impl::sycl_atomic_ref<unsigned long long int,
+                        MemoryOrder,
+                        MemoryScope,
+                        sycl::access::address_space::global_space>
   dest_ref(*reinterpret_cast<unsigned long long int*>(dest));
   unsigned long long int return_val =
       dest_ref.exchange(reinterpret_cast<unsigned long long int&>(value));
   return reinterpret_cast<T&>(return_val);
 }
+#endif
 
 template <typename T, class MemoryOrder, class MemoryScope>
-typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type atomic_compare_exchange(
+typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type
+atomic_compare_exchange(
     T* const /*dest*/, T compare, T /*value*/, MemoryOrder, MemoryScope) {
   // FIXME_SYCL not implemented
   assert(false);
-  return compare;  
+  return compare;
 }
 
 template <typename T, class MemoryOrder, class MemoryScope>
@@ -96,7 +221,7 @@ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type atomic_ex
   return value;
 }
 
-}
+}  // namespace desul
 
 #endif
 #endif
diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_ScopeCaller.hpp b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_ScopeCaller.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fff6320c51bbd3ba3d39e019161af96a59ab1902
--- /dev/null
+++ b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_ScopeCaller.hpp
@@ -0,0 +1,43 @@
+/*
+Copyright (c) 2019, Lawrence Livermore National Security, LLC
+and DESUL project contributors. See the COPYRIGHT file for details.
+Source: https://github.com/desul/desul
+
+SPDX-License-Identifier: (BSD-3-Clause)
+*/
+
+#ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_SCOPECALLER_HPP_
+#define DESUL_ATOMICS_COMPARE_EXCHANGE_SCOPECALLER_HPP_
+#include "desul/atomics/Common.hpp"
+
+namespace desul {
+
+template <class MemoryOrder>
+DESUL_INLINE_FUNCTION void atomic_thread_fence(MemoryOrder, MemoryScopeCaller) {}
+
+#define DESUL_ATOMIC_EXCHANGE_SCOPECALLER(MEMORY_ORDER)               \
+  template <typename T>                                               \
+  DESUL_INLINE_FUNCTION T atomic_exchange(                            \
+      T* dest, T value, MEMORY_ORDER, MemoryScopeCaller) {            \
+    T return_val = *dest;                                             \
+    *dest = value;                                                    \
+    return return_val;                                                \
+  }                                                                   \
+                                                                      \
+  template <typename T>                                               \
+  DESUL_INLINE_FUNCTION T atomic_compare_exchange(                    \
+      T* dest, T compare, T value, MEMORY_ORDER, MemoryScopeCaller) { \
+    T current_val = *dest;                                            \
+    if (current_val == compare) *dest = value;                        \
+    return current_val;                                               \
+  }
+
+DESUL_ATOMIC_EXCHANGE_SCOPECALLER(MemoryOrderSeqCst)
+DESUL_ATOMIC_EXCHANGE_SCOPECALLER(MemoryOrderAcqRel)
+DESUL_ATOMIC_EXCHANGE_SCOPECALLER(MemoryOrderRelease)
+DESUL_ATOMIC_EXCHANGE_SCOPECALLER(MemoryOrderAcquire)
+DESUL_ATOMIC_EXCHANGE_SCOPECALLER(MemoryOrderRelaxed)
+
+#undef DESUL_ATOMIC_EXCHANGE_SCOPECALLER
+}  // namespace desul
+#endif
diff --git a/packages/kokkos/core/src/desul/atomics/Generic.hpp b/packages/kokkos/core/src/desul/atomics/Generic.hpp
index 9d5e87ece29f2c444522a91e4635598872f5b71f..1fffd3b2931c3f78ee6ca5911cdff31b55e9e566 100644
--- a/packages/kokkos/core/src/desul/atomics/Generic.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Generic.hpp
@@ -1,4 +1,4 @@
-/* 
+/*
 Copyright (c) 2019, Lawrence Livermore National Security, LLC
 and DESUL project contributors. See the COPYRIGHT file for details.
 Source: https://github.com/desul/desul
@@ -10,8 +10,10 @@ SPDX-License-Identifier: (BSD-3-Clause)
 #define DESUL_ATOMICS_GENERIC_HPP_
 
 #include <type_traits>
+#if defined(__GNUC__) && (!defined(__clang__))
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#endif
 #include "desul/atomics/Common.hpp"
 #include "desul/atomics/Compare_Exchange.hpp"
 #include "desul/atomics/Lock_Array.hpp"
@@ -45,11 +47,11 @@ struct MinOper {
   }
 };
 
-// This exit early optimization causes weird compiler errors with MSVC 2019
-#ifndef DESUL_HAVE_MSVC_ATOMICS
 template <typename Op, typename Scalar1, typename Scalar2, typename = bool>
 struct may_exit_early : std::false_type {};
 
+// This exit early optimization causes weird compiler errors with MSVC 2019
+#ifndef DESUL_HAVE_MSVC_ATOMICS
 template <typename Op, typename Scalar1, typename Scalar2>
 struct may_exit_early<Op,
                       Scalar1,
@@ -57,19 +59,21 @@ struct may_exit_early<Op,
                       decltype(Op::check_early_exit(std::declval<Scalar1 const&>(),
                                                     std::declval<Scalar2 const&>()))>
     : std::true_type {};
+#endif
 
 template <typename Op, typename Scalar1, typename Scalar2>
-constexpr DESUL_FUNCTION typename std::enable_if<may_exit_early<Op, Scalar1, Scalar2>::value, bool>::type
-check_early_exit(Op const&, Scalar1 const& val1, Scalar2 const& val2) {
+constexpr DESUL_FUNCTION
+    typename std::enable_if<may_exit_early<Op, Scalar1, Scalar2>::value, bool>::type
+    check_early_exit(Op const&, Scalar1 const& val1, Scalar2 const& val2) {
   return Op::check_early_exit(val1, val2);
 }
 
 template <typename Op, typename Scalar1, typename Scalar2>
-constexpr DESUL_FUNCTION typename std::enable_if<!may_exit_early<Op, Scalar1, Scalar2>::value, bool>::type
-check_early_exit(Op const&, Scalar1 const&, Scalar2 const&) {
+constexpr DESUL_FUNCTION
+    typename std::enable_if<!may_exit_early<Op, Scalar1, Scalar2>::value, bool>::type
+    check_early_exit(Op const&, Scalar1 const&, Scalar2 const&) {
   return false;
 }
-#endif
 
 template <class Scalar1, class Scalar2>
 struct AddOper {
@@ -143,6 +147,22 @@ struct RShiftOper {
   }
 };
 
+template <class Scalar1, class Scalar2>
+struct IncModOper {
+  DESUL_FORCEINLINE_FUNCTION
+  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
+    return ((val1 >= val2) ? Scalar1(0) : val1 + Scalar1(1));
+  }
+};
+
+template <class Scalar1, class Scalar2>
+struct DecModOper {
+  DESUL_FORCEINLINE_FUNCTION
+  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
+    return (((val1 == Scalar1(0)) | (val1 > val2)) ? val2 : (val1 - Scalar1(1)));
+  }
+};
+
 template <class Scalar1, class Scalar2>
 struct StoreOper {
   DESUL_FORCEINLINE_FUNCTION
@@ -155,82 +175,89 @@ struct LoadOper {
   static Scalar1 apply(const Scalar1& val1, const Scalar2&) { return val1; }
 };
 
-
-template <class Oper, typename T, class MemoryOrder, class MemoryScope,
-  // equivalent to:
-  //   requires atomic_always_lock_free(sizeof(T))
-  std::enable_if_t<atomic_always_lock_free(sizeof(T)), int> = 0
->
-DESUL_INLINE_FUNCTION T
-atomic_fetch_oper(const Oper& op,
-                  T* const dest,
-                  dont_deduce_this_parameter_t<const T> val,
-                  MemoryOrder order,
-                  MemoryScope scope) {
+template <class Oper,
+          typename T,
+          class MemoryOrder,
+          class MemoryScope,
+          // equivalent to:
+          //   requires atomic_always_lock_free(sizeof(T))
+          std::enable_if_t<atomic_always_lock_free(sizeof(T)), int> = 0>
+DESUL_INLINE_FUNCTION T atomic_fetch_oper(const Oper& op,
+                                          T* const dest,
+                                          dont_deduce_this_parameter_t<const T> val,
+                                          MemoryOrder order,
+                                          MemoryScope scope) {
   using cas_t = typename atomic_compare_exchange_type<sizeof(T)>::type;
   cas_t oldval = reinterpret_cast<cas_t&>(*dest);
   cas_t assume = oldval;
 
   do {
-#ifndef DESUL_HAVE_MSVC_ATOMICS
-    if (Impl::check_early_exit(op, reinterpret_cast<T&>(oldval), val)) return reinterpret_cast<T&>(oldval);
-#endif
+    if (Impl::check_early_exit(op, reinterpret_cast<T&>(oldval), val))
+      return reinterpret_cast<T&>(oldval);
     assume = oldval;
     T newval = op.apply(reinterpret_cast<T&>(assume), val);
-    oldval = desul::atomic_compare_exchange(
-        reinterpret_cast<cas_t*>(dest), assume, reinterpret_cast<cas_t&>(newval), order, scope);
+    oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest),
+                                            assume,
+                                            reinterpret_cast<cas_t&>(newval),
+                                            order,
+                                            scope);
   } while (assume != oldval);
 
   return reinterpret_cast<T&>(oldval);
 }
 
-template <class Oper, typename T, class MemoryOrder, class MemoryScope,
-  // equivalent to:
-  //   requires atomic_always_lock_free(sizeof(T))
-  std::enable_if_t<atomic_always_lock_free(sizeof(T)), int> = 0
->
-DESUL_INLINE_FUNCTION T
-atomic_oper_fetch(const Oper& op,
-                  T* const dest,
-                  dont_deduce_this_parameter_t<const T> val,
-                  MemoryOrder order,
-                  MemoryScope scope) {
+template <class Oper,
+          typename T,
+          class MemoryOrder,
+          class MemoryScope,
+          // equivalent to:
+          //   requires atomic_always_lock_free(sizeof(T))
+          std::enable_if_t<atomic_always_lock_free(sizeof(T)), int> = 0>
+DESUL_INLINE_FUNCTION T atomic_oper_fetch(const Oper& op,
+                                          T* const dest,
+                                          dont_deduce_this_parameter_t<const T> val,
+                                          MemoryOrder order,
+                                          MemoryScope scope) {
   using cas_t = typename atomic_compare_exchange_type<sizeof(T)>::type;
   cas_t oldval = reinterpret_cast<cas_t&>(*dest);
   T newval = val;
   cas_t assume = oldval;
   do {
-#ifndef DESUL_HAVE_MSVC_ATOMICS
-    if (Impl::check_early_exit(op, reinterpret_cast<T&>(oldval), val)) return reinterpret_cast<T&>(oldval);
-#endif
+    if (Impl::check_early_exit(op, reinterpret_cast<T&>(oldval), val))
+      return reinterpret_cast<T&>(oldval);
     assume = oldval;
     newval = op.apply(reinterpret_cast<T&>(assume), val);
-    oldval = desul::atomic_compare_exchange(
-        reinterpret_cast<cas_t*>(dest), assume, reinterpret_cast<cas_t&>(newval), order, scope);
+    oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest),
+                                            assume,
+                                            reinterpret_cast<cas_t&>(newval),
+                                            order,
+                                            scope);
   } while (assume != oldval);
 
   return newval;
 }
 
-template <class Oper, typename T, class MemoryOrder, class MemoryScope,
-  // equivalent to:
-  //   requires !atomic_always_lock_free(sizeof(T))
-  std::enable_if_t<!atomic_always_lock_free(sizeof(T)), int> = 0
->
-DESUL_INLINE_FUNCTION T
-atomic_fetch_oper(const Oper& op,
-                  T* const dest,
-                  dont_deduce_this_parameter_t<const T> val,
-                  MemoryOrder /*order*/,
-                  MemoryScope scope) {
+template <class Oper,
+          typename T,
+          class MemoryOrder,
+          class MemoryScope,
+          // equivalent to:
+          //   requires !atomic_always_lock_free(sizeof(T))
+          std::enable_if_t<!atomic_always_lock_free(sizeof(T)), int> = 0>
+DESUL_INLINE_FUNCTION T atomic_fetch_oper(const Oper& op,
+                                          T* const dest,
+                                          dont_deduce_this_parameter_t<const T> val,
+                                          MemoryOrder /*order*/,
+                                          MemoryScope scope) {
 #if defined(DESUL_HAVE_FORWARD_PROGRESS)
   // Acquire a lock for the address
-  while (!Impl::lock_address((void*)dest, scope)) {}
+  while (!Impl::lock_address((void*)dest, scope)) {
+  }
 
-  atomic_thread_fence(MemoryOrderAcquire(),scope);
+  atomic_thread_fence(MemoryOrderAcquire(), scope);
   T return_val = *dest;
   *dest = op.apply(return_val, val);
-  atomic_thread_fence(MemoryOrderRelease(),scope);
+  atomic_thread_fence(MemoryOrderRelease(), scope);
   Impl::unlock_address((void*)dest, scope);
   return return_val;
 #elif defined(DESUL_HAVE_GPU_LIKE_PROGRESS)
@@ -256,11 +283,11 @@ atomic_fetch_oper(const Oper& op,
   return return_val;
 // FIXME_SYCL not implemented
 #elif defined(__SYCL_DEVICE_ONLY__)
-  (void) op;
-  (void) dest;
-  (void) scope;
-  (void) return_val;
-  (void) done;
+  (void)op;
+  (void)dest;
+  (void)scope;
+  (void)return_val;
+  (void)done;
 
   assert(false);
   return val;
@@ -271,10 +298,10 @@ atomic_fetch_oper(const Oper& op,
   while (active != done_active) {
     if (!done) {
       if (Impl::lock_address_cuda((void*)dest, scope)) {
-        atomic_thread_fence(MemoryOrderAcquire(),scope);
+        atomic_thread_fence(MemoryOrderAcquire(), scope);
         return_val = *dest;
         *dest = op.apply(return_val, val);
-        atomic_thread_fence(MemoryOrderRelease(),scope);
+        atomic_thread_fence(MemoryOrderRelease(), scope);
         Impl::unlock_address_cuda((void*)dest, scope);
         done = 1;
       }
@@ -284,30 +311,32 @@ atomic_fetch_oper(const Oper& op,
   return return_val;
 #endif
 #else
-  static_assert(false, "Unimplemented lock based attomic\n");
+  static_assert(false, "Unimplemented lock based atomic\n");
   return val;
 #endif
 }
 
-template <class Oper, typename T, class MemoryOrder, class MemoryScope,
-  // equivalent to:
-  //   requires !atomic_always_lock_free(sizeof(T))
-  std::enable_if_t<!atomic_always_lock_free(sizeof(T)), int> = 0
->
-DESUL_INLINE_FUNCTION T
-atomic_oper_fetch(const Oper& op,
-                  T* const dest,
-                  dont_deduce_this_parameter_t<const T> val,
-                  MemoryOrder /*order*/,
-                  MemoryScope scope) {
+template <class Oper,
+          typename T,
+          class MemoryOrder,
+          class MemoryScope,
+          // equivalent to:
+          //   requires !atomic_always_lock_free(sizeof(T))
+          std::enable_if_t<!atomic_always_lock_free(sizeof(T)), int> = 0>
+DESUL_INLINE_FUNCTION T atomic_oper_fetch(const Oper& op,
+                                          T* const dest,
+                                          dont_deduce_this_parameter_t<const T> val,
+                                          MemoryOrder /*order*/,
+                                          MemoryScope scope) {
 #if defined(DESUL_HAVE_FORWARD_PROGRESS)
   // Acquire a lock for the address
-  while (!Impl::lock_address((void*)dest, scope)) {}
+  while (!Impl::lock_address((void*)dest, scope)) {
+  }
 
-  atomic_thread_fence(MemoryOrderAcquire(),scope);
+  atomic_thread_fence(MemoryOrderAcquire(), scope);
   T return_val = op.apply(*dest, val);
   *dest = return_val;
-  atomic_thread_fence(MemoryOrderRelease(),scope);
+  atomic_thread_fence(MemoryOrderRelease(), scope);
   Impl::unlock_address((void*)dest, scope);
   return return_val;
 #elif defined(DESUL_HAVE_GPU_LIKE_PROGRESS)
@@ -333,10 +362,10 @@ atomic_oper_fetch(const Oper& op,
   return return_val;
   // FIXME_SYCL not implemented
 #elif defined(__SYCL_DEVICE_ONLY__)
-  (void) op;
-  (void) dest;
-  (void) scope;
-  (void) done;
+  (void)op;
+  (void)dest;
+  (void)scope;
+  (void)done;
 
   assert(false);
   return val;
@@ -347,10 +376,10 @@ atomic_oper_fetch(const Oper& op,
   while (active != done_active) {
     if (!done) {
       if (Impl::lock_address_cuda((void*)dest, scope)) {
-        atomic_thread_fence(MemoryOrderAcquire(),scope);
+        atomic_thread_fence(MemoryOrderAcquire(), scope);
         return_val = op.apply(*dest, val);
         *dest = return_val;
-        atomic_thread_fence(MemoryOrderRelease(),scope);
+        atomic_thread_fence(MemoryOrderRelease(), scope);
         Impl::unlock_address_cuda((void*)dest, scope);
         done = 1;
       }
@@ -365,6 +394,29 @@ atomic_oper_fetch(const Oper& op,
 #endif
 }
 
+template <class Oper, typename T, class MemoryOrder>
+DESUL_INLINE_FUNCTION T atomic_fetch_oper(const Oper& op,
+                                          T* const dest,
+                                          dont_deduce_this_parameter_t<const T> val,
+                                          MemoryOrder /*order*/,
+                                          MemoryScopeCaller /*scope*/) {
+  T oldval = *dest;
+  *dest = op.apply(oldval, val);
+  return oldval;
+}
+
+template <class Oper, typename T, class MemoryOrder>
+DESUL_INLINE_FUNCTION T atomic_oper_fetch(const Oper& op,
+                                          T* const dest,
+                                          dont_deduce_this_parameter_t<const T> val,
+                                          MemoryOrder /*order*/,
+                                          MemoryScopeCaller /*scope*/) {
+  T oldval = *dest;
+  T newval = op.apply(oldval, val);
+  *dest = newval;
+  return newval;
+}
+
 }  // namespace Impl
 }  // namespace desul
 
@@ -546,7 +598,8 @@ template <typename T, class MemoryOrder, class MemoryScope>
 DESUL_INLINE_FUNCTION T atomic_load(const T* const dest,
                                     MemoryOrder order,
                                     MemoryScope scope) {
-  return Impl::atomic_fetch_oper(Impl::LoadOper<T, const T>(), const_cast<T*>(dest), T(), order, scope);
+  return Impl::atomic_fetch_oper(
+      Impl::LoadOper<T, const T>(), const_cast<T*>(dest), T(), order, scope);
 }
 
 template <typename T, class MemoryOrder, class MemoryScope>
@@ -606,14 +659,16 @@ DESUL_INLINE_FUNCTION void atomic_max(T* const dest,
 }
 
 template <typename T, class MemoryOrder, class MemoryScope>
-DESUL_INLINE_FUNCTION T
-atomic_inc_fetch(T* const dest, MemoryOrder order, MemoryScope scope) {
+DESUL_INLINE_FUNCTION T atomic_inc_fetch(T* const dest,
+                                         MemoryOrder order,
+                                         MemoryScope scope) {
   return atomic_add_fetch(dest, T(1), order, scope);
 }
 
 template <typename T, class MemoryOrder, class MemoryScope>
-DESUL_INLINE_FUNCTION T
-atomic_dec_fetch(T* const dest, MemoryOrder order, MemoryScope scope) {
+DESUL_INLINE_FUNCTION T atomic_dec_fetch(T* const dest,
+                                         MemoryOrder order,
+                                         MemoryScope scope) {
   return atomic_sub_fetch(dest, T(1), order, scope);
 }
 
@@ -624,23 +679,42 @@ DESUL_INLINE_FUNCTION T atomic_fetch_inc(T* const dest,
   return atomic_fetch_add(dest, T(1), order, scope);
 }
 
+template <typename T, class MemoryOrder, class MemoryScope>
+DESUL_INLINE_FUNCTION T
+atomic_fetch_inc_mod(T* const dest, T val, MemoryOrder order, MemoryScope scope) {
+  static_assert(std::is_unsigned<T>::value,
+                "Signed types not supported by atomic_fetch_inc_mod.");
+  return Impl::atomic_fetch_oper(
+      Impl::IncModOper<T, const T>(), dest, val, order, scope);
+}
+
 template <typename T, class MemoryOrder, class MemoryScope>
 DESUL_INLINE_FUNCTION T atomic_fetch_dec(T* const dest,
                                          MemoryOrder order,
                                          MemoryScope scope) {
   return atomic_fetch_sub(dest, T(1), order, scope);
 }
+
+template <typename T, class MemoryOrder, class MemoryScope>
+DESUL_INLINE_FUNCTION T
+atomic_fetch_dec_mod(T* const dest, T val, MemoryOrder order, MemoryScope scope) {
+  static_assert(std::is_unsigned<T>::value,
+                "Signed types not supported by atomic_fetch_dec_mod.");
+  return Impl::atomic_fetch_oper(
+      Impl::DecModOper<T, const T>(), dest, val, order, scope);
+}
+
 template <typename T, class MemoryOrder, class MemoryScope>
 DESUL_INLINE_FUNCTION void atomic_inc(T* const dest,
-                                         MemoryOrder order,
-                                         MemoryScope scope) {
+                                      MemoryOrder order,
+                                      MemoryScope scope) {
   return atomic_add(dest, T(1), order, scope);
 }
 
 template <typename T, class MemoryOrder, class MemoryScope>
 DESUL_INLINE_FUNCTION void atomic_dec(T* const dest,
-                                         MemoryOrder order,
-                                         MemoryScope scope) {
+                                      MemoryOrder order,
+                                      MemoryScope scope) {
   return atomic_sub(dest, T(1), order, scope);
 }
 
@@ -681,10 +755,12 @@ DESUL_INLINE_FUNCTION bool atomic_compare_exchange_weak(T* const dest,
 
 }  // namespace desul
 
-#include <desul/atomics/SYCL.hpp>
 #include <desul/atomics/CUDA.hpp>
 #include <desul/atomics/GCC.hpp>
 #include <desul/atomics/HIP.hpp>
 #include <desul/atomics/OpenMP.hpp>
+#include <desul/atomics/SYCL.hpp>
+#if defined(__GNUC__) && (!defined(__clang__))
 #pragma GCC diagnostic pop
 #endif
+#endif
diff --git a/packages/kokkos/core/src/desul/atomics/HIP.hpp b/packages/kokkos/core/src/desul/atomics/HIP.hpp
index 16c1f510b7a2627408ccea374004d280997e96df..5365ab91316e4bab9381b13a7135e61a3b2ca9b2 100644
--- a/packages/kokkos/core/src/desul/atomics/HIP.hpp
+++ b/packages/kokkos/core/src/desul/atomics/HIP.hpp
@@ -10,329 +10,216 @@ SPDX-License-Identifier: (BSD-3-Clause)
 
 #ifdef __HIP_DEVICE_COMPILE__
 namespace desul {
-namespace Impl {
-template <typename T>
-struct is_hip_atomic_integer_type {
-  static constexpr bool value = std::is_same<T, int>::value ||
-                                std::is_same<T, unsigned int>::value ||
-                                std::is_same<T, unsigned long long int>::value;
-};
-
-template <typename T>
-struct is_hip_atomic_add_type {
-  static constexpr bool value = is_hip_atomic_integer_type<T>::value ||
-                                std::is_same<T, double>::value ||
-                                std::is_same<T, float>::value;
-};
-
-template <typename T>
-struct is_hip_atomic_sub_type {
-  static constexpr bool value =
-      std::is_same<T, int>::value || std::is_same<T, unsigned int>::value;
-};
-}  // namespace Impl
-
-// Atomic Add
-template <typename T>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_add_type<T>::value, T>::type
-    atomic_fetch_add(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicAdd(dest, val);
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_add_type<T>::value, T>::type
-    atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  __threadfence();
-  T return_val = atomicAdd(dest, val);
-  __threadfence();
-
-  return return_val;
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_add_type<T>::value, T>::type
-    atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_add(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic Sub
-template <typename T>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_sub_type<T>::value, T>::type
-    atomic_fetch_sub(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicSub(dest, val);
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_sub_type<T>::value, T>::type
-    atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  __threadfence();
-  T return_val = atomicSub(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_sub_type<T>::value, T>::type
-    atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_sub(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic Inc
-__device__ inline unsigned int atomic_fetch_inc(unsigned int* dest,
-                                                unsigned int val,
-                                                MemoryOrderRelaxed,
-                                                MemoryScopeDevice) {
-  return atomicInc(dest, val);
-}
-
-template <typename MemoryOrder>
-__device__ inline unsigned int atomic_fetch_inc(unsigned int* dest,
-                                                unsigned int val,
-                                                MemoryOrder,
-                                                MemoryScopeDevice) {
-  __threadfence();
-  unsigned int return_val = atomicInc(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename MemoryOrder>
-__device__ inline unsigned int atomic_fetch_inc(unsigned int* dest,
-                                                unsigned int val,
-                                                MemoryOrder,
-                                                MemoryScopeCore) {
-  return atomic_fetch_inc(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic Dec
-__device__ inline unsigned int atomic_fetch_dec(unsigned int* dest,
-                                                unsigned int val,
-                                                MemoryOrderRelaxed,
-                                                MemoryScopeDevice) {
-  return atomicDec(dest, val);
-}
-
-template <typename MemoryOrder>
-__device__ inline unsigned int atomic_fetch_dec(unsigned int* dest,
-                                                unsigned int val,
-                                                MemoryOrder,
-                                                MemoryScopeDevice) {
-  __threadfence();
-  unsigned int return_val = atomicDec(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename MemoryOrder>
-__device__ inline unsigned int atomic_fetch_dec(unsigned int* dest,
-                                                unsigned int val,
-                                                MemoryOrder,
-                                                MemoryScopeCore) {
-  return atomic_fetch_dec(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic Max
-template <typename T>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_max(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicMax(dest, val);
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_max(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  __threadfence();
-  T return_val = atomicMax(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_max(T* dest, T val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_max(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic Min
-template <typename T>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_min(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicMin(dest, val);
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_min(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  __threadfence();
-  T return_val = atomicMin(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_min(T* dest, T val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_min(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic And
-template <typename T>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_and(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicAnd(dest, val);
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_and(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  __threadfence();
-  T return_val = atomicAnd(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_and(T* dest, T val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_and(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic XOR
-template <typename T>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_xor(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicXor(dest, val);
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_xor(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  __threadfence();
-  T return_val = atomicXor(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_xor(T* dest, T val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_xor(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-// Atomic OR
-template <typename T>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_or(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) {
-  return atomicOr(dest, val);
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_or(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  __threadfence();
-  T return_val = atomicOr(dest, val);
-  __threadfence();
-  return return_val;
-}
-
-template <typename T, typename MemoryOrder>
-__device__ inline
-    typename std::enable_if<Impl::is_hip_atomic_integer_type<T>::value, T>::type
-    atomic_fetch_or(T* dest, T val, MemoryOrder, MemoryScopeCore) {
-  return atomic_fetch_or(dest, val, MemoryOrder(), MemoryScopeDevice());
-}
-
-}
-
-#define DESUL_HIP_GCC_INTEGRAL_OP_ATOMICS_COMPATIBILITY(MEMORY_ORDER, MEMORY_SCOPE)                 \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_add_type<T>::value, T>::type atomic_fetch_add(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_fetch_oper(Impl::AddOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_sub_type<T>::value, T>::type atomic_fetch_sub(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_fetch_oper(Impl::SubOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_fetch_and(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_fetch_oper(Impl::AndOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_fetch_or(   \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_fetch_oper(Impl::OrOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_fetch_xor(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_fetch_oper(Impl::XorOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_fetch_nand( \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_fetch_oper(Impl::NandOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_add_type<T>::value, T>::type atomic_add_fetch(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_oper_fetch(Impl::AddOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_sub_type<T>::value, T>::type atomic_sub_fetch(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_oper_fetch(Impl::SubOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_and_fetch(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_oper_fetch(Impl::AndOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_or_fetch(   \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_oper_fetch(Impl::OrOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_xor_fetch(  \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_oper_fetch(Impl::XorOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
-  }                                                                               \
-  template <typename T>                                                           \
-  __device__ typename std::enable_if<std::is_integral<T>::value && !Impl::is_hip_atomic_integer_type<T>::value, T>::type atomic_nand_fetch( \
-      T* const dest, T value, MEMORY_ORDER, MEMORY_SCOPE) {                       \
-       return Impl::atomic_oper_fetch(Impl::NandOper<T, const T>(), dest, value, MEMORY_ORDER(), MEMORY_SCOPE()); \
+
+// header file is organized as follows:
+//   1/ device-side overload set from atomic functions provided by HIP
+//   2/ fallback implementation on host-side for atomic functions defined in 1/ that are
+//      not included in the GCC overload set
+//   3/ fallback implementation on device-side for atomic functions from the GCC
+//      overload set that are not defined in 1/
+
+// clang-format off
+inline __device__                int atomic_fetch_add(               int* ptr,                int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); }
+inline __device__       unsigned int atomic_fetch_add(      unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); }
+inline __device__ unsigned long long atomic_fetch_add(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); }
+inline __device__              float atomic_fetch_add(             float* ptr,              float val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); }
+inline __device__             double atomic_fetch_add(            double* ptr,             double val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); }
+
+inline __device__                int atomic_fetch_sub(               int* ptr,                int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, val); }
+inline __device__       unsigned int atomic_fetch_sub(      unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, val); }
+inline __device__ unsigned long long atomic_fetch_sub(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); }
+inline __device__              float atomic_fetch_sub(             float* ptr,              float val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); }
+inline __device__             double atomic_fetch_sub(            double* ptr,             double val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); }
+
+inline __device__                int atomic_fetch_min(               int* ptr,                int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); }
+inline __device__       unsigned int atomic_fetch_min(      unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); }
+inline __device__ unsigned long long atomic_fetch_min(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); }
+
+inline __device__                int atomic_fetch_max(               int* ptr,                int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); }
+inline __device__       unsigned int atomic_fetch_max(      unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); }
+inline __device__ unsigned long long atomic_fetch_max(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); }
+
+inline __device__                int atomic_fetch_and(               int* ptr,                int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); }
+inline __device__       unsigned int atomic_fetch_and(      unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); }
+inline __device__ unsigned long long atomic_fetch_and(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); }
+
+inline __device__                int atomic_fetch_or (               int* ptr,                int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); }
+inline __device__       unsigned int atomic_fetch_or (      unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); }
+inline __device__ unsigned long long atomic_fetch_or (unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); }
+
+inline __device__                int atomic_fetch_xor(               int* ptr,                int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); }
+inline __device__       unsigned int atomic_fetch_xor(      unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); }
+inline __device__ unsigned long long atomic_fetch_xor(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); }
+
+inline __device__                int atomic_fetch_inc(               int* ptr,                         MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1   ); }
+inline __device__       unsigned int atomic_fetch_inc(      unsigned int* ptr,                         MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1u  ); }
+inline __device__ unsigned long long atomic_fetch_inc(unsigned long long* ptr,                         MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1ull); }
+
+inline __device__                int atomic_fetch_dec(               int* ptr,                         MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, 1   ); }
+inline __device__       unsigned int atomic_fetch_dec(      unsigned int* ptr,                         MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, 1u  ); }
+inline __device__ unsigned long long atomic_fetch_dec(unsigned long long* ptr,                         MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -1  ); }
+
+inline __device__       unsigned int atomic_fetch_inc_mod(  unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicInc(ptr, val); }
+inline __device__       unsigned int atomic_fetch_dec_mod(  unsigned int* ptr,       unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicDec(ptr, val); }
+// clang-format on
+
+#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, TYPE)                         \
+  template <class MemoryOrder>                                                  \
+  inline __device__ TYPE atomic_fetch_##OP(                                     \
+      TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeDevice) {                    \
+    __threadfence();                                                            \
+    TYPE return_val =                                                           \
+        atomic_fetch_##OP(ptr, val, MemoryOrderRelaxed(), MemoryScopeDevice()); \
+    __threadfence();                                                            \
+    return return_val;                                                          \
+  }                                                                             \
+  template <class MemoryOrder>                                                  \
+  inline __device__ TYPE atomic_fetch_##OP(                                     \
+      TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeCore) {                      \
+    return atomic_fetch_##OP(ptr, val, MemoryOrder(), MemoryScopeDevice());     \
   }
-namespace desul {
-DESUL_HIP_GCC_INTEGRAL_OP_ATOMICS_COMPATIBILITY(MemoryOrderRelaxed, MemoryScopeNode)
-DESUL_HIP_GCC_INTEGRAL_OP_ATOMICS_COMPATIBILITY(MemoryOrderRelaxed, MemoryScopeDevice)
-DESUL_HIP_GCC_INTEGRAL_OP_ATOMICS_COMPATIBILITY(MemoryOrderRelaxed, MemoryScopeCore)
-DESUL_HIP_GCC_INTEGRAL_OP_ATOMICS_COMPATIBILITY(MemoryOrderSeqCst, MemoryScopeNode)
-DESUL_HIP_GCC_INTEGRAL_OP_ATOMICS_COMPATIBILITY(MemoryOrderSeqCst, MemoryScopeDevice)
-DESUL_HIP_GCC_INTEGRAL_OP_ATOMICS_COMPATIBILITY(MemoryOrderSeqCst, MemoryScopeCore)
+
+#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(OP) \
+  DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, int)           \
+  DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, unsigned int)  \
+  DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, unsigned long long)
+
+#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \
+  DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, float)               \
+  DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, double)
+
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(min)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(max)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(and)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(or)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(xor)
+
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(add)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(add)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(sub)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(sub)
+
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(inc)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(dec)
+
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(inc_mod, unsigned int)
+DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(dec_mod, unsigned int)
+
+#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT
+#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL
+#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP
+
+
+// 2/ host-side fallback implementation for atomic functions not provided by GCC
+
+#define DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(OP_LOWERCASE, OP_PASCAL_CASE, TYPE) \
+  template <class MemoryOrder>                                                      \
+  inline __host__ TYPE atomic_fetch_##OP_LOWERCASE(                                 \
+      TYPE* ptr, TYPE val, MemoryOrder order, MemoryScopeDevice scope) {            \
+    return Impl::atomic_fetch_oper(                                                 \
+        Impl::OP_PASCAL_CASE##Oper<TYPE, const TYPE>(), ptr, val, order, scope);    \
+  }                                                                                 \
+  template <class MemoryOrder>                                                      \
+  inline __host__ TYPE atomic_fetch_##OP_LOWERCASE(                                 \
+      TYPE* ptr, TYPE val, MemoryOrder order, MemoryScopeCore scope) {              \
+    return Impl::atomic_fetch_oper(                                                 \
+        Impl::OP_PASCAL_CASE##Oper<TYPE, const TYPE>(), ptr, val, order, scope);    \
+  }
+
+#define DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_INTEGRAL(OP_LOWERCASE, OP_PASCAL_CASE) \
+  DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(OP_LOWERCASE, OP_PASCAL_CASE, int)           \
+  DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(OP_LOWERCASE, OP_PASCAL_CASE, unsigned int)  \
+  DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(                                             \
+      OP_LOWERCASE, OP_PASCAL_CASE, unsigned long long)
+
+#define DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_FLOATING_POINT(OP_LOWERCASE,   \
+                                                               OP_PASCAL_CASE) \
+  DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(OP_LOWERCASE, OP_PASCAL_CASE, float) \
+  DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(OP_LOWERCASE, OP_PASCAL_CASE, double)
+
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_INTEGRAL(min, Min)
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_INTEGRAL(max, Max)
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_FLOATING_POINT(add, Add)
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_FLOATING_POINT(sub, Sub)
+
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(inc_mod, IncMod, unsigned int)
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(dec_mod, DecMod, unsigned int)
+
+#undef DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_FLOATING_POINT
+#undef DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN_INTEGRAL
+#undef DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN
+
+#define DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_INCREMENT_DECREMENT(TYPE) \
+  template <class MemoryOrder>                                        \
+  inline __host__ TYPE atomic_fetch_inc(                              \
+      TYPE* ptr, MemoryOrder order, MemoryScopeDevice scope) {        \
+    return atomic_fetch_add(ptr, static_cast<TYPE>(1), order, scope); \
+  }                                                                   \
+  template <class MemoryOrder>                                        \
+  inline __host__ TYPE atomic_fetch_inc(                              \
+      TYPE* ptr, MemoryOrder order, MemoryScopeCore scope) {          \
+    return atomic_fetch_add(ptr, static_cast<TYPE>(1), order, scope); \
+  }                                                                   \
+  template <class MemoryOrder>                                        \
+  inline __host__ TYPE atomic_fetch_dec(                              \
+      TYPE* ptr, MemoryOrder order, MemoryScopeDevice scope) {        \
+    return atomic_fetch_sub(ptr, static_cast<TYPE>(1), order, scope); \
+  }                                                                   \
+  template <class MemoryOrder>                                        \
+  inline __host__ TYPE atomic_fetch_dec(                              \
+      TYPE* ptr, MemoryOrder order, MemoryScopeCore scope) {          \
+    return atomic_fetch_sub(ptr, static_cast<TYPE>(1), order, scope); \
+  }
+
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_INCREMENT_DECREMENT(int)
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_INCREMENT_DECREMENT(unsigned int)
+DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_INCREMENT_DECREMENT(unsigned long long)
+
+#undef DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_INCREMENT_DECREMENT
+
+
+// 3/ device-side fallback implementation for atomic functions defined in GCC overload
+// set
+
+#define DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE(             \
+    OP_LOWERCASE, OP_PASCAL_CASE, MEMORY_ORDER, MEMORY_SCOPE)              \
+  template <class T>                                                       \
+  inline __device__ std::enable_if_t<std::is_integral<T>::value, T>        \
+      atomic_##OP_LOWERCASE##_fetch(                                       \
+          T* ptr, T val, MEMORY_ORDER order, MEMORY_SCOPE scope) {         \
+    return Impl::atomic_oper_fetch(                                        \
+        Impl::OP_PASCAL_CASE##Oper<T, const T>(), ptr, val, order, scope); \
+  }                                                                        \
+  template <class T>                                                       \
+  inline __device__ std::enable_if_t<std::is_integral<T>::value, T>        \
+      atomic_fetch_##OP_LOWERCASE(                                         \
+          T* ptr, T val, MEMORY_ORDER order, MEMORY_SCOPE scope) {         \
+    return Impl::atomic_fetch_oper(                                        \
+        Impl::OP_PASCAL_CASE##Oper<T, const T>(), ptr, val, order, scope); \
+  }
+
+// clang-format off
+#define DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(OP_LOWERCASE, OP_PASCAL_CASE) \
+  DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE(OP_LOWERCASE, OP_PASCAL_CASE, MemoryOrderRelaxed, MemoryScopeNode) \
+  DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE(OP_LOWERCASE, OP_PASCAL_CASE, MemoryOrderRelaxed, MemoryScopeDevice) \
+  DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE(OP_LOWERCASE, OP_PASCAL_CASE, MemoryOrderRelaxed, MemoryScopeCore) \
+  DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE(OP_LOWERCASE, OP_PASCAL_CASE, MemoryOrderSeqCst,  MemoryScopeNode) \
+  DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE(OP_LOWERCASE, OP_PASCAL_CASE, MemoryOrderSeqCst,  MemoryScopeDevice) \
+  DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE(OP_LOWERCASE, OP_PASCAL_CASE, MemoryOrderSeqCst,  MemoryScopeCore)
+// clang-format on
+
+DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(add, Add)
+DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(sub, Sub)
+DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(and, And)
+DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(or, Or)
+DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(xor, Xor)
+DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(nand, Nand)
+
+#undef DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN
+#undef DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN_ORDER_SCOPE
+
 }  // namespace desul
 
 #endif
 #endif
+
diff --git a/packages/kokkos/core/src/desul/atomics/Macros.hpp b/packages/kokkos/core/src/desul/atomics/Macros.hpp
index db9962e03bd84052a4d61a89cf39892e40051b89..0890b2dbc5af20f3d3f111ee3846d8117ce05c5b 100644
--- a/packages/kokkos/core/src/desul/atomics/Macros.hpp
+++ b/packages/kokkos/core/src/desul/atomics/Macros.hpp
@@ -11,19 +11,27 @@ SPDX-License-Identifier: (BSD-3-Clause)
 
 // Macros
 
-#if defined(__GNUC__) && \
-    (!defined(__CUDA_ARCH__) || !defined(__NVCC__)) && \
-    (!defined(__HIP_DEVICE_COMPILE) || !defined(__HIP_PLATFORM_HCC__)) && \
-    !defined(__SYCL_DEVICE_ONLY__) && \
-    !defined(DESUL_HAVE_OPENMP_ATOMICS) && \
+#if (!defined(__CUDA_ARCH__) || !defined(__NVCC__)) &&                       \
+    (!defined(__HIP_DEVICE_COMPILE) || !defined(__HIP_PLATFORM_HCC__)) &&    \
+    !defined(__SYCL_DEVICE_ONLY__) && !defined(DESUL_HAVE_OPENMP_ATOMICS) && \
     !defined(DESUL_HAVE_SERIAL_ATOMICS)
+#define DESUL_IMPL_HAVE_GCC_OR_MSVC_ATOMICS
+#endif
+
+// ONLY use GNUC atomics if not compiling for the device
+// and we didn't explicitly say to use OPENMP or SERIAL atomics
+#if defined(__GNUC__) && defined(DESUL_IMPL_HAVE_GCC_OR_MSVC_ATOMICS)
 #define DESUL_HAVE_GCC_ATOMICS
 #endif
 
-#ifdef _MSC_VER
+// Equivalent to above: if we are compiling for the device we
+// need to use CUDA/HIP/SYCL atomics instead of MSVC atomics
+#if defined(_MSC_VER) && defined(DESUL_IMPL_HAVE_GCC_OR_MSVC_ATOMICS)
 #define DESUL_HAVE_MSVC_ATOMICS
 #endif
 
+#undef DESUL_IMPL_HAVE_GCC_OR_MSVC_ATOMICS
+
 #ifdef __CUDACC__
 #define DESUL_HAVE_CUDA_ATOMICS
 #endif
@@ -34,14 +42,10 @@ SPDX-License-Identifier: (BSD-3-Clause)
 
 #ifdef __SYCL_DEVICE_ONLY__
 #define DESUL_HAVE_SYCL_ATOMICS
-#ifdef __clang__
-#define DESUL_SYCL_NAMESPACE sycl::ONEAPI
-#else
-#define DESUL_SYCL_NAMESPACE sycl
-#endif
 #endif
 
-#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || defined(__SYCL_DEVICE_ONLY__)
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || \
+    defined(__SYCL_DEVICE_ONLY__)
 #define DESUL_HAVE_GPU_LIKE_PROGRESS
 #endif
 
diff --git a/packages/kokkos/core/src/desul/atomics/SYCL.hpp b/packages/kokkos/core/src/desul/atomics/SYCL.hpp
index 44e2dc0ec4ea843d6b6b4e9896b27fc63df6baad..852559101740c6d52a1ce1db85207417fa570d4f 100644
--- a/packages/kokkos/core/src/desul/atomics/SYCL.hpp
+++ b/packages/kokkos/core/src/desul/atomics/SYCL.hpp
@@ -1,4 +1,4 @@
-/* 
+/*
 Copyright (c) 2019, Lawrence Livermore National Security, LLC
 and DESUL project contributors. See the COPYRIGHT file for details.
 Source: https://github.com/desul/desul
@@ -9,135 +9,108 @@ SPDX-License-Identifier: (BSD-3-Clause)
 #define DESUL_ATOMICS_SYCL_HPP_
 
 #ifdef DESUL_HAVE_SYCL_ATOMICS
+
+// clang-format off
+#include "desul/atomics/SYCLConversions.hpp"
 #include "desul/atomics/Common.hpp"
+// clang-format on
 
 namespace desul {
-namespace Impl {
-template<class T>
-struct is_sycl_atomic_type {
-  static constexpr bool value = std::is_same<T, int>::value ||
-                                std::is_same<T, unsigned int>::value ||
-				std::is_same<T, long>::value ||
-				std::is_same<T, unsigned long>::value ||
-				std::is_same<T, long long>::value ||
-                                std::is_same<T, unsigned long long int>::value ||
-				std::is_same<T, float>::value ||
-				std::is_same<T, double>::value;
-};
-} // Impl
-
-// Atomic Add
-template<class T, class MemoryOrder/*, class MemoryScope*/>
-inline
-typename std::enable_if<Impl::is_sycl_atomic_type<T>::value,T>::type
-atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    T, 
-    DesulToSYCLMemoryOrder<MemoryOrder>::value, 
-    DesulToSYCLMemoryScope<MemoryScopeDevice>::value,  
-    sycl::access::address_space::global_device_space> 
-  dest_ref(*dest);
-  return dest_ref.fetch_add(val);
-}
-
-// Atomic Sub 
-template<class T, class MemoryOrder/*, class MemoryScope*/>
-inline
-typename std::enable_if<Impl::is_sycl_atomic_type<T>::value,T>::type
-atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    T,
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScopeDevice>::value,
-    sycl::access::address_space::global_device_space>
-  dest_ref(*dest);
-  return dest_ref.fetch_sub(val);
-}
 
-// Atomic Inc
-template<class MemoryOrder/*, class MemoryScope*/>
-inline
-unsigned int atomic_fetch_inc(unsigned int* dest, unsigned int val, MemoryOrder memory_order, MemoryScopeDevice memory_scope) {
-  return atomic_fetch_add(dest, val, memory_order, memory_scope);
-}
+// FIXME_SYCL We need to either use generic_space or figure out how to check for the
+// correct adress space in a SYCL-portable way.
+#ifndef __NVPTX__
+#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, TYPE)                              \
+  template <class MemoryOrder>                                                     \
+  TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeDevice) { \
+    auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<TYPE>(dest);                  \
+    if (l) {                                                                       \
+      Impl::sycl_atomic_ref<TYPE,                                                  \
+                            MemoryOrder,                                           \
+                            MemoryScopeDevice,                                     \
+                            sycl::access::address_space::local_space>              \
+          dest_ref(*dest);                                                         \
+      return dest_ref.fetch_##OPER(val);                                           \
+    } else {                                                                       \
+      Impl::sycl_atomic_ref<TYPE,                                                  \
+                            MemoryOrder,                                           \
+                            MemoryScopeDevice,                                     \
+                            sycl::access::address_space::global_space>             \
+          dest_ref(*dest);                                                         \
+      return dest_ref.fetch_##OPER(val);                                           \
+    }                                                                              \
+  }                                                                                \
+  template <class MemoryOrder>                                                     \
+  TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeCore) {   \
+    auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<TYPE>(dest);                  \
+    if (l) {                                                                       \
+      Impl::sycl_atomic_ref<TYPE,                                                  \
+                            MemoryOrder,                                           \
+                            MemoryScopeDevice,                                     \
+                            sycl::access::address_space::local_space>              \
+          dest_ref(*dest);                                                         \
+      return dest_ref.fetch_##OPER(val);                                           \
+    } else {                                                                       \
+      Impl::sycl_atomic_ref<TYPE,                                                  \
+                            MemoryOrder,                                           \
+                            MemoryScopeDevice,                                     \
+                            sycl::access::address_space::global_space>             \
+          dest_ref(*dest);                                                         \
+      return dest_ref.fetch_##OPER(val);                                           \
+    }                                                                              \
+  }
+#else
+#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, TYPE)                              \
+  template <class MemoryOrder>                                                     \
+  TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeDevice) { \
+    Impl::sycl_atomic_ref<TYPE,                                                    \
+                          MemoryOrder,                                             \
+                          MemoryScopeDevice,                                       \
+                          sycl::access::address_space::global_space>               \
+        dest_ref(*dest);                                                           \
+    return dest_ref.fetch_##OPER(val);                                             \
+  }                                                                                \
+  template <class MemoryOrder>                                                     \
+  TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeCore) {   \
+    Impl::sycl_atomic_ref<TYPE,                                                    \
+                          MemoryOrder,                                             \
+                          MemoryScopeCore,                                         \
+                          sycl::access::address_space::global_space>               \
+        dest_ref(*dest);                                                           \
+    return dest_ref.fetch_##OPER(val);                                             \
+  }
+#endif
 
-// Atomic Dec
-template<class MemoryOrder/*, class MemoryScope*/>
-inline
-unsigned int atomic_fetch_dec(unsigned int* dest, unsigned int val, MemoryOrder memory_order, MemoryScopeDevice memory_scope) {
-  return atomic_fetch_sub(dest, val, memory_order, memory_scope);
-}
+#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(OPER) \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, int)           \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned int)  \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, long)          \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned long) \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, long long)     \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned long long)
 
-// Atomic Max
-template<class T, class MemoryOrder/*, class MemoryScope*/>
-inline
-typename std::enable_if<Impl::is_sycl_atomic_type<T>::value,T>::type
-atomic_fetch_max(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    T,
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScopeDevice>::value,
-    sycl::access::address_space::global_device_space>
-  dest_ref(*dest);
-  return dest_ref.fetch_max(val);
-}
+#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(OPER) \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, float)               \
+  DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, double)
 
-// Atomic Min
-template<class T, class MemoryOrder/*, class MemoryScope*/>
-inline
-typename std::enable_if<Impl::is_sycl_atomic_type<T>::value,T>::type
-atomic_fetch_min(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    T,
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScopeDevice>::value,
-    sycl::access::address_space::global_device_space>
-  dest_ref(*dest);
-  return dest_ref.fetch_min(val);
-}
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(add)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(sub)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(and)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(or)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(xor)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(min)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(max)
 
-// Atomic And
-template<class T, class MemoryOrder/*, class MemoryScope*/>
-inline
-typename std::enable_if<Impl::is_sycl_atomic_type<T>::value,T>::type
-atomic_fetch_and(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    T,
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScopeDevice>::value,
-    sycl::access::address_space::global_device_space>
-  dest_ref(*dest);
-  return dest_ref.fetch_and(val);
-}
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(add)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(sub)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(min)
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(max)
 
-// Atomic XOR
-template<class T, class MemoryOrder/*, class MemoryScope*/>
-inline
-typename std::enable_if<Impl::is_sycl_atomic_type<T>::value,T>::type
-atomic_fetch_xor(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    T,
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScopeDevice>::value,
-    sycl::access::address_space::global_device_space>
-  dest_ref(*dest);
-  return dest_ref.fetch_xor(val);
-}
+#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT
+#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL
+#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER
 
-// Atomic OR
-template<class T, class MemoryOrder/*, class MemoryScope*/>
-inline
-typename std::enable_if<Impl::is_sycl_atomic_type<T>::value,T>::type
-atomic_fetch_or(T* dest, T val, MemoryOrder, MemoryScopeDevice) {
-  DESUL_SYCL_NAMESPACE::atomic_ref<
-    T,
-    DesulToSYCLMemoryOrder<MemoryOrder>::value,
-    DesulToSYCLMemoryScope<MemoryScopeDevice>::value,
-    sycl::access::address_space::global_device_space>
-  dest_ref(*dest);
-  return dest_ref.fetch_or(val);
-}
+}  // namespace desul
 
-} // desul
 #endif  // DESUL_HAVE_SYCL_ATOMICS
 #endif  // DESUL_ATOMICS_SYCL_HPP_
diff --git a/packages/kokkos/core/src/desul/atomics/SYCLConversions.hpp b/packages/kokkos/core/src/desul/atomics/SYCLConversions.hpp
index a66e5cf051f684695b17b7fae9fe2aaa5009a2c3..0ff3c7fee7ec436cd727bed96c3b80d0b742a67c 100644
--- a/packages/kokkos/core/src/desul/atomics/SYCLConversions.hpp
+++ b/packages/kokkos/core/src/desul/atomics/SYCLConversions.hpp
@@ -1,4 +1,4 @@
-/* 
+/*
 Copyright (c) 2019, Lawrence Livermore National Security, LLC
 and DESUL project contributors. See the COPYRIGHT file for details.
 Source: https://github.com/desul/desul
@@ -9,50 +9,89 @@ SPDX-License-Identifier: (BSD-3-Clause)
 #ifndef DESUL_ATOMICS_SYCL_CONVERSIONS_HPP_
 #define DESUL_ATOMICS_SYCL_CONVERSIONS_HPP_
 #ifdef DESUL_HAVE_SYCL_ATOMICS
+
+// clang-format off
 #include "desul/atomics/Common.hpp"
+
 #include <CL/sycl.hpp>
+// clang-format on
 
 namespace desul {
+namespace Impl {
+
+#ifdef __clang__
+namespace sycl_sync_and_atomics = ::sycl::ext::oneapi;
+#else
+namespace sycl_sync_and_atomics = ::sycl;
+#endif
+
+template <bool extended_namespace>
+using sycl_memory_order = std::conditional_t<extended_namespace,
+                                             sycl_sync_and_atomics::memory_order,
+                                             sycl::memory_order>;
+template <bool extended_namespace>
+using sycl_memory_scope = std::conditional_t<extended_namespace,
+                                             sycl_sync_and_atomics::memory_scope,
+                                             sycl::memory_scope>;
 
-template<class MemoryOrder>
+template <class MemoryOrder, bool extended_namespace = true>
 struct DesulToSYCLMemoryOrder;
-template<>
-struct DesulToSYCLMemoryOrder<MemoryOrderSeqCst> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_order value = DESUL_SYCL_NAMESPACE::memory_order::seq_cst;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryOrder<MemoryOrderSeqCst, extended_namespace> {
+  static constexpr sycl_memory_order<extended_namespace> value =
+      sycl_memory_order<extended_namespace>::seq_cst;
 };
-template<>
-struct DesulToSYCLMemoryOrder<MemoryOrderAcquire> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_order value = DESUL_SYCL_NAMESPACE::memory_order::acquire;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryOrder<MemoryOrderAcquire, extended_namespace> {
+  static constexpr sycl_memory_order<extended_namespace> value =
+      sycl_memory_order<extended_namespace>::acquire;
 };
-template<>
-struct DesulToSYCLMemoryOrder<MemoryOrderRelease> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_order value = DESUL_SYCL_NAMESPACE::memory_order::release;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryOrder<MemoryOrderRelease, extended_namespace> {
+  static constexpr sycl_memory_order<extended_namespace> value =
+      sycl_memory_order<extended_namespace>::release;
 };
-template<>
-struct DesulToSYCLMemoryOrder<MemoryOrderAcqRel> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_order value = DESUL_SYCL_NAMESPACE::memory_order::acq_rel;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryOrder<MemoryOrderAcqRel, extended_namespace> {
+  static constexpr sycl_memory_order<extended_namespace> value =
+      sycl_memory_order<extended_namespace>::acq_rel;
 };
-template<>
-struct DesulToSYCLMemoryOrder<MemoryOrderRelaxed> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_order value = DESUL_SYCL_NAMESPACE::memory_order::relaxed;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryOrder<MemoryOrderRelaxed, extended_namespace> {
+  static constexpr sycl_memory_order<extended_namespace> value =
+      sycl_memory_order<extended_namespace>::relaxed;
 };
 
-template<class MemoryScope>
+template <class MemoryScope, bool extended_namespace = true>
 struct DesulToSYCLMemoryScope;
-template<>
-struct DesulToSYCLMemoryScope<MemoryScopeCore> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_scope value = DESUL_SYCL_NAMESPACE::memory_scope::work_group;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryScope<MemoryScopeCore, extended_namespace> {
+  static constexpr sycl_memory_scope<extended_namespace> value =
+      sycl_memory_scope<extended_namespace>::work_group;
 };
-template<>
-struct DesulToSYCLMemoryScope<MemoryScopeDevice> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_scope value = DESUL_SYCL_NAMESPACE::memory_scope::device;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryScope<MemoryScopeDevice, extended_namespace> {
+  static constexpr sycl_memory_scope<extended_namespace> value =
+      sycl_memory_scope<extended_namespace>::device;
 };
-template<>
-struct DesulToSYCLMemoryScope<MemoryScopeSystem> {
-  static constexpr DESUL_SYCL_NAMESPACE::memory_scope value = DESUL_SYCL_NAMESPACE::memory_scope::system;
+template <bool extended_namespace>
+struct DesulToSYCLMemoryScope<MemoryScopeSystem, extended_namespace> {
+  static constexpr sycl_memory_scope<extended_namespace> value =
+      sycl_memory_scope<extended_namespace>::system;
 };
 
-}
+template <class T,
+          class MemoryOrder,
+          class MemoryScope,
+          sycl::access::address_space AddressSpace>
+using sycl_atomic_ref =
+    sycl::ext::oneapi::atomic_ref<T,
+                                  DesulToSYCLMemoryOrder<MemoryOrder>::value,
+                                  DesulToSYCLMemoryScope<MemoryScope>::value,
+                                  AddressSpace>;
+
+}  // namespace Impl
+}  // namespace desul
 
 #endif
 #endif
diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal
index d00e2223d22485f4ee831dd53dce064a49deec5e..b235163820782a664189470f01dd7df095f117e5 100644
--- a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal
+++ b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal
@@ -89,6 +89,11 @@ inline __device__ ctype atomic_fetch_inc(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
   ctype limit = desul::Impl::numeric_limits_max<ctype>::value; \
   asm volatile("atom.inc.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
   return result; \
+} \
+inline __device__ ctype atomic_fetch_inc_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  asm volatile("atom.inc.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  return result; \
 }
 
 #define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_DEC(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
@@ -97,6 +102,11 @@ inline __device__ ctype atomic_fetch_dec(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
   ctype limit = desul::Impl::numeric_limits_max<ctype>::value; \
   asm volatile("atom.dec.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
   return result; \
+} \
+inline __device__ ctype atomic_fetch_dec_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  asm volatile("atom.dec.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  return result; \
 }
 
 // Group ops for integer ctypes
@@ -112,10 +122,10 @@ __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_SUB(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MIN(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MAX(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 
-#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR(ctype)
+#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR()
 
 
 // Instantiate Functions
diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic
index 364b6a2e4d1950f110c29958976a660d01d05771..0484d109c3db39440267b06ecd7736bb7b35b2fe 100644
--- a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic
+++ b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic
@@ -88,6 +88,11 @@ inline __device__ ctype atomic_fetch_inc(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
   ctype limit = desul::Impl::numeric_limits_max<ctype>::value; \
   asm volatile("atom.inc" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
   return result; \
+} \
+inline __device__ ctype atomic_fetch_inc_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  asm volatile("atom.inc" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  return result; \
 }
 
 #define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_DEC(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
@@ -96,8 +101,12 @@ inline __device__ ctype atomic_fetch_dec(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
   ctype limit = desul::Impl::numeric_limits_max<ctype>::value; \
   asm volatile("atom.dec" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
   return result; \
+} \
+inline __device__ ctype atomic_fetch_dec_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  asm volatile("atom.dec" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  return result; \
 }
-
 // Group ops for integer ctypes
 #define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_INTEGER_OP(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_ADD(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
@@ -111,10 +120,10 @@ __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_SUB(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MIN(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MAX(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 
-#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR(ctype)
+#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR()
 
 
 // Instantiate Functions
diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal
index 2e8e54062dd3494f7440b959618359ef0547d87b..3d077ae637ad9960ef687fff2456181033bf8d73 100644
--- a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal
+++ b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal
@@ -132,6 +132,15 @@ inline __device__ ctype atomic_fetch_inc(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
   asm volatile("atom.inc"        __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
   } \
   return result; \
+} \
+inline __device__ ctype atomic_fetch_inc_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  if(__isGlobal(dest)) { \
+  asm volatile("atom.inc.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  } else { \
+  asm volatile("atom.inc"        __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  } \
+  return result; \
 }
 
 #define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_DEC(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
@@ -144,6 +153,15 @@ inline __device__ ctype atomic_fetch_dec(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
   asm volatile("atom.dec"        __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
   } \
   return result; \
+} \
+inline __device__ ctype atomic_fetch_dec_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  if(__isGlobal(dest)) { \
+  asm volatile("atom.dec.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  } else { \
+  asm volatile("atom.dec"        __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;" : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  } \
+  return result; \
 }
 
 // Group ops for integer ctypes
@@ -159,10 +177,10 @@ __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_SUB(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MIN(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MAX(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 
-#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR(ctype)
+#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR()
 
 
 // Instantiate Functions
diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate
index 5f53279daf541aad169e1bc5a046518cc5b084c8..4039448c6bb63038b65b58bd819c9adf0a75a8a8 100644
--- a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate
+++ b/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate
@@ -160,7 +160,19 @@ inline __device__ ctype atomic_fetch_inc(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
           "{\n\t" \
           ".reg .pred p;\n\t" \
           "isspacep.global p, %1;\n\t" \
-          "@p  atom.inc.gobal" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;\n\t" \
+          "@p  atom.inc.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;\n\t" \
+          "@!p atom.inc"       __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;\n\t" \
+          "}\n\t" \
+    : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  return result; \
+} \
+inline __device__ ctype atomic_fetch_inc_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  asm volatile( \
+          "{\n\t" \
+          ".reg .pred p;\n\t" \
+          "isspacep.global p, %1;\n\t" \
+          "@p  atom.inc.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;\n\t" \
           "@!p atom.inc"       __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;\n\t" \
           "}\n\t" \
     : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
@@ -180,6 +192,18 @@ inline __device__ ctype atomic_fetch_dec(ctype* dest, __DESUL_IMPL_CUDA_ASM_MEMO
           "}\n\t" \
     : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
   return result; \
+} \
+inline __device__ ctype atomic_fetch_dec_mod(ctype* dest, ctype limit, __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER, __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE) { \
+  ctype result = 0; \
+  asm volatile( \
+          "{\n\t" \
+          ".reg .pred p;\n\t" \
+          "isspacep.global p, %1;\n\t" \
+          "@p  atom.dec.global" __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;\n\t" \
+          "@!p atom.dec"        __DESUL_IMPL_CUDA_ASM_MEMORY_ORDER_ASM __DESUL_IMPL_CUDA_ASM_MEMORY_SCOPE_ASM asm_ctype " %0,[%1],%2;\n\t" \
+          "}\n\t" \
+    : reg_ret_ctype(result) : "l"(dest),reg_ctype(limit) : "memory"); \
+  return result; \
 }
 
 // Group ops for integer ctypes
@@ -195,10 +219,10 @@ __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_SUB(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MIN(ctype,asm_ctype,reg_ctype,reg_ret_ctype) \
 __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_MAX(ctype,asm_ctype,reg_ctype,reg_ret_ctype)
 
-#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR(ctype) \
-__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR(ctype)
+#define __DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_BIN_OP() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_AND() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_OR() \
+__DESUL_IMPL_CUDA_ASM_ATOMIC_FETCH_XOR()
 
 
 // Instantiate Functions
diff --git a/packages/kokkos/core/src/fwd/Kokkos_Fwd_THREADS.hpp b/packages/kokkos/core/src/fwd/Kokkos_Fwd_THREADS.hpp
index 28ffb685df85658064e3888e5c389aeed6ab0ced..af53777ddea2311f55198a14407238d7bc5c466a 100644
--- a/packages/kokkos/core/src/fwd/Kokkos_Fwd_THREADS.hpp
+++ b/packages/kokkos/core/src/fwd/Kokkos_Fwd_THREADS.hpp
@@ -47,7 +47,7 @@
 
 #if defined(KOKKOS_ENABLE_THREADS)
 namespace Kokkos {
-class Threads;  ///< Execution space with pthreads back-end.
+class Threads;  ///< Execution space with C++11 threads back-end.
 }  // namespace Kokkos
 #endif
 #endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp b/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp
index eb0f07557fe5f22569797e17c50cb7a82bfac431..fc58b96a450992177804effe491192904587296b 100644
--- a/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp
@@ -54,9 +54,10 @@
 #endif
 
 namespace Kokkos {
+namespace Impl {
 
 KOKKOS_FORCEINLINE_FUNCTION
-int log2(unsigned i) {
+int int_log2(unsigned i) {
   enum : int { shift = sizeof(unsigned) * CHAR_BIT - 1 };
 #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
   return shift - __clz(i);
@@ -76,8 +77,6 @@ int log2(unsigned i) {
 #endif
 }
 
-namespace Impl {
-
 /**\brief  Find first zero bit.
  *
  *  If none then return -1 ;
@@ -148,11 +147,20 @@ int bit_count(unsigned i) {
 
 KOKKOS_INLINE_FUNCTION
 unsigned integral_power_of_two_that_contains(const unsigned N) {
-  const unsigned i = Kokkos::log2(N);
+  const unsigned i = int_log2(N);
   return ((1u << i) < N) ? i + 1 : i;
 }
 
 }  // namespace Impl
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+
+KOKKOS_DEPRECATED KOKKOS_INLINE_FUNCTION int log2(unsigned i) {
+  return Impl::int_log2(i);
+}
+
+#endif
+
 }  // namespace Kokkos
 
 #endif  // KOKKOS_BITOPS_HPP
diff --git a/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edaae0fd47750bda2b0218fe91b2eb0a3ba51330
--- /dev/null
+++ b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp
@@ -0,0 +1,133 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <cstring>
+#include <impl/Kokkos_Command_Line_Parsing.hpp>
+/** Duplicates of Kokkos_Error.cpp/hpp, reproduced here
+ * for use in non-Kokkos applications
+ */
+namespace {
+void traceback_callstack(std::ostream& msg) {
+  msg << std::endl << "Traceback functionality not available" << std::endl;
+}
+void throw_runtime_exception(const std::string& msg) {
+  std::ostringstream o;
+  o << msg;
+  traceback_callstack(o);
+  throw std::runtime_error(o.str());
+}
+}  // namespace
+
+bool Kokkos::Impl::is_unsigned_int(const char* str) {
+  const size_t len = strlen(str);
+  for (size_t i = 0; i < len; ++i) {
+    if (!isdigit(str[i])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool Kokkos::Impl::check_arg(char const* arg, char const* expected) {
+  std::size_t arg_len = std::strlen(arg);
+  std::size_t exp_len = std::strlen(expected);
+  if (arg_len < exp_len) return false;
+  if (std::strncmp(arg, expected, exp_len) != 0) return false;
+  if (arg_len == exp_len) return true;
+
+  if (std::isalnum(arg[exp_len]) || arg[exp_len] == '-' ||
+      arg[exp_len] == '_') {
+    return false;
+  }
+  return true;
+}
+
+bool Kokkos::Impl::check_int_arg(char const* arg, char const* expected,
+                                 int* value) {
+  if (!check_arg(arg, expected)) return false;
+  std::size_t arg_len = std::strlen(arg);
+  std::size_t exp_len = std::strlen(expected);
+  bool okay           = true;
+  if (arg_len == exp_len || arg[exp_len] != '=') okay = false;
+  char const* number = arg + exp_len + 1;
+  if (!Kokkos::Impl::is_unsigned_int(number) || strlen(number) == 0)
+    okay = false;
+  *value = std::stoi(number);
+  if (!okay) {
+    std::ostringstream ss;
+    ss << "Error: expecting an '=INT' after command line argument '" << expected
+       << "'";
+    ss << ". Raised by Kokkos::initialize(int narg, char* argc[]).";
+    throw_runtime_exception(ss.str());
+  }
+  return true;
+}
+bool Kokkos::Impl::check_str_arg(char const* arg, char const* expected,
+                                 std::string& value) {
+  if (!check_arg(arg, expected)) return false;
+  std::size_t arg_len = std::strlen(arg);
+  std::size_t exp_len = std::strlen(expected);
+  bool okay           = true;
+  if (arg_len == exp_len || arg[exp_len] != '=') okay = false;
+  char const* remain = arg + exp_len + 1;
+  value              = remain;
+  if (!okay) {
+    std::ostringstream ss;
+    ss << "Error: expecting an '=STRING' after command line argument '"
+       << expected << "'";
+    ss << ". Raised by Kokkos::initialize(int narg, char* argc[]).";
+    throw_runtime_exception(ss.str());
+  }
+  return true;
+}
+void Kokkos::Impl::warn_deprecated_command_line_argument(std::string deprecated,
+                                                         std::string valid) {
+  std::cerr
+      << "Warning: command line argument '" << deprecated
+      << "' is deprecated. Use '" << valid
+      << "' instead. Raised by Kokkos::initialize(int narg, char* argc[])."
+      << std::endl;
+}
diff --git a/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7e1d3049e3668b1ce812d1738bd43968930402fd
--- /dev/null
+++ b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp
@@ -0,0 +1,63 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_COMMAND_LINE_PARSING_HPP
+#define KOKKOS_COMMAND_LINE_PARSING_HPP
+
+#include <string>
+#include <iosfwd>
+
+namespace Kokkos {
+namespace Impl {
+bool is_unsigned_int(const char* str);
+bool check_arg(char const* arg, char const* expected);
+// void throw_runtime_exception(const std::string& msg);
+bool check_int_arg(char const* arg, char const* expected, int* value);
+bool check_str_arg(char const* arg, char const* expected, std::string& value);
+void warn_deprecated_command_line_argument(std::string deprecated,
+                                           std::string valid);
+}  // namespace Impl
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
index a1f9d336329fff0426863f28f493854e7f8091f3..0a3b649fe98aac76b3240162f07e5b58b01c7fbb 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -45,6 +45,7 @@
 #include <Kokkos_Core.hpp>
 #include <impl/Kokkos_Error.hpp>
 #include <impl/Kokkos_ExecSpaceInitializer.hpp>
+#include <impl/Kokkos_Command_Line_Parsing.hpp>
 #include <cctype>
 #include <cstring>
 #include <iostream>
@@ -58,7 +59,7 @@
 #ifndef _WIN32
 #include <unistd.h>
 #else
-#include <Windows.h>
+#include <windows.h>
 #endif
 
 //----------------------------------------------------------------------------
@@ -282,16 +283,8 @@ int get_gpu(const InitArguments& args) {
   }
   return use_gpu;
 }
+
 namespace {
-bool is_unsigned_int(const char* str) {
-  const size_t len = strlen(str);
-  for (size_t i = 0; i < len; ++i) {
-    if (!isdigit(str[i])) {
-      return false;
-    }
-  }
-  return true;
-}
 
 void initialize_backends(const InitArguments& args) {
 // This is an experimental setting
@@ -304,21 +297,29 @@ void initialize_backends(const InitArguments& args) {
   Impl::ExecSpaceManager::get_instance().initialize_spaces(args);
 }
 
-void initialize_profiling(const InitArguments& args) {
-  Kokkos::Profiling::initialize(args.tool_lib);
-  if (args.tool_help) {
-    if (!Kokkos::Tools::printHelp(args.tool_args)) {
-      std::cerr << "Tool has not provided a help message" << std::endl;
-    }
+void initialize_profiling(const Tools::InitArguments& args) {
+  auto initialization_status =
+      Kokkos::Tools::Impl::initialize_tools_subsystem(args);
+  if (initialization_status.result ==
+      Kokkos::Tools::Impl::InitializationStatus::InitializationResult::
+          help_request) {
     g_is_initialized = true;
     ::Kokkos::finalize();
     std::exit(EXIT_SUCCESS);
-  }
-  Kokkos::Tools::parseArgs(args.tool_args);
-  for (const auto& category_value : Kokkos::Impl::metadata_map) {
-    for (const auto& key_value : category_value.second) {
-      Kokkos::Tools::declareMetadata(key_value.first, key_value.second);
+  } else if (initialization_status.result ==
+             Kokkos::Tools::Impl::InitializationStatus::InitializationResult::
+                 success) {
+    Kokkos::Tools::parseArgs(args.args);
+    for (const auto& category_value : Kokkos::Impl::metadata_map) {
+      for (const auto& key_value : category_value.second) {
+        Kokkos::Tools::declareMetadata(key_value.first, key_value.second);
+      }
     }
+  } else {
+    std::cerr << "Error initializing Kokkos Tools subsystem" << std::endl;
+    g_is_initialized = true;
+    ::Kokkos::finalize();
+    std::exit(EXIT_FAILURE);
   }
 }
 
@@ -517,7 +518,7 @@ void pre_initialize_internal(const InitArguments& args) {
 }
 
 void post_initialize_internal(const InitArguments& args) {
-  initialize_profiling(args);
+  initialize_profiling(args.impl_get_tools_init_arguments());
   g_is_initialized = true;
 }
 
@@ -568,68 +569,6 @@ void fence_internal(const std::string& name) {
   Impl::ExecSpaceManager::get_instance().static_fence(name);
 }
 
-bool check_arg(char const* arg, char const* expected) {
-  std::size_t arg_len = std::strlen(arg);
-  std::size_t exp_len = std::strlen(expected);
-  if (arg_len < exp_len) return false;
-  if (std::strncmp(arg, expected, exp_len) != 0) return false;
-  if (arg_len == exp_len) return true;
-  /* if expected is "--threads", ignore "--threads-for-application"
-     by checking this character          ---------^
-     to see if it continues to make a longer name */
-  if (std::isalnum(arg[exp_len]) || arg[exp_len] == '-' ||
-      arg[exp_len] == '_') {
-    return false;
-  }
-  return true;
-}
-
-bool check_int_arg(char const* arg, char const* expected, int* value) {
-  if (!check_arg(arg, expected)) return false;
-  std::size_t arg_len = std::strlen(arg);
-  std::size_t exp_len = std::strlen(expected);
-  bool okay           = true;
-  if (arg_len == exp_len || arg[exp_len] != '=') okay = false;
-  char const* number = arg + exp_len + 1;
-  if (!Impl::is_unsigned_int(number) || strlen(number) == 0) okay = false;
-  *value = std::stoi(number);
-  if (!okay) {
-    std::ostringstream ss;
-    ss << "Error: expecting an '=INT' after command line argument '" << expected
-       << "'";
-    ss << ". Raised by Kokkos::initialize(int narg, char* argc[]).";
-    Impl::throw_runtime_exception(ss.str());
-  }
-  return true;
-}
-
-bool check_str_arg(char const* arg, char const* expected, std::string& value) {
-  if (!check_arg(arg, expected)) return false;
-  std::size_t arg_len = std::strlen(arg);
-  std::size_t exp_len = std::strlen(expected);
-  bool okay           = true;
-  if (arg_len == exp_len || arg[exp_len] != '=') okay = false;
-  char const* remain = arg + exp_len + 1;
-  value              = remain;
-  if (!okay) {
-    std::ostringstream ss;
-    ss << "Error: expecting an '=STRING' after command line argument '"
-       << expected << "'";
-    ss << ". Raised by Kokkos::initialize(int narg, char* argc[]).";
-    Impl::throw_runtime_exception(ss.str());
-  }
-  return true;
-}
-
-void warn_deprecated_command_line_argument(std::string deprecated,
-                                           std::string valid) {
-  std::cerr
-      << "Warning: command line argument '" << deprecated
-      << "' is deprecated. Use '" << valid
-      << "' instead. Raised by Kokkos::initialize(int narg, char* argc[])."
-      << std::endl;
-}
-
 unsigned get_process_id() {
 #ifdef _WIN32
   return unsigned(GetCurrentProcessId());
@@ -655,6 +594,26 @@ void parse_command_line_arguments(int& narg, char* arg[],
   bool kokkos_numa_found     = false;
   bool kokkos_device_found   = false;
   bool kokkos_ndevices_found = false;
+  auto tools_init_arguments  = arguments.impl_get_tools_init_arguments();
+  Tools::Impl::parse_command_line_arguments(narg, arg, tools_init_arguments);
+  if (tools_init_arguments.tune_internals !=
+      Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) {
+    tune_internals = (tools_init_arguments.tune_internals ==
+                      Kokkos::Tools::InitArguments::PossiblyUnsetOption::on);
+  }
+  if (tools_init_arguments.help !=
+      Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) {
+    tool_help = (tools_init_arguments.help ==
+                 Kokkos::Tools::InitArguments::PossiblyUnsetOption::on);
+  }
+  if (tools_init_arguments.lib !=
+      Kokkos::Tools::InitArguments::unset_string_option) {
+    tool_lib = tools_init_arguments.lib;
+  }
+  if (tools_init_arguments.args !=
+      Kokkos::Tools::InitArguments::unset_string_option) {
+    tool_args = tools_init_arguments.args;
+  }
 
   int iarg = 0;
 
@@ -770,37 +729,6 @@ void parse_command_line_arguments(int& narg, char* arg[],
         arg[k] = arg[k + 1];
       }
       narg--;
-    } else if (check_str_arg(arg[iarg], "--kokkos-tools-library", tool_lib)) {
-      for (int k = iarg; k < narg - 1; k++) {
-        arg[k] = arg[k + 1];
-      }
-      narg--;
-    } else if (check_str_arg(arg[iarg], "--kokkos-tools-args", tool_args)) {
-      for (int k = iarg; k < narg - 1; k++) {
-        arg[k] = arg[k + 1];
-      }
-      narg--;
-      // strip any leading and/or trailing quotes if they were retained in the
-      // string because this will very likely cause parsing issues for tools.
-      // If the quotes are retained (via bypassing the shell):
-      //    <EXE> --kokkos-tools-args="-c my example"
-      // would be tokenized as:
-      //    "<EXE>" "\"-c" "my" "example\""
-      // instead of:
-      //    "<EXE>" "-c" "my" "example"
-      if (!tool_args.empty()) {
-        if (tool_args.front() == '"') tool_args = tool_args.substr(1);
-        if (tool_args.back() == '"')
-          tool_args = tool_args.substr(0, tool_args.length() - 1);
-      }
-      // add the name of the executable to the beginning
-      if (narg > 0) tool_args = std::string(arg[0]) + " " + tool_args;
-    } else if (check_arg(arg[iarg], "--kokkos-tools-help")) {
-      tool_help = true;
-      for (int k = iarg; k < narg - 1; k++) {
-        arg[k] = arg[k + 1];
-      }
-      narg--;
     } else if (check_arg(arg[iarg], "--kokkos-help") ||
                check_arg(arg[iarg], "--help")) {
       auto const help_message = R"(
@@ -859,7 +787,10 @@ void parse_command_line_arguments(int& narg, char* arg[],
     } else
       iarg++;
   }
-  if (tool_args.empty() && narg > 0) tool_args = arg[0];
+  if ((tools_init_arguments.args ==
+       Kokkos::Tools::InitArguments::unset_string_option) &&
+      narg > 0)
+    tool_args = arg[0];
 }
 
 void parse_environment_variables(InitArguments& arguments) {
@@ -871,7 +802,43 @@ void parse_environment_variables(InitArguments& arguments) {
   auto& disable_warnings = arguments.disable_warnings;
   auto& tune_internals   = arguments.tune_internals;
   auto& tool_lib         = arguments.tool_lib;
+  auto& tool_args        = arguments.tool_args;
+  auto& tool_help        = arguments.tool_help;
   char* endptr;
+
+  auto tools_init_arguments = arguments.impl_get_tools_init_arguments();
+  auto init_result =
+      Tools::Impl::parse_environment_variables(tools_init_arguments);
+  if (init_result.result == Kokkos::Tools::Impl::InitializationStatus::
+                                environment_argument_mismatch) {
+    Impl::throw_runtime_exception(init_result.error_message);
+  }
+
+  tool_lib = tools_init_arguments.lib;
+
+  if (tools_init_arguments.tune_internals !=
+      Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) {
+    tune_internals = (tools_init_arguments.tune_internals ==
+                      Kokkos::Tools::InitArguments::PossiblyUnsetOption::on)
+                         ? true
+                         : false;
+  }
+  if (tools_init_arguments.help !=
+      Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) {
+    tool_help = (tools_init_arguments.help ==
+                 Kokkos::Tools::InitArguments::PossiblyUnsetOption::on)
+                    ? true
+                    : false;
+  }
+  if (tools_init_arguments.lib !=
+      Kokkos::Tools::InitArguments::unset_string_option) {
+    tool_lib = tools_init_arguments.lib;
+  }
+  if (tools_init_arguments.args !=
+      Kokkos::Tools::InitArguments::unset_string_option) {
+    tool_args = tools_init_arguments.args;
+  }
+
   auto env_num_threads_str = std::getenv("KOKKOS_NUM_THREADS");
   if (env_num_threads_str != nullptr) {
     errno                = 0;
@@ -1026,30 +993,6 @@ void parse_environment_variables(InitArguments& arguments) {
           "KOKKOS_DISABLE_WARNINGS if both are set. Raised by "
           "Kokkos::initialize(int narg, char* argc[]).");
   }
-  char* env_tuneinternals_str = std::getenv("KOKKOS_TUNE_INTERNALS");
-  if (env_tuneinternals_str != nullptr) {
-    std::string env_str(env_tuneinternals_str);  // deep-copies string
-    for (char& c : env_str) {
-      c = toupper(c);
-    }
-    if ((env_str == "TRUE") || (env_str == "ON") || (env_str == "1"))
-      tune_internals = true;
-    else if (tune_internals)
-      Impl::throw_runtime_exception(
-          "Error: expecting a match between --kokkos-tune-internals and "
-          "KOKKOS_TUNE_INTERNALS if both are set. Raised by "
-          "Kokkos::initialize(int narg, char* argc[]).");
-  }
-  auto env_tool_lib = std::getenv("KOKKOS_PROFILE_LIBRARY");
-  if (env_tool_lib != nullptr) {
-    if (!tool_lib.empty() && std::string(env_tool_lib) != tool_lib)
-      Impl::throw_runtime_exception(
-          "Error: expecting a match between --kokkos-tools-library and "
-          "KOKKOS_PROFILE_LIBRARY if both are set. Raised by "
-          "Kokkos::initialize(int narg, char* argc[]).");
-    else
-      tool_lib = env_tool_lib;
-  }
 }
 
 }  // namespace
diff --git a/packages/kokkos/core/src/impl/Kokkos_Error.cpp b/packages/kokkos/core/src/impl/Kokkos_Error.cpp
index 9c8024cbd03ee9230b1ed27468c7cb82aadc5d97..a28d008587a82ff33b9cb9f23125c8a906f4be66 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Error.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Error.cpp
@@ -42,15 +42,15 @@
 //@HEADER
 */
 
-#include <cstdio>
 #include <cstring>
 #include <cstdlib>
 
-#include <ostream>
+#include <iostream>
 #include <sstream>
 #include <iomanip>
 #include <stdexcept>
 #include <impl/Kokkos_Error.hpp>
+#include <impl/Kokkos_Stacktrace.hpp>
 #include <Cuda/Kokkos_Cuda_Error.hpp>
 
 //----------------------------------------------------------------------------
@@ -58,18 +58,24 @@
 
 namespace Kokkos {
 namespace Impl {
-
-void host_abort(const char *const message) {
-  fwrite(message, 1, strlen(message), stderr);
-  fflush(stderr);
-  ::abort();
+void traceback_callstack(std::ostream &msg) {
+#ifdef KOKKOS_IMPL_ENABLE_STACKTRACE
+  msg << "\nBacktrace:\n";
+  save_stacktrace();
+  print_demangled_saved_stacktrace(msg);
+#else
+  msg << "\nTraceback functionality not available\n";
+#endif
 }
 
 void throw_runtime_exception(const std::string &msg) {
-  std::ostringstream o;
-  o << msg;
-  traceback_callstack(o);
-  throw std::runtime_error(o.str());
+  throw std::runtime_error(msg);
+}
+
+void host_abort(const char *const message) {
+  std::cerr << message;
+  traceback_callstack(std::cerr);
+  ::abort();
 }
 
 std::string human_memory_size(size_t arg_bytes) {
@@ -159,13 +165,6 @@ std::string Experimental::RawMemoryAllocationFailure::get_error_message()
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Impl {
-
-void traceback_callstack(std::ostream &msg) {
-  msg << std::endl << "Traceback functionality not available" << std::endl;
-}
-
-}  // namespace Impl
 
 #ifdef KOKKOS_ENABLE_CUDA
 namespace Experimental {
diff --git a/packages/kokkos/core/src/impl/Kokkos_Error.hpp b/packages/kokkos/core/src/impl/Kokkos_Error.hpp
index dc9bfe2b5a9e0eb66dc2a6ae43fd296726e7a458..5d7c60fba9a2b7bec65b6a3ba48c50d94af0d6cd 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Error.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Error.hpp
@@ -72,6 +72,7 @@ void throw_runtime_exception(const std::string &);
 void traceback_callstack(std::ostream &);
 
 std::string human_memory_size(size_t arg_bytes);
+void throw_runtime_exception(const std::string &msg);
 
 }  // namespace Impl
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
index 5de92fc45741234aafaa97fb0c31dc11aa9d9c10..504fba0268815669acff6a2a925bff4df0b0faae 100644
--- a/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp
@@ -162,21 +162,6 @@ struct FunctorDeclaresValueType<FunctorType, ArgTag,
                                 void_t<typename FunctorType::value_type>>
     : public std::true_type {};
 
-template <class FunctorType,
-          bool Enable = (FunctorDeclaresValueType<FunctorType, void>::value) ||
-                        (ReduceFunctorHasInit<FunctorType>::value) ||
-                        (ReduceFunctorHasJoin<FunctorType>::value) ||
-                        (ReduceFunctorHasFinal<FunctorType>::value) ||
-                        (ReduceFunctorHasShmemSize<FunctorType>::value)>
-struct IsNonTrivialReduceFunctor {
-  enum : bool { value = false };
-};
-
-template <class FunctorType>
-struct IsNonTrivialReduceFunctor<FunctorType, true> {
-  enum : bool { value = true };
-};
-
 /** \brief  Query Functor and execution policy argument tag for value type.
  *
  *  If C++11 enabled and 'value_type' is not explicitly declared then attempt
diff --git a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
index 5c0eaa0a1ef80fa02e2f745f1d7e53d6fc45b8d3..6fc649cfc4452caebe3408edb02db546d16d0f37 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
@@ -222,7 +222,7 @@ SharedAllocationRecord<Kokkos::Experimental::HBWSpace,
 #endif
 {
 
-  m_space.deallocate(RecordBase::m_alloc_ptr->m_label,
+  m_space.deallocate(m_label.c_str(),
                      SharedAllocationRecord<void, void>::m_alloc_ptr,
                      SharedAllocationRecord<void, void>::m_alloc_size,
                      (SharedAllocationRecord<void, void>::m_alloc_size -
@@ -243,7 +243,8 @@ SharedAllocationRecord<Kokkos::Experimental::HBWSpace, void>::
 #endif
           Impl::checked_allocation_with_header(arg_space, arg_label,
                                                arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
   // Fill in the Header information
   RecordBase::m_alloc_ptr->m_record =
@@ -291,6 +292,9 @@ void *SharedAllocationRecord<Kokkos::Experimental::HBWSpace, void>::
   Kokkos::Impl::DeepCopy<Kokkos::Experimental::HBWSpace,
                          Kokkos::Experimental::HBWSpace>(
       r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size()));
+  Kokkos::fence(
+      "SharedAllocationRecord<Kokkos::Experimental::HBWSpace, "
+      "void>::reallocate_tracked(): fence after copying data");
 
   RecordBase::increment(r_new);
   RecordBase::decrement(r_old);
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp
index 79ee7e80db3115f1c9c14366e2c237c042ab0bdb..4f93eebc0ed3f7c8061bf62b24db7bed5a0a5fde 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp
@@ -64,7 +64,7 @@ void HostBarrier::impl_backoff_wait_until_equal(
   unsigned count = 0u;
 
   while (!test_equal(ptr, v)) {
-    const int c = ::Kokkos::log2(++count);
+    const int c = int_log2(++count);
     if (!active_wait || c > log2_iterations_till_sleep) {
       std::this_thread::sleep_for(
           std::chrono::nanoseconds(c < 16 ? 256 * c : 4096));
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.hpp b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.hpp
index 4b9235ab70260e3b4a80d4bec735e033f71bf443..49142940be34e5f5ca8666f5240f63c326cc4b35 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.hpp
@@ -207,7 +207,13 @@ class HostBarrier {
   KOKKOS_INLINE_FUNCTION
   static void wait_until_equal(int* ptr, const int v,
                                bool active_wait = true) noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
+    KOKKOS_IF_ON_HOST((impl_wait_until_equal_host(ptr, v, active_wait);))
+
+    KOKKOS_IF_ON_DEVICE(((void)active_wait; while (!test_equal(ptr, v)){}))
+  }
+
+  static void impl_wait_until_equal_host(int* ptr, const int v,
+                                         bool active_wait = true) noexcept {
     bool result = test_equal(ptr, v);
     for (int i = 0; !result && i < iterations_till_backoff; ++i) {
 #if defined(KOKKOS_ENABLE_ASM)
@@ -234,11 +240,6 @@ class HostBarrier {
     if (!result) {
       impl_backoff_wait_until_equal(ptr, v, active_wait);
     }
-#else
-    (void)active_wait;
-    while (!test_equal(ptr, v)) {
-    }
-#endif
   }
 
   static void impl_backoff_wait_until_equal(int* ptr, const int v,
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSharedPtr.hpp b/packages/kokkos/core/src/impl/Kokkos_HostSharedPtr.hpp
index 3b7b194db58cb693f69d8a6560896565062b9d99..a2a792a88ee678e077e1d324fee005d6f23e1242 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostSharedPtr.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostSharedPtr.hpp
@@ -90,12 +90,9 @@ class HostSharedPtr {
 
   KOKKOS_FUNCTION HostSharedPtr(const HostSharedPtr& other) noexcept
       : m_element_ptr(other.m_element_ptr), m_control(other.m_control) {
-    // FIXME_OPENMPTARGET requires something like KOKKOS_IMPL_IF_ON_HOST
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-    if (m_control) Kokkos::atomic_add(&(m_control->m_counter), 1);
-#else
-    m_control = nullptr;
-#endif
+    KOKKOS_IF_ON_HOST(
+        (if (m_control) Kokkos::atomic_add(&(m_control->m_counter), 1);))
+    KOKKOS_IF_ON_DEVICE(m_control = nullptr;)
   }
 
   KOKKOS_FUNCTION HostSharedPtr& operator=(HostSharedPtr&& other) noexcept {
@@ -115,12 +112,9 @@ class HostSharedPtr {
       cleanup();
       m_element_ptr = other.m_element_ptr;
       m_control     = other.m_control;
-      // FIXME_OPENMPTARGET
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-      if (m_control) Kokkos::atomic_add(&(m_control->m_counter), 1);
-#else
-      m_control = nullptr;
-#endif
+      KOKKOS_IF_ON_HOST(
+          (if (m_control) Kokkos::atomic_add(&(m_control->m_counter), 1);))
+      KOKKOS_IF_ON_DEVICE(m_control = nullptr;)
     }
     return *this;
   }
@@ -145,31 +139,30 @@ class HostSharedPtr {
     return get() != nullptr;
   }
 
-  // returns the number of HostSharedPtr instances managing the curent object or
-  // 0 if there is no managed object.
+  // returns the number of HostSharedPtr instances managing the current object
+  // or 0 if there is no managed object.
   int use_count() const noexcept {
     return m_control ? m_control->m_counter : 0;
   }
 
  private:
   KOKKOS_FUNCTION void cleanup() noexcept {
-    // FIXME_OPENMPTARGET
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-    // If m_counter is set, then this instance is responsible for managing the
-    // object pointed to by m_counter and m_element_ptr.
-    if (m_control) {
-      int const count = Kokkos::atomic_fetch_sub(&(m_control->m_counter), 1);
-      // atomic_fetch_sub might have memory order relaxed so we need to force
-      // synchronization to avoid multiple threads doing the cleanup.
-      Kokkos::memory_fence();
-      if (count == 1) {
-        (m_control->m_deleter)(m_element_ptr);
-        m_element_ptr = nullptr;
-        delete m_control;
-        m_control = nullptr;
-      }
-    }
-#endif
+    KOKKOS_IF_ON_HOST((
+        // If m_counter is set, then this instance is responsible for managing
+        // the object pointed to by m_counter and m_element_ptr.
+        if (m_control) {
+          int const count =
+              Kokkos::atomic_fetch_sub(&(m_control->m_counter), 1);
+          // atomic_fetch_sub might have memory order relaxed, so we need to
+          // force synchronization to avoid multiple threads doing the cleanup.
+          Kokkos::memory_fence();
+          if (count == 1) {
+            (m_control->m_deleter)(m_element_ptr);
+            m_element_ptr = nullptr;
+            delete m_control;
+            m_control = nullptr;
+          }
+        }))
   }
 
   struct Control {
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp
index ed46d170e53ebb58e118c8d020073ed12d3c1064..1728fe90c8fbe404724b6962a4be88692a9245ed 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp
@@ -308,6 +308,7 @@ void HostSpace::impl_deallocate(
     const size_t arg_alloc_size, const size_t arg_logical_size,
     const Kokkos::Tools::SpaceHandle arg_handle) const {
   if (arg_alloc_ptr) {
+    Kokkos::fence("HostSpace::impl_deallocate before free");
     size_t reported_size =
         (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size;
     if (Kokkos::Profiling::profileLibraryLoaded()) {
@@ -357,7 +358,7 @@ SharedAllocationRecord<Kokkos::HostSpace, void>::~SharedAllocationRecord()
     noexcept
 #endif
 {
-  m_space.deallocate(RecordBase::m_alloc_ptr->m_label,
+  m_space.deallocate(m_label.c_str(),
                      SharedAllocationRecord<void, void>::m_alloc_ptr,
                      SharedAllocationRecord<void, void>::m_alloc_size,
                      (SharedAllocationRecord<void, void>::m_alloc_size -
@@ -398,7 +399,8 @@ SharedAllocationRecord<Kokkos::HostSpace, void>::SharedAllocationRecord(
 #endif
           Impl::checked_allocation_with_header(arg_space, arg_label,
                                                arg_alloc_size),
-          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc),
+          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
+          arg_label),
       m_space(arg_space) {
   this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr,
                                                   arg_label);
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
index b86670346c6466b4c4cff860b8e4873bbb540ce8..4ac0941a300822e9d86137660b18ef72f61ac33b 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
@@ -49,19 +49,37 @@ namespace Kokkos {
 
 namespace Impl {
 
-#ifndef KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT
-#define KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT 10 * 8192
-#endif
-
 void hostspace_parallel_deepcopy(void* dst, const void* src, ptrdiff_t n) {
-  if ((n < KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT) ||
-      (Kokkos::DefaultHostExecutionSpace().concurrency() == 1)) {
+  Kokkos::DefaultHostExecutionSpace exec;
+  hostspace_parallel_deepcopy_async(exec, dst, src, n);
+}
+
+// DeepCopy called with an execution space that can't access HostSpace
+void hostspace_parallel_deepcopy_async(void* dst, const void* src,
+                                       ptrdiff_t n) {
+  Kokkos::DefaultHostExecutionSpace exec;
+  hostspace_parallel_deepcopy_async(exec, dst, src, n);
+  exec.fence(
+      "Kokkos::Impl::hostspace_parallel_deepcopy_async: fence after copy");
+}
+
+void hostspace_parallel_deepcopy_async(const DefaultHostExecutionSpace& exec,
+                                       void* dst, const void* src,
+                                       ptrdiff_t n) {
+  using policy_t = Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>;
+  constexpr int host_deep_copy_serial_limit = 10 * 8192;
+
+  // If the asynchronous HPX backend is enabled, do *not* copy anything
+  // synchronously. The deep copy must be correctly sequenced with respect to
+  // other kernels submitted to the same instance, so we only use the fallback
+  // parallel_for version in this case.
+#if !(defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH))
+  if ((n < host_deep_copy_serial_limit) ||
+      (DefaultHostExecutionSpace().concurrency() == 1)) {
     std::memcpy(dst, src, n);
     return;
   }
 
-  using policy_t = Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>;
-
   // Both src and dst are aligned the same way with respect to 8 byte words
   if (reinterpret_cast<ptrdiff_t>(src) % 8 ==
       reinterpret_cast<ptrdiff_t>(dst) % 8) {
@@ -80,7 +98,7 @@ void hostspace_parallel_deepcopy(void* dst, const void* src, ptrdiff_t n) {
     double* dst_p       = reinterpret_cast<double*>(dst_c);
     const double* src_p = reinterpret_cast<const double*>(src_c);
     Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_double",
-                         policy_t(0, (n - count) / 8),
+                         policy_t(exec, 0, (n - count) / 8),
                          [=](const ptrdiff_t i) { dst_p[i] = src_p[i]; });
 
     // get final data copied
@@ -113,7 +131,7 @@ void hostspace_parallel_deepcopy(void* dst, const void* src, ptrdiff_t n) {
     int32_t* dst_p       = reinterpret_cast<int32_t*>(dst_c);
     const int32_t* src_p = reinterpret_cast<const int32_t*>(src_c);
     Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_int",
-                         policy_t(0, (n - count) / 4),
+                         policy_t(exec, 0, (n - count) / 4),
                          [=](const ptrdiff_t i) { dst_p[i] = src_p[i]; });
 
     // get final data copied
@@ -127,13 +145,14 @@ void hostspace_parallel_deepcopy(void* dst, const void* src, ptrdiff_t n) {
     }
     return;
   }
+#endif
 
   // Src and dst are not aligned the same way, we can only to byte wise copy.
   {
     char* dst_p       = reinterpret_cast<char*>(dst);
     const char* src_p = reinterpret_cast<const char*>(src);
     Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_char",
-                         policy_t(0, n),
+                         policy_t(exec, 0, n),
                          [=](const ptrdiff_t i) { dst_p[i] = src_p[i]; });
   }
 }
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp
index e9e0ef52a0dd6ef5254f82b9bad10d9bc569805d..6eec3566ab3711c9bc2b60042c56e0cbc99f5953 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp
@@ -41,6 +41,10 @@
 // ************************************************************************
 //@HEADER
 */
+
+#ifndef KOKKOS_IMPL_HOSTSPACE_DEEPCOPY_HPP
+#define KOKKOS_IMPL_HOSTSPACE_DEEPCOPY_HPP
+
 #include <cstdint>
 
 namespace Kokkos {
@@ -48,7 +52,13 @@ namespace Kokkos {
 namespace Impl {
 
 void hostspace_parallel_deepcopy(void* dst, const void* src, ptrdiff_t n);
+// DeepCopy called with an execution space that can't access HostSpace
+void hostspace_parallel_deepcopy_async(void* dst, const void* src, ptrdiff_t n);
+void hostspace_parallel_deepcopy_async(const DefaultHostExecutionSpace& exec,
+                                       void* dst, const void* src, ptrdiff_t n);
 
 }  // namespace Impl
 
 }  // namespace Kokkos
+
+#endif  // KOKKOS_IMPL_HOSTSPACE_DEEPCOPY_HPP
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp
index 0652b55bb71cfb3923374e774bbb2db2f58ee90d..82aed19659b70d90029322892798464772fdd347 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp
@@ -483,27 +483,18 @@ class HostThreadTeamMember {
   // Team collectives
   //--------------------------------------------------------------------------
 
-  KOKKOS_INLINE_FUNCTION void team_barrier() const noexcept
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  {
-    if (m_data.team_rendezvous()) {
-      m_data.team_rendezvous_release();
-    };
+  KOKKOS_INLINE_FUNCTION void team_barrier() const noexcept {
+    KOKKOS_IF_ON_HOST(
+        (if (m_data.team_rendezvous()) { m_data.team_rendezvous_release(); }))
   }
-#else
-  {
-  }
-#endif
 
   //--------------------------------------------------------------------------
 
   template <typename T>
   KOKKOS_INLINE_FUNCTION void team_broadcast(T& value,
                                              const int source_team_rank) const
-      noexcept
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  {
-    if (1 < m_data.m_team_size) {
+      noexcept {
+    KOKKOS_IF_ON_HOST((if (1 < m_data.m_team_size) {
       T volatile* const shared_value = (T*)m_data.team_reduce();
 
       // Don't overwrite shared memory until all threads arrive
@@ -521,54 +512,43 @@ class HostThreadTeamMember {
       } else {
         value = *shared_value;
       }
-    }
-  }
-#else
-  {
-    (void)value;
-    (void)source_team_rank;
-    Kokkos::abort("HostThreadTeamMember team_broadcast\n");
+    }))
+
+    KOKKOS_IF_ON_DEVICE(((void)value; (void)source_team_rank; Kokkos::abort(
+                             "HostThreadTeamMember team_broadcast\n");))
   }
-#endif
 
   //--------------------------------------------------------------------------
 
   template <class Closure, typename T>
   KOKKOS_INLINE_FUNCTION void team_broadcast(Closure const& f, T& value,
                                              const int source_team_rank) const
-      noexcept
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  {
-    T volatile* const shared_value = (T*)m_data.team_reduce();
+      noexcept {
+    KOKKOS_IF_ON_HOST((
+        T volatile* const shared_value = (T*)m_data.team_reduce();
 
-    // Don't overwrite shared memory until all threads arrive
+        // Don't overwrite shared memory until all threads arrive
 
-    if (m_data.team_rendezvous(source_team_rank)) {
-      // All threads have entered 'team_rendezvous'
-      // only this thread returned from 'team_rendezvous'
-      // with a return value of 'true'
+        if (m_data.team_rendezvous(source_team_rank)) {
+          // All threads have entered 'team_rendezvous'
+          // only this thread returned from 'team_rendezvous'
+          // with a return value of 'true'
 
-      f(value);
+          f(value);
 
-      if (1 < m_data.m_team_size) {
-        *shared_value = value;
-      }
+          if (1 < m_data.m_team_size) {
+            *shared_value = value;
+          }
 
-      m_data.team_rendezvous_release();
-      // This thread released all other threads from 'team_rendezvous'
-      // with a return value of 'false'
-    } else {
-      value = *shared_value;
-    }
-  }
-#else
-  {
-    (void)f;
-    (void)value;
-    (void)source_team_rank;
-    Kokkos::abort("HostThreadTeamMember team_broadcast\n");
+          m_data.team_rendezvous_release();
+          // This thread released all other threads from 'team_rendezvous'
+          // with a return value of 'false'
+        } else { value = *shared_value; }))
+
+    KOKKOS_IF_ON_DEVICE(
+        ((void)f; (void)value; (void)source_team_rank;
+         Kokkos::abort("HostThreadTeamMember team_broadcast\n");))
   }
-#endif
 
   //--------------------------------------------------------------------------
   // team_reduce( Sum(result) );
@@ -586,171 +566,115 @@ class HostThreadTeamMember {
   KOKKOS_INLINE_FUNCTION
       typename std::enable_if<is_reducer<ReducerType>::value>::type
       team_reduce(ReducerType const& reducer,
-                  typename ReducerType::value_type contribution) const noexcept
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  {
-    if (1 < m_data.m_team_size) {
-      using value_type = typename ReducerType::value_type;
-
-      if (0 != m_data.m_team_rank) {
-        // Non-root copies to their local buffer:
-        /*reducer.copy( (value_type*) m_data.team_reduce_local()
-                    , reducer.data() );*/
-        *((value_type*)m_data.team_reduce_local()) = contribution;
-      }
-
-      // Root does not overwrite shared memory until all threads arrive
-      // and copy to their local buffer.
-
-      if (m_data.team_rendezvous()) {
-        // All threads have entered 'team_rendezvous'
-        // only this thread returned from 'team_rendezvous'
-        // with a return value of 'true'
-        //
-        // This thread sums contributed values
-        for (int i = 1; i < m_data.m_team_size; ++i) {
-          value_type* const src =
-              (value_type*)m_data.team_member(i)->team_reduce_local();
-
-          reducer.join(contribution, *src);
-        }
-
-        // Copy result to root member's buffer:
-        // reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() );
-        *((value_type*)m_data.team_reduce()) = contribution;
-        reducer.reference()                  = contribution;
-        m_data.team_rendezvous_release();
-        // This thread released all other threads from 'team_rendezvous'
-        // with a return value of 'false'
-      } else {
-        // Copy from root member's buffer:
-        reducer.reference() = *((value_type*)m_data.team_reduce());
-      }
-    } else {
-      reducer.reference() = contribution;
-    }
-  }
-#else
-  {
-    (void)reducer;
-    (void)contribution;
-    Kokkos::abort("HostThreadTeamMember team_reduce\n");
+                  typename ReducerType::value_type contribution) const
+      noexcept {
+    KOKKOS_IF_ON_HOST((
+        if (1 < m_data.m_team_size) {
+          using value_type = typename ReducerType::value_type;
+
+          if (0 != m_data.m_team_rank) {
+            // Non-root copies to their local buffer:
+            /*reducer.copy( (value_type*) m_data.team_reduce_local()
+                        , reducer.data() );*/
+            *((value_type*)m_data.team_reduce_local()) = contribution;
+          }
+
+          // Root does not overwrite shared memory until all threads arrive
+          // and copy to their local buffer.
+
+          if (m_data.team_rendezvous()) {
+            // All threads have entered 'team_rendezvous'
+            // only this thread returned from 'team_rendezvous'
+            // with a return value of 'true'
+            //
+            // This thread sums contributed values
+            for (int i = 1; i < m_data.m_team_size; ++i) {
+              value_type* const src =
+                  (value_type*)m_data.team_member(i)->team_reduce_local();
+
+              reducer.join(contribution, *src);
+            }
+
+            // Copy result to root member's buffer:
+            // reducer.copy( (value_type*) m_data.team_reduce() , reducer.data()
+            // );
+            *((value_type*)m_data.team_reduce()) = contribution;
+            reducer.reference()                  = contribution;
+            m_data.team_rendezvous_release();
+            // This thread released all other threads from 'team_rendezvous'
+            // with a return value of 'false'
+          } else {
+            // Copy from root member's buffer:
+            reducer.reference() = *((value_type*)m_data.team_reduce());
+          }
+        } else { reducer.reference() = contribution; }))
+
+    KOKKOS_IF_ON_DEVICE(((void)reducer; (void)contribution;
+                         Kokkos::abort("HostThreadTeamMember team_reduce\n");))
   }
-#endif
 
   //--------------------------------------------------------------------------
 
-  /*template< typename ValueType , class JoinOp >
-  KOKKOS_INLINE_FUNCTION
-  ValueType
-  team_reduce( ValueType const & value
-             , JoinOp    const & join ) const noexcept
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-    {
-      if ( 0 != m_data.m_team_rank ) {
-        // Non-root copies to their local buffer:
-        *((ValueType*) m_data.team_reduce_local()) = value ;
-      }
-
-      // Root does not overwrite shared memory until all threads arrive
-      // and copy to their local buffer.
-
-      if ( m_data.team_rendezvous() ) {
-        const Impl::Reducer< ValueType , JoinOp > reducer( join );
-
-        // All threads have entered 'team_rendezvous'
-        // only this thread returned from 'team_rendezvous'
-        // with a return value of 'true'
-        //
-        // This thread sums contributed values
-
-        ValueType * const dst = (ValueType*) m_data.team_reduce_local();
-
-        *dst = value ;
-
-        for ( int i = 1 ; i < m_data.m_team_size ; ++i ) {
-          ValueType * const src =
-            (ValueType*) m_data.team_member(i)->team_reduce_local();
-
-          reducer.join( dst , src );
-        }
-
-        m_data.team_rendezvous_release();
-        // This thread released all other threads from 'team_rendezvous'
-        // with a return value of 'false'
-      }
-
-      return *((ValueType*) m_data.team_reduce());
-    }
-#else
-    { Kokkos::abort("HostThreadTeamMember team_reduce\n"); return ValueType(); }
-#endif*/
-
   template <typename T>
   KOKKOS_INLINE_FUNCTION T team_scan(T const& value,
-                                     T* const global = nullptr) const noexcept
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-  {
-    if (0 != m_data.m_team_rank) {
-      // Non-root copies to their local buffer:
-      ((T*)m_data.team_reduce_local())[1] = value;
-    }
+                                     T* const global = nullptr) const noexcept {
+    KOKKOS_IF_ON_HOST((
+        if (0 != m_data.m_team_rank) {
+          // Non-root copies to their local buffer:
+          ((T*)m_data.team_reduce_local())[1] = value;
+        }
 
-    // Root does not overwrite shared memory until all threads arrive
-    // and copy to their local buffer.
+        // Root does not overwrite shared memory until all threads arrive
+        // and copy to their local buffer.
 
-    if (m_data.team_rendezvous()) {
-      // All threads have entered 'team_rendezvous'
-      // only this thread returned from 'team_rendezvous'
-      // with a return value of 'true'
-      //
-      // This thread scans contributed values
+        if (m_data.team_rendezvous()) {
+          // All threads have entered 'team_rendezvous'
+          // only this thread returned from 'team_rendezvous'
+          // with a return value of 'true'
+          //
+          // This thread scans contributed values
 
-      {
-        T* prev = (T*)m_data.team_reduce_local();
+          {
+            T* prev = (T*)m_data.team_reduce_local();
 
-        prev[0] = 0;
-        prev[1] = value;
+            prev[0] = 0;
+            prev[1] = value;
 
-        for (int i = 1; i < m_data.m_team_size; ++i) {
-          T* const ptr = (T*)m_data.team_member(i)->team_reduce_local();
+            for (int i = 1; i < m_data.m_team_size; ++i) {
+              T* const ptr = (T*)m_data.team_member(i)->team_reduce_local();
 
-          ptr[0] = prev[0] + prev[1];
+              ptr[0] = prev[0] + prev[1];
 
-          prev = ptr;
-        }
-      }
+              prev = ptr;
+            }
+          }
 
-      // If adding to global value then atomic_fetch_add to that value
-      // and sum previous value to every entry of the scan.
-      if (global) {
-        T* prev = (T*)m_data.team_reduce_local();
+          // If adding to global value then atomic_fetch_add to that value
+          // and sum previous value to every entry of the scan.
+          if (global) {
+            T* prev = (T*)m_data.team_reduce_local();
 
-        {
-          T* ptr = (T*)m_data.team_member(m_data.m_team_size - 1)
-                       ->team_reduce_local();
-          prev[0] = Kokkos::atomic_fetch_add(global, ptr[0] + ptr[1]);
-        }
+            {
+              T* ptr = (T*)m_data.team_member(m_data.m_team_size - 1)
+                           ->team_reduce_local();
+              prev[0] = Kokkos::atomic_fetch_add(global, ptr[0] + ptr[1]);
+            }
 
-        for (int i = 1; i < m_data.m_team_size; ++i) {
-          T* ptr = (T*)m_data.team_member(i)->team_reduce_local();
-          ptr[0] += prev[0];
+            for (int i = 1; i < m_data.m_team_size; ++i) {
+              T* ptr = (T*)m_data.team_member(i)->team_reduce_local();
+              ptr[0] += prev[0];
+            }
+          }
+
+          m_data.team_rendezvous_release();
         }
-      }
 
-      m_data.team_rendezvous_release();
-    }
+        return ((T*)m_data.team_reduce_local())[0];))
 
-    return ((T*)m_data.team_reduce_local())[0];
-  }
-#else
-  {
-    (void)value;
-    (void)global;
-    Kokkos::abort("HostThreadTeamMember team_scan\n");
-    return T();
+    KOKKOS_IF_ON_DEVICE(((void)value; (void)global;
+                         Kokkos::abort("HostThreadTeamMember team_scan\n");
+                         return T();))
   }
-#endif
 };
 
 }  // namespace Impl
diff --git a/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
index 865d1c47faacfe3d6b39d4227bb180bf483c89dd..f6870899632a7049d19082364aac26a439754e64 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp
@@ -58,8 +58,8 @@ void memory_fence() {
 #elif defined(__HIP_DEVICE_COMPILE__)
   __threadfence();
 #elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
-  sycl::ONEAPI::atomic_fence(sycl::ONEAPI::memory_order::acq_rel,
-                             sycl::ONEAPI::memory_scope::device);
+  sycl::atomic_fence(sycl::ext::oneapi::memory_order::acq_rel,
+                     sycl::ext::oneapi::memory_scope::device);
 #elif defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64)
   asm volatile("mfence" ::: "memory");
 #elif defined(KOKKOS_ENABLE_GNU_ATOMICS) || \
diff --git a/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp b/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp
index e53afe436daff997726be8cb0c880887c32de1a4..71067b8e1b4ec0d61fd58cc2e2a19d3afaa0f8f4 100644
--- a/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp
@@ -1,18 +1,18 @@
 #include <Kokkos_NumericTraits.hpp>
 
 // NOTE These out-of class definitions are only required with C++14.  Since
-// C++17, a static data member declared constrexpr is impllictly inline.
+// C++17, a static data member declared constexpr is implicitly inline.
 
 #if !defined(KOKKOS_ENABLE_CXX17)
 namespace Kokkos {
 namespace Experimental {
 namespace Impl {
-#define OUT_OF_CLASS_DEFINTION_FLOATING_POINT(TRAIT) \
-  constexpr float TRAIT##_helper<float>::value;      \
-  constexpr double TRAIT##_helper<double>::value;    \
+#define OUT_OF_CLASS_DEFINITION_FLOATING_POINT(TRAIT) \
+  constexpr float TRAIT##_helper<float>::value;       \
+  constexpr double TRAIT##_helper<double>::value;     \
   constexpr long double TRAIT##_helper<long double>::value
 
-#define OUT_OF_CLASS_DEFINTION_INTEGRAL(TRAIT)                          \
+#define OUT_OF_CLASS_DEFINITION_INTEGRAL(TRAIT)                         \
   constexpr bool TRAIT##_helper<bool>::value;                           \
   constexpr char TRAIT##_helper<char>::value;                           \
   constexpr signed char TRAIT##_helper<signed char>::value;             \
@@ -26,12 +26,12 @@ namespace Impl {
   constexpr long long int TRAIT##_helper<long long int>::value;         \
   constexpr unsigned long long int TRAIT##_helper<unsigned long long int>::value
 
-#define OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(TRAIT) \
-  constexpr int TRAIT##_helper<float>::value;          \
-  constexpr int TRAIT##_helper<double>::value;         \
+#define OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(TRAIT) \
+  constexpr int TRAIT##_helper<float>::value;           \
+  constexpr int TRAIT##_helper<double>::value;          \
   constexpr int TRAIT##_helper<long double>::value
 
-#define OUT_OF_CLASS_DEFINTION_INTEGRAL_2(TRAIT)          \
+#define OUT_OF_CLASS_DEFINITION_INTEGRAL_2(TRAIT)         \
   constexpr int TRAIT##_helper<bool>::value;              \
   constexpr int TRAIT##_helper<char>::value;              \
   constexpr int TRAIT##_helper<signed char>::value;       \
@@ -45,28 +45,32 @@ namespace Impl {
   constexpr int TRAIT##_helper<long long int>::value;     \
   constexpr int TRAIT##_helper<unsigned long long int>::value
 
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT(infinity);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT(epsilon);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT(round_error);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT(norm_min);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(infinity);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(epsilon);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(round_error);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(norm_min);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(denorm_min);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(reciprocal_overflow_threshold);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(quiet_NaN);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(signaling_NaN);
 
-OUT_OF_CLASS_DEFINTION_INTEGRAL(finite_min);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT(finite_min);
-OUT_OF_CLASS_DEFINTION_INTEGRAL(finite_max);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT(finite_max);
+OUT_OF_CLASS_DEFINITION_INTEGRAL(finite_min);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(finite_min);
+OUT_OF_CLASS_DEFINITION_INTEGRAL(finite_max);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT(finite_max);
 
-OUT_OF_CLASS_DEFINTION_INTEGRAL_2(digits);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(digits);
-OUT_OF_CLASS_DEFINTION_INTEGRAL_2(digits10);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(digits10);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(max_digits10);
-OUT_OF_CLASS_DEFINTION_INTEGRAL_2(radix);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(radix);
+OUT_OF_CLASS_DEFINITION_INTEGRAL_2(digits);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(digits);
+OUT_OF_CLASS_DEFINITION_INTEGRAL_2(digits10);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(digits10);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(max_digits10);
+OUT_OF_CLASS_DEFINITION_INTEGRAL_2(radix);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(radix);
 
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(min_exponent);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(min_exponent10);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(max_exponent);
-OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(max_exponent10);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(min_exponent);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(min_exponent10);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(max_exponent);
+OUT_OF_CLASS_DEFINITION_FLOATING_POINT_2(max_exponent10);
 }  // namespace Impl
 }  // namespace Experimental
 }  // namespace Kokkos
diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp
index 8505e8f51aec744c00dfe2fef3f29d1eb9cb7306..9c8118e2bfb326de725555d441667e0cce87d670 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp
@@ -42,11 +42,18 @@
 //@HEADER
 */
 
+#ifndef KOKKOS_TOOLS_INDEPENDENT_BUILD
 #include <Kokkos_Macros.hpp>
 #include <Kokkos_Tuners.hpp>
+#endif
+
 #include <impl/Kokkos_Profiling.hpp>
-#if defined(KOKKOS_ENABLE_LIBDL)
+#include <impl/Kokkos_Profiling_Interface.hpp>
+#include <impl/Kokkos_Command_Line_Parsing.hpp>
+
+#if defined(KOKKOS_ENABLE_LIBDL) || defined(KOKKOS_TOOLS_INDEPENDENT_BUILD)
 #include <dlfcn.h>
+#define KOKKOS_TOOLS_ENABLE_LIBDL
 #endif
 
 #include <algorithm>
@@ -58,10 +65,136 @@
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
+#include <sstream>
+#include <iostream>
 namespace Kokkos {
 
 namespace Tools {
 
+const std::string InitArguments::unset_string_option = {
+    "kokkos_tools_impl_unset_option"};
+
+InitArguments tool_arguments;
+
+namespace Impl {
+void parse_command_line_arguments(int& narg, char* arg[],
+                                  InitArguments& arguments) {
+  int iarg = 0;
+  using Kokkos::Impl::check_arg;
+  using Kokkos::Impl::check_int_arg;
+  using Kokkos::Impl::check_str_arg;
+
+  auto& lib            = arguments.lib;
+  auto& args           = arguments.args;
+  auto& help           = arguments.help;
+  auto& tune_internals = arguments.tune_internals;
+  while (iarg < narg) {
+    if (check_arg(arg[iarg], "--kokkos-tune-internals")) {
+      tune_internals = InitArguments::PossiblyUnsetOption::on;
+      for (int k = iarg; k < narg - 1; k++) {
+        arg[k] = arg[k + 1];
+      }
+      narg--;
+    } else if (check_str_arg(arg[iarg], "--kokkos-tools-library", lib)) {
+      for (int k = iarg; k < narg - 1; k++) {
+        arg[k] = arg[k + 1];
+      }
+      narg--;
+    } else if (check_str_arg(arg[iarg], "--kokkos-tools-args", args)) {
+      for (int k = iarg; k < narg - 1; k++) {
+        arg[k] = arg[k + 1];
+      }
+      narg--;
+      // strip any leading and/or trailing quotes if they were retained in the
+      // string because this will very likely cause parsing issues for tools.
+      // If the quotes are retained (via bypassing the shell):
+      //    <EXE> --kokkos-tools-args="-c my example"
+      // would be tokenized as:
+      //    "<EXE>" "\"-c" "my" "example\""
+      // instead of:
+      //    "<EXE>" "-c" "my" "example"
+      if (!args.empty()) {
+        if (args.front() == '"') args = args.substr(1);
+        if (args.back() == '"') args = args.substr(0, args.length() - 1);
+      }
+      // add the name of the executable to the beginning
+      if (narg > 0) args = std::string(arg[0]) + " " + args;
+    } else if (check_arg(arg[iarg], "--kokkos-tools-help")) {
+      help = InitArguments::PossiblyUnsetOption::on;
+      for (int k = iarg; k < narg - 1; k++) {
+        arg[k] = arg[k + 1];
+      }
+      narg--;
+    } else {
+      iarg++;
+    }
+    if ((args == Kokkos::Tools::InitArguments::unset_string_option) && narg > 0)
+      args = arg[0];
+  }
+}
+Kokkos::Tools::Impl::InitializationStatus parse_environment_variables(
+    InitArguments& arguments) {
+  auto& tool_lib       = arguments.lib;
+  auto& tune_internals = arguments.tune_internals;
+  auto env_tool_lib    = std::getenv("KOKKOS_PROFILE_LIBRARY");
+  if (env_tool_lib != nullptr) {
+    if ((tool_lib != Kokkos::Tools::InitArguments::unset_string_option) &&
+        std::string(env_tool_lib) != tool_lib)
+      return {Kokkos::Tools::Impl::InitializationStatus::InitializationResult::
+                  environment_argument_mismatch,
+              "Error: expecting a match between --kokkos-tools-library and "
+              "KOKKOS_PROFILE_LIBRARY if both are set. Raised by "
+              "Kokkos::initialize(int narg, char* argc[])."};
+    else
+      tool_lib = env_tool_lib;
+  }
+  char* env_tuneinternals_str = std::getenv("KOKKOS_TUNE_INTERNALS");
+  if (env_tuneinternals_str != nullptr) {
+    std::string env_str(env_tuneinternals_str);  // deep-copies string
+    for (char& c : env_str) {
+      c = toupper(c);
+    }
+    if ((env_str == "TRUE") || (env_str == "ON") || (env_str == "1"))
+      tune_internals = InitArguments::PossiblyUnsetOption::on;
+    else if (tune_internals)
+      return {Kokkos::Tools::Impl::InitializationStatus::InitializationResult::
+                  environment_argument_mismatch,
+              "Error: expecting a match between --kokkos-tune-internals and "
+              "KOKKOS_TUNE_INTERNALS if both are set. Raised by "
+              "Kokkos::initialize(int narg, char* argc[])."};
+  }
+  return {
+      Kokkos::Tools::Impl::InitializationStatus::InitializationResult::success};
+}
+InitializationStatus initialize_tools_subsystem(
+    const Kokkos::Tools::InitArguments& args) {
+  Kokkos::Profiling::initialize(args.lib);
+  auto final_args =
+      (args.args != Kokkos::Tools::InitArguments::unset_string_option)
+          ? args.args
+          : "";
+
+  if (args.help) {
+    if (!Kokkos::Tools::printHelp(final_args)) {
+      std::cerr << "Tool has not provided a help message" << std::endl;
+    }
+    return {InitializationStatus::InitializationResult::help_request};
+  }
+  Kokkos::Tools::parseArgs(final_args);
+  return {InitializationStatus::InitializationResult::success};
+}
+
+}  // namespace Impl
+void initialize(const InitArguments& arguments) {
+  Impl::initialize_tools_subsystem(arguments);
+}
+void initialize(int argc, char* argv[]) {
+  InitArguments arguments;
+  Impl::parse_command_line_arguments(argc, argv, arguments);
+  Impl::parse_environment_variables(arguments);
+  initialize(arguments);
+}
+
 namespace Experimental {
 
 namespace Impl {
@@ -71,11 +204,14 @@ void tool_invoked_fence(const uint32_t /* devID */) {
    * Eventually we want to support fencing only
    * a given stream/resource
    */
+#ifndef KOKKOS_TOOLS_INDEPENDENT_BUILD
   Kokkos::fence(
       "Kokkos::Tools::Experimental::Impl::tool_invoked_fence: Tool Requested "
       "Fence");
+#endif
 }
 }  // namespace Impl
+
 #ifdef KOKKOS_ENABLE_TUNING
 static size_t kernel_name_context_variable_id;
 static size_t kernel_type_context_variable_id;
@@ -88,7 +224,7 @@ static std::unordered_map<size_t, VariableInfo> variable_metadata;
 static EventSet current_callbacks;
 static EventSet backup_callbacks;
 static EventSet no_profiling;
-static Kokkos::Tools::Experimental::ToolSettings tool_requirements;
+static ToolSettings tool_requirements;
 bool eventSetsEqual(const EventSet& l, const EventSet& r) {
   return l.init == r.init && l.finalize == r.finalize &&
          l.parse_args == r.parse_args && l.print_help == r.print_help &&
@@ -132,10 +268,11 @@ inline void invoke_kokkosp_callback(
     // if the tool requires global fencing (default true, but tools can
     // overwrite)
     if (may_require_global_fencing == MayRequireGlobalFencing::Yes &&
-        (Kokkos::Tools::Experimental::tool_requirements
-             .requires_global_fencing)) {
+        (tool_requirements.requires_global_fencing)) {
+#ifndef KOKKOS_TOOLS_INDEPENDENT_BUILD
       Kokkos::fence(
           "Kokkos::Tools::invoke_kokkosp_callback: Kokkos Profile Tool Fence");
+#endif
     }
     (*callback)(std::forward<Args>(args)...);
   }
@@ -421,12 +558,16 @@ SpaceHandle make_space_handle(const char* space_name) {
 template <typename Callback>
 void lookup_function(void* dlopen_handle, const std::string& basename,
                      Callback& callback) {
-#ifdef KOKKOS_ENABLE_LIBDL
+#ifdef KOKKOS_TOOLS_ENABLE_LIBDL
   // dlsym returns a pointer to an object, while we want to assign to
   // pointer to function A direct cast will give warnings hence, we have to
   // workaround the issue by casting pointer to pointers.
   void* p  = dlsym(dlopen_handle, basename.c_str());
   callback = *reinterpret_cast<Callback*>(&p);
+#else
+  (void)dlopen_handle;
+  (void)basename;
+  (void)callback;
 #endif
 }
 
@@ -438,8 +579,8 @@ void initialize(const std::string& profileLibrary) {
 
   auto invoke_init_callbacks = []() {
     Experimental::invoke_kokkosp_callback(
-        Kokkos::Tools::Experimental::MayRequireGlobalFencing::No,
-        Kokkos::Tools::Experimental::current_callbacks.init, 0,
+        Experimental::MayRequireGlobalFencing::No,
+        Experimental::current_callbacks.init, 0,
         (uint64_t)KOKKOSP_INTERFACE_VERSION, (uint32_t)0, nullptr);
 
     Experimental::tool_requirements.requires_global_fencing = true;
@@ -458,10 +599,11 @@ void initialize(const std::string& profileLibrary) {
         actions);
   };
 
-#ifdef KOKKOS_ENABLE_LIBDL
+#ifdef KOKKOS_TOOLS_ENABLE_LIBDL
   void* firstProfileLibrary = nullptr;
 
-  if (profileLibrary.empty()) {
+  if ((profileLibrary.empty()) ||
+      (profileLibrary == InitArguments::unset_string_option)) {
     invoke_init_callbacks();
     return;
   }
@@ -489,115 +631,85 @@ void initialize(const std::string& profileLibrary) {
       std::cout << "KokkosP: Library Loaded: " << profileLibraryName
                 << std::endl;
 #endif
-      lookup_function(
-          firstProfileLibrary, "kokkosp_begin_parallel_scan",
-          Kokkos::Tools::Experimental::current_callbacks.begin_parallel_scan);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_begin_parallel_for",
-          Kokkos::Tools::Experimental::current_callbacks.begin_parallel_for);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_begin_parallel_reduce",
-          Kokkos::Tools::Experimental::current_callbacks.begin_parallel_reduce);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_end_parallel_scan",
-          Kokkos::Tools::Experimental::current_callbacks.end_parallel_scan);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_end_parallel_for",
-          Kokkos::Tools::Experimental::current_callbacks.end_parallel_for);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_end_parallel_reduce",
-          Kokkos::Tools::Experimental::current_callbacks.end_parallel_reduce);
+      lookup_function(firstProfileLibrary, "kokkosp_begin_parallel_scan",
+                      Experimental::current_callbacks.begin_parallel_scan);
+      lookup_function(firstProfileLibrary, "kokkosp_begin_parallel_for",
+                      Experimental::current_callbacks.begin_parallel_for);
+      lookup_function(firstProfileLibrary, "kokkosp_begin_parallel_reduce",
+                      Experimental::current_callbacks.begin_parallel_reduce);
+      lookup_function(firstProfileLibrary, "kokkosp_end_parallel_scan",
+                      Experimental::current_callbacks.end_parallel_scan);
+      lookup_function(firstProfileLibrary, "kokkosp_end_parallel_for",
+                      Experimental::current_callbacks.end_parallel_for);
+      lookup_function(firstProfileLibrary, "kokkosp_end_parallel_reduce",
+                      Experimental::current_callbacks.end_parallel_reduce);
 
       lookup_function(firstProfileLibrary, "kokkosp_init_library",
-                      Kokkos::Tools::Experimental::current_callbacks.init);
+                      Experimental::current_callbacks.init);
       lookup_function(firstProfileLibrary, "kokkosp_finalize_library",
-                      Kokkos::Tools::Experimental::current_callbacks.finalize);
-
-      lookup_function(
-          firstProfileLibrary, "kokkosp_push_profile_region",
-          Kokkos::Tools::Experimental::current_callbacks.push_region);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_pop_profile_region",
-          Kokkos::Tools::Experimental::current_callbacks.pop_region);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_allocate_data",
-          Kokkos::Tools::Experimental::current_callbacks.allocate_data);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_deallocate_data",
-          Kokkos::Tools::Experimental::current_callbacks.deallocate_data);
-
-      lookup_function(
-          firstProfileLibrary, "kokkosp_begin_deep_copy",
-          Kokkos::Tools::Experimental::current_callbacks.begin_deep_copy);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_end_deep_copy",
-          Kokkos::Tools::Experimental::current_callbacks.end_deep_copy);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_begin_fence",
-          Kokkos::Tools::Experimental::current_callbacks.begin_fence);
+                      Experimental::current_callbacks.finalize);
+
+      lookup_function(firstProfileLibrary, "kokkosp_push_profile_region",
+                      Experimental::current_callbacks.push_region);
+      lookup_function(firstProfileLibrary, "kokkosp_pop_profile_region",
+                      Experimental::current_callbacks.pop_region);
+      lookup_function(firstProfileLibrary, "kokkosp_allocate_data",
+                      Experimental::current_callbacks.allocate_data);
+      lookup_function(firstProfileLibrary, "kokkosp_deallocate_data",
+                      Experimental::current_callbacks.deallocate_data);
+
+      lookup_function(firstProfileLibrary, "kokkosp_begin_deep_copy",
+                      Experimental::current_callbacks.begin_deep_copy);
+      lookup_function(firstProfileLibrary, "kokkosp_end_deep_copy",
+                      Experimental::current_callbacks.end_deep_copy);
+      lookup_function(firstProfileLibrary, "kokkosp_begin_fence",
+                      Experimental::current_callbacks.begin_fence);
       lookup_function(firstProfileLibrary, "kokkosp_end_fence",
-                      Kokkos::Tools::Experimental::current_callbacks.end_fence);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_dual_view_sync",
-          Kokkos::Tools::Experimental::current_callbacks.sync_dual_view);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_dual_view_modify",
-          Kokkos::Tools::Experimental::current_callbacks.modify_dual_view);
-
-      lookup_function(
-          firstProfileLibrary, "kokkosp_declare_metadata",
-          Kokkos::Tools::Experimental::current_callbacks.declare_metadata);
+                      Experimental::current_callbacks.end_fence);
+      lookup_function(firstProfileLibrary, "kokkosp_dual_view_sync",
+                      Experimental::current_callbacks.sync_dual_view);
+      lookup_function(firstProfileLibrary, "kokkosp_dual_view_modify",
+                      Experimental::current_callbacks.modify_dual_view);
+
+      lookup_function(firstProfileLibrary, "kokkosp_declare_metadata",
+                      Experimental::current_callbacks.declare_metadata);
       lookup_function(firstProfileLibrary, "kokkosp_create_profile_section",
-                      Kokkos::Tools::Experimental::current_callbacks
-                          .create_profile_section);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_start_profile_section",
-          Kokkos::Tools::Experimental::current_callbacks.start_profile_section);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_stop_profile_section",
-          Kokkos::Tools::Experimental::current_callbacks.stop_profile_section);
+                      Experimental::current_callbacks.create_profile_section);
+      lookup_function(firstProfileLibrary, "kokkosp_start_profile_section",
+                      Experimental::current_callbacks.start_profile_section);
+      lookup_function(firstProfileLibrary, "kokkosp_stop_profile_section",
+                      Experimental::current_callbacks.stop_profile_section);
       lookup_function(firstProfileLibrary, "kokkosp_destroy_profile_section",
-                      Kokkos::Tools::Experimental::current_callbacks
-                          .destroy_profile_section);
+                      Experimental::current_callbacks.destroy_profile_section);
 
-      lookup_function(
-          firstProfileLibrary, "kokkosp_profile_event",
-          Kokkos::Tools::Experimental::current_callbacks.profile_event);
+      lookup_function(firstProfileLibrary, "kokkosp_profile_event",
+                      Experimental::current_callbacks.profile_event);
 #ifdef KOKKOS_ENABLE_TUNING
+      lookup_function(firstProfileLibrary, "kokkosp_declare_output_type",
+                      Experimental::current_callbacks.declare_output_type);
+
+      lookup_function(firstProfileLibrary, "kokkosp_declare_input_type",
+                      Experimental::current_callbacks.declare_input_type);
+      lookup_function(firstProfileLibrary, "kokkosp_request_values",
+                      Experimental::current_callbacks.request_output_values);
+      lookup_function(firstProfileLibrary, "kokkosp_end_context",
+                      Experimental::current_callbacks.end_tuning_context);
+      lookup_function(firstProfileLibrary, "kokkosp_begin_context",
+                      Experimental::current_callbacks.begin_tuning_context);
       lookup_function(
-          firstProfileLibrary, "kokkosp_declare_output_type",
-          Kokkos::Tools::Experimental::current_callbacks.declare_output_type);
-
-      lookup_function(
-          firstProfileLibrary, "kokkosp_declare_input_type",
-          Kokkos::Tools::Experimental::current_callbacks.declare_input_type);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_request_values",
-          Kokkos::Tools::Experimental::current_callbacks.request_output_values);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_end_context",
-          Kokkos::Tools::Experimental::current_callbacks.end_tuning_context);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_begin_context",
-          Kokkos::Tools::Experimental::current_callbacks.begin_tuning_context);
-      lookup_function(firstProfileLibrary, "kokkosp_declare_optimization_goal",
-                      Kokkos::Tools::Experimental::current_callbacks
-                          .declare_optimization_goal);
+          firstProfileLibrary, "kokkosp_declare_optimization_goal",
+          Experimental::current_callbacks.declare_optimization_goal);
 #endif  // KOKKOS_ENABLE_TUNING
 
+      lookup_function(firstProfileLibrary, "kokkosp_print_help",
+                      Experimental::current_callbacks.print_help);
+      lookup_function(firstProfileLibrary, "kokkosp_parse_args",
+                      Experimental::current_callbacks.parse_args);
       lookup_function(
-          firstProfileLibrary, "kokkosp_print_help",
-          Kokkos::Tools::Experimental::current_callbacks.print_help);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_parse_args",
-          Kokkos::Tools::Experimental::current_callbacks.parse_args);
-      lookup_function(firstProfileLibrary,
-                      "kokkosp_provide_tool_programming_interface",
-                      Kokkos::Tools::Experimental::current_callbacks
-                          .provide_tool_programming_interface);
-      lookup_function(
-          firstProfileLibrary, "kokkosp_request_tool_settings",
-          Kokkos::Tools::Experimental::current_callbacks.request_tool_settings);
+          firstProfileLibrary, "kokkosp_provide_tool_programming_interface",
+          Experimental::current_callbacks.provide_tool_programming_interface);
+      lookup_function(firstProfileLibrary, "kokkosp_request_tool_settings",
+                      Experimental::current_callbacks.request_tool_settings);
     }
   }
 #else
@@ -804,24 +916,34 @@ void set_dual_view_modify_callback(dualViewModifyFunction callback) {
 void set_declare_metadata_callback(declareMetadataFunction callback) {
   current_callbacks.declare_metadata = callback;
 }
+void set_request_tool_settings_callback(requestToolSettingsFunction callback) {
+  current_callbacks.request_tool_settings = callback;
+}
+void set_provide_tool_programming_interface_callback(
+    provideToolProgrammingInterfaceFunction callback) {
+  current_callbacks.provide_tool_programming_interface = callback;
+}
 
-void set_declare_output_type_callback(outputTypeDeclarationFunction callback) {
+void set_declare_output_type_callback(
+    Experimental::outputTypeDeclarationFunction callback) {
   current_callbacks.declare_output_type = callback;
 }
-void set_declare_input_type_callback(inputTypeDeclarationFunction callback) {
+void set_declare_input_type_callback(
+    Experimental::inputTypeDeclarationFunction callback) {
   current_callbacks.declare_input_type = callback;
 }
-void set_request_output_values_callback(requestValueFunction callback) {
+void set_request_output_values_callback(
+    Experimental::requestValueFunction callback) {
   current_callbacks.request_output_values = callback;
 }
-void set_end_context_callback(contextEndFunction callback) {
+void set_end_context_callback(Experimental::contextEndFunction callback) {
   current_callbacks.end_tuning_context = callback;
 }
-void set_begin_context_callback(contextBeginFunction callback) {
+void set_begin_context_callback(Experimental::contextBeginFunction callback) {
   current_callbacks.begin_tuning_context = callback;
 }
 void set_declare_optimization_goal_callback(
-    optimizationGoalDeclarationFunction callback) {
+    Experimental::optimizationGoalDeclarationFunction callback) {
   current_callbacks.declare_optimization_goal = callback;
 }
 
@@ -832,8 +954,12 @@ void pause_tools() {
 
 void resume_tools() { current_callbacks = backup_callbacks; }
 
-EventSet get_callbacks() { return current_callbacks; }
-void set_callbacks(EventSet new_events) { current_callbacks = new_events; }
+Kokkos::Tools::Experimental::EventSet get_callbacks() {
+  return current_callbacks;
+}
+void set_callbacks(Kokkos::Tools::Experimental::EventSet new_events) {
+  current_callbacks = new_events;
+}
 }  // namespace Experimental
 }  // namespace Tools
 
@@ -915,11 +1041,8 @@ SpaceHandle make_space_handle(const char* space_name) {
 }
 }  // namespace Profiling
 
-}  // namespace Kokkos
-
 // Tuning
 
-namespace Kokkos {
 namespace Tools {
 namespace Experimental {
 static size_t& get_context_counter() {
diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp
index 86a4cfa4a8543e58feab3aee24f1a9ac8530bb5e..4a8527f5e3d00b7311ebc7c7340d359d7f55a0cd 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp
@@ -45,12 +45,9 @@
 #ifndef KOKKOS_IMPL_KOKKOS_PROFILING_HPP
 #define KOKKOS_IMPL_KOKKOS_PROFILING_HPP
 
-#include <Kokkos_Core_fwd.hpp>
-#include <Kokkos_ExecPolicy.hpp>
-#include <Kokkos_Macros.hpp>
-#include <Kokkos_Tuners.hpp>
 #include <impl/Kokkos_Profiling_Interface.hpp>
 #include <memory>
+#include <iosfwd>
 #include <unordered_map>
 #include <map>
 #include <string>
@@ -63,6 +60,40 @@ bool tune_internals() noexcept;
 
 namespace Tools {
 
+struct InitArguments {
+  // NOTE DZP: PossiblyUnsetOption was introduced
+  // before C++17, std::optional is a better choice
+  // for this long-term
+  static const std::string unset_string_option;
+  enum PossiblyUnsetOption { unset, off, on };
+  PossiblyUnsetOption tune_internals = unset;
+  PossiblyUnsetOption help           = unset;
+  std::string lib                    = unset_string_option;
+  std::string args                   = unset_string_option;
+};
+
+namespace Impl {
+
+struct InitializationStatus {
+  enum InitializationResult {
+    success,
+    failure,
+    help_request,
+    environment_argument_mismatch
+  };
+  InitializationResult result;
+  std::string error_message;
+};
+InitializationStatus initialize_tools_subsystem(
+    const Kokkos::Tools::InitArguments& args);
+
+void parse_command_line_arguments(int& narg, char* arg[],
+                                  InitArguments& arguments);
+Kokkos::Tools::Impl::InitializationStatus parse_environment_variables(
+    InitArguments& arguments);
+
+}  // namespace Impl
+
 bool profileLibraryLoaded();
 
 void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID,
@@ -129,7 +160,10 @@ void modifyDualView(const std::string& label, const void* const ptr,
                     bool on_device);
 
 void declareMetadata(const std::string& key, const std::string& value);
-void initialize(const std::string& = {});
+void initialize(
+    const std::string& = {});  // should rename to impl_initialize ASAP
+void initialize(const Kokkos::Tools::InitArguments&);
+void initialize(int argc, char* argv[]);
 void finalize();
 bool printHelp(const std::string&);
 void parseArgs(const std::string&);
@@ -230,7 +264,9 @@ void set_end_fence_callback(endFenceFunction callback);
 void set_dual_view_sync_callback(dualViewSyncFunction callback);
 void set_dual_view_modify_callback(dualViewModifyFunction callback);
 void set_declare_metadata_callback(declareMetadataFunction callback);
-
+void set_request_tool_settings_callback(requestToolSettingsFunction callback);
+void set_provide_tool_programming_interface_callback(
+    provideToolProgrammingInterfaceFunction callback);
 void set_declare_output_type_callback(outputTypeDeclarationFunction callback);
 void set_declare_input_type_callback(inputTypeDeclarationFunction callback);
 void set_request_output_values_callback(requestValueFunction callback);
@@ -252,426 +288,6 @@ size_t get_new_context_id();
 size_t get_current_context_id();
 }  // namespace Experimental
 
-namespace Impl {
-
-static std::map<std::string, Kokkos::Tools::Experimental::TeamSizeTuner>
-    team_tuners;
-
-template <int Rank>
-using MDRangeTuningMap =
-    std::map<std::string, Kokkos::Tools::Experimental::MDRangeTuner<Rank>>;
-
-template <int Rank>
-static MDRangeTuningMap<Rank> mdrange_tuners;
-
-// For any policies without a tuning implementation, with a reducer
-template <class ReducerType, class ExecPolicy, class Functor, typename TagType>
-void tune_policy(const size_t, const std::string&, ExecPolicy&, const Functor&,
-                 TagType) {}
-
-// For any policies without a tuning implementation, without a reducer
-template <class ExecPolicy, class Functor, typename TagType>
-void tune_policy(const size_t, const std::string&, ExecPolicy&, const Functor&,
-                 const TagType&) {}
-
-/**
- * Tuning for parallel_fors and parallel_scans is a fairly simple process.
- *
- * Tuning for a parallel_reduce turns out to be a little more complicated.
- *
- * If you're tuning a reducer, it might be a complex or a simple reducer
- * (an example of simple would be one where the join is just "+".
- *
- * Unfortunately these two paths are very different in terms of which classes
- * get instantiated. Thankfully, all of this complexity is encoded in the
- * ReducerType. If it's a "simple" reducer, this will be Kokkos::InvalidType,
- * otherwise it'll be something else.
- *
- * If the type is complex, for the code to be generally right you _must_
- * pass an instance of that ReducerType to functions that determine
- * eligible team sizes. If the type is simple, you can't construct one,
- * you use the simpler 2-arg formulation of team_size_recommended/max.
- */
-
-namespace Impl {
-
-struct SimpleTeamSizeCalculator {
-  template <typename Policy, typename Functor, typename Tag>
-  int get_max_team_size(const Policy& policy, const Functor& functor,
-                        const Tag tag) {
-    auto max = policy.team_size_max(functor, tag);
-    return max;
-  }
-  template <typename Policy, typename Functor, typename Tag>
-  int get_recommended_team_size(const Policy& policy, const Functor& functor,
-                                const Tag tag) {
-    auto max = policy.team_size_recommended(functor, tag);
-    return max;
-  }
-  template <typename Policy, typename Functor>
-  int get_mdrange_max_tile_size_product(const Policy& policy,
-                                        const Functor& functor,
-                                        const Kokkos::ParallelForTag&) {
-    using exec_space = typename Policy::execution_space;
-    using driver     = Kokkos::Impl::ParallelFor<Functor, Policy, exec_space>;
-    return driver::max_tile_size_product(policy, functor);
-  }
-  template <typename Policy, typename Functor>
-  int get_mdrange_max_tile_size_product(const Policy& policy,
-                                        const Functor& functor,
-                                        const Kokkos::ParallelReduceTag&) {
-    using exec_space = typename Policy::execution_space;
-    using driver =
-        Kokkos::Impl::ParallelReduce<Functor, Policy, Kokkos::InvalidType,
-                                     exec_space>;
-    return driver::max_tile_size_product(policy, functor);
-  }
-};
-
-// when we have a complex reducer, we need to pass an
-// instance to team_size_recommended/max. Reducers
-// aren't default constructible, but they are
-// constructible from a reference to an
-// instance of their value_type so we construct
-// a value_type and temporary reducer here
-template <typename ReducerType>
-struct ComplexReducerSizeCalculator {
-  template <typename Policy, typename Functor, typename Tag>
-  int get_max_team_size(const Policy& policy, const Functor& functor,
-                        const Tag tag) {
-    using value_type = typename ReducerType::value_type;
-    value_type value;
-    ReducerType reducer_example = ReducerType(value);
-    return policy.team_size_max(functor, reducer_example, tag);
-  }
-  template <typename Policy, typename Functor, typename Tag>
-  int get_recommended_team_size(const Policy& policy, const Functor& functor,
-                                const Tag tag) {
-    using value_type = typename ReducerType::value_type;
-    value_type value;
-    ReducerType reducer_example = ReducerType(value);
-    return policy.team_size_recommended(functor, reducer_example, tag);
-  }
-  template <typename Policy, typename Functor>
-  int get_mdrange_max_tile_size_product(const Policy& policy,
-                                        const Functor& functor,
-                                        const Kokkos::ParallelReduceTag&) {
-    using exec_space = typename Policy::execution_space;
-    using driver =
-        Kokkos::Impl::ParallelReduce<Functor, Policy, ReducerType, exec_space>;
-    return driver::max_tile_size_product(policy, functor);
-  }
-};
-
-}  // namespace Impl
-
-template <class Tuner, class Functor, class TagType,
-          class TuningPermissionFunctor, class Map, class Policy>
-void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy,
-                         const Functor& functor, const TagType& tag,
-                         const TuningPermissionFunctor& should_tune) {
-  if (should_tune(policy)) {
-    std::string label = label_in;
-    if (label_in.empty()) {
-      using policy_type =
-          typename std::remove_reference<decltype(policy)>::type;
-      using work_tag = typename policy_type::work_tag;
-      Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label);
-      label = name.get();
-    }
-    auto tuner_iter = [&]() {
-      auto my_tuner = map.find(label);
-      if (my_tuner == map.end()) {
-        return (map.emplace(label, Tuner(label, policy, functor, tag,
-                                         Impl::SimpleTeamSizeCalculator{}))
-                    .first);
-      }
-      return my_tuner;
-    }();
-    tuner_iter->second.tune(policy);
-  }
-}
-template <class Tuner, class ReducerType, class Functor, class TagType,
-          class TuningPermissionFunctor, class Map, class Policy>
-void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy,
-                         const Functor& functor, const TagType& tag,
-                         const TuningPermissionFunctor& should_tune) {
-  if (should_tune(policy)) {
-    std::string label = label_in;
-    if (label_in.empty()) {
-      using policy_type =
-          typename std::remove_reference<decltype(policy)>::type;
-      using work_tag = typename policy_type::work_tag;
-      Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label);
-      label = name.get();
-    }
-    auto tuner_iter = [&]() {
-      auto my_tuner = map.find(label);
-      if (my_tuner == map.end()) {
-        return (map.emplace(
-                       label,
-                       Tuner(label, policy, functor, tag,
-                             Impl::ComplexReducerSizeCalculator<ReducerType>{}))
-                    .first);
-      }
-      return my_tuner;
-    }();
-    tuner_iter->second.tune(policy);
-  }
-}
-
-// tune a TeamPolicy, without reducer
-template <class Functor, class TagType, class... Properties>
-void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
-                 Kokkos::TeamPolicy<Properties...>& policy,
-                 const Functor& functor, const TagType& tag) {
-  generic_tune_policy<Experimental::TeamSizeTuner>(
-      label_in, team_tuners, policy, functor, tag,
-      [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) {
-        return (candidate_policy.impl_auto_team_size() ||
-                candidate_policy.impl_auto_vector_length());
-      });
-}
-
-// tune a TeamPolicy, with reducer
-template <class ReducerType, class Functor, class TagType, class... Properties>
-void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
-                 Kokkos::TeamPolicy<Properties...>& policy,
-                 const Functor& functor, const TagType& tag) {
-  generic_tune_policy<Experimental::TeamSizeTuner, ReducerType>(
-      label_in, team_tuners, policy, functor, tag,
-      [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) {
-        return (candidate_policy.impl_auto_team_size() ||
-                candidate_policy.impl_auto_vector_length());
-      });
-}
-
-// tune a MDRangePolicy, without reducer
-template <class Functor, class TagType, class... Properties>
-void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
-                 Kokkos::MDRangePolicy<Properties...>& policy,
-                 const Functor& functor, const TagType& tag) {
-  using Policy              = Kokkos::MDRangePolicy<Properties...>;
-  static constexpr int rank = Policy::rank;
-  generic_tune_policy<Experimental::MDRangeTuner<rank>>(
-      label_in, mdrange_tuners<rank>, policy, functor, tag,
-      [](const Policy& candidate_policy) {
-        return candidate_policy.impl_tune_tile_size();
-      });
-}
-
-// tune a MDRangePolicy, with reducer
-template <class ReducerType, class Functor, class TagType, class... Properties>
-void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
-                 Kokkos::MDRangePolicy<Properties...>& policy,
-                 const Functor& functor, const TagType& tag) {
-  using Policy              = Kokkos::MDRangePolicy<Properties...>;
-  static constexpr int rank = Policy::rank;
-  generic_tune_policy<Experimental::MDRangeTuner<rank>, ReducerType>(
-      label_in, mdrange_tuners<rank>, policy, functor, tag,
-      [](const Policy& candidate_policy) {
-        return candidate_policy.impl_tune_tile_size();
-      });
-}
-
-template <class ReducerType>
-struct ReductionSwitcher {
-  template <class Functor, class TagType, class ExecPolicy>
-  static void tune(const size_t tuning_context, const std::string& label,
-                   ExecPolicy& policy, const Functor& functor,
-                   const TagType& tag) {
-    if (Kokkos::tune_internals()) {
-      tune_policy<ReducerType>(tuning_context, label, policy, functor, tag);
-    }
-  }
-};
-
-template <>
-struct ReductionSwitcher<Kokkos::InvalidType> {
-  template <class Functor, class TagType, class ExecPolicy>
-  static void tune(const size_t tuning_context, const std::string& label,
-                   ExecPolicy& policy, const Functor& functor,
-                   const TagType& tag) {
-    if (Kokkos::tune_internals()) {
-      tune_policy(tuning_context, label, policy, functor, tag);
-    }
-  }
-};
-
-template <class Tuner, class Functor, class TagType,
-          class TuningPermissionFunctor, class Map, class Policy>
-void generic_report_results(const std::string& label_in, Map& map,
-                            Policy& policy, const Functor&, const TagType&,
-                            const TuningPermissionFunctor& should_tune) {
-  if (should_tune(policy)) {
-    std::string label = label_in;
-    if (label_in.empty()) {
-      using policy_type =
-          typename std::remove_reference<decltype(policy)>::type;
-      using work_tag = typename policy_type::work_tag;
-      Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label);
-      label = name.get();
-    }
-    auto tuner_iter = map[label];
-    tuner_iter.end();
-  }
-}
-
-// report results for a policy type we don't tune (do nothing)
-template <class ExecPolicy, class Functor, typename TagType>
-void report_policy_results(const size_t, const std::string&, ExecPolicy&,
-                           const Functor&, const TagType&) {}
-
-// report results for a TeamPolicy
-template <class Functor, class TagType, class... Properties>
-void report_policy_results(const size_t /**tuning_context*/,
-                           const std::string& label_in,
-                           Kokkos::TeamPolicy<Properties...>& policy,
-                           const Functor& functor, const TagType& tag) {
-  generic_report_results<Experimental::TeamSizeTuner>(
-      label_in, team_tuners, policy, functor, tag,
-      [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) {
-        return (candidate_policy.impl_auto_team_size() ||
-                candidate_policy.impl_auto_vector_length());
-      });
-}
-
-// report results for an MDRangePolicy
-template <class Functor, class TagType, class... Properties>
-void report_policy_results(const size_t /**tuning_context*/,
-                           const std::string& label_in,
-                           Kokkos::MDRangePolicy<Properties...>& policy,
-                           const Functor& functor, const TagType& tag) {
-  using Policy              = Kokkos::MDRangePolicy<Properties...>;
-  static constexpr int rank = Policy::rank;
-  generic_report_results<Experimental::MDRangeTuner<rank>>(
-      label_in, mdrange_tuners<rank>, policy, functor, tag,
-      [](const Policy& candidate_policy) {
-        return candidate_policy.impl_tune_tile_size();
-      });
-}
-
-template <class ExecPolicy, class FunctorType>
-void begin_parallel_for(ExecPolicy& policy, FunctorType& functor,
-                        const std::string& label, uint64_t& kpID) {
-  if (Kokkos::Tools::profileLibraryLoaded()) {
-    Kokkos::Impl::ParallelConstructName<FunctorType,
-                                        typename ExecPolicy::work_tag>
-        name(label);
-    Kokkos::Tools::beginParallelFor(
-        name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
-        &kpID);
-  }
-#ifdef KOKKOS_ENABLE_TUNING
-  size_t context_id = Kokkos::Tools::Experimental::get_new_context_id();
-  if (Kokkos::tune_internals()) {
-    tune_policy(context_id, label, policy, functor, Kokkos::ParallelForTag{});
-  }
-#else
-  (void)functor;
-#endif
-}
-
-template <class ExecPolicy, class FunctorType>
-void end_parallel_for(ExecPolicy& policy, FunctorType& functor,
-                      const std::string& label, uint64_t& kpID) {
-  if (Kokkos::Tools::profileLibraryLoaded()) {
-    Kokkos::Tools::endParallelFor(kpID);
-  }
-#ifdef KOKKOS_ENABLE_TUNING
-  size_t context_id = Kokkos::Tools::Experimental::get_current_context_id();
-  if (Kokkos::tune_internals()) {
-    report_policy_results(context_id, label, policy, functor,
-                          Kokkos::ParallelForTag{});
-  }
-#else
-  (void)policy;
-  (void)functor;
-  (void)label;
-#endif
-}
-
-template <class ExecPolicy, class FunctorType>
-void begin_parallel_scan(ExecPolicy& policy, FunctorType& functor,
-                         const std::string& label, uint64_t& kpID) {
-  if (Kokkos::Tools::profileLibraryLoaded()) {
-    Kokkos::Impl::ParallelConstructName<FunctorType,
-                                        typename ExecPolicy::work_tag>
-        name(label);
-    Kokkos::Tools::beginParallelScan(
-        name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
-        &kpID);
-  }
-#ifdef KOKKOS_ENABLE_TUNING
-  size_t context_id = Kokkos::Tools::Experimental::get_new_context_id();
-  if (Kokkos::tune_internals()) {
-    tune_policy(context_id, label, policy, functor, Kokkos::ParallelScanTag{});
-  }
-#else
-  (void)functor;
-#endif
-}
-
-template <class ExecPolicy, class FunctorType>
-void end_parallel_scan(ExecPolicy& policy, FunctorType& functor,
-                       const std::string& label, uint64_t& kpID) {
-  if (Kokkos::Tools::profileLibraryLoaded()) {
-    Kokkos::Tools::endParallelScan(kpID);
-  }
-#ifdef KOKKOS_ENABLE_TUNING
-  size_t context_id = Kokkos::Tools::Experimental::get_current_context_id();
-  if (Kokkos::tune_internals()) {
-    report_policy_results(context_id, label, policy, functor,
-                          Kokkos::ParallelScanTag{});
-  }
-#else
-  (void)policy;
-  (void)functor;
-  (void)label;
-#endif
-}
-
-template <class ReducerType, class ExecPolicy, class FunctorType>
-void begin_parallel_reduce(ExecPolicy& policy, FunctorType& functor,
-                           const std::string& label, uint64_t& kpID) {
-  if (Kokkos::Tools::profileLibraryLoaded()) {
-    Kokkos::Impl::ParallelConstructName<FunctorType,
-                                        typename ExecPolicy::work_tag>
-        name(label);
-    Kokkos::Tools::beginParallelReduce(
-        name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
-        &kpID);
-  }
-#ifdef KOKKOS_ENABLE_TUNING
-  size_t context_id = Kokkos::Tools::Experimental::get_new_context_id();
-  ReductionSwitcher<ReducerType>::tune(context_id, label, policy, functor,
-                                       Kokkos::ParallelReduceTag{});
-#else
-  (void)functor;
-#endif
-}
-
-template <class ReducerType, class ExecPolicy, class FunctorType>
-void end_parallel_reduce(ExecPolicy& policy, FunctorType& functor,
-                         const std::string& label, uint64_t& kpID) {
-  if (Kokkos::Tools::profileLibraryLoaded()) {
-    Kokkos::Tools::endParallelReduce(kpID);
-  }
-#ifdef KOKKOS_ENABLE_TUNING
-  size_t context_id = Kokkos::Tools::Experimental::get_current_context_id();
-  if (Kokkos::tune_internals()) {
-    report_policy_results(context_id, label, policy, functor,
-                          Kokkos::ParallelReduceTag{});
-  }
-#else
-  (void)policy;
-  (void)functor;
-  (void)label;
-#endif
-}
-
-}  // namespace Impl
-
 }  // namespace Tools
 namespace Profiling {
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h b/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h
index 2c8d1428fc595e0e6724624465ab839f5c80b138..a069fb831b096035701ac40bce494de976e0ee76 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h
+++ b/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h
@@ -54,7 +54,7 @@
 #include <stdbool.h>
 #endif
 
-#define KOKKOSP_INTERFACE_VERSION 20210623
+#define KOKKOSP_INTERFACE_VERSION 20211015
 
 // Profiling
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
index a7aec2e6fd53f6a6b37b011452b58750b092f096..4e0e81405f00da10a9a6c89f5360edde08df9e1f 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
@@ -47,6 +47,7 @@
 
 #include <cinttypes>
 #include <cstddef>
+#include <climits>
 
 #include <cstdlib>
 
@@ -81,6 +82,12 @@ struct ExecutionSpaceIdentifier {
   uint32_t device_id;
   uint32_t instance_id;
 };
+
+constexpr const uint32_t num_type_bits     = 8;
+constexpr const uint32_t num_device_bits   = 7;
+constexpr const uint32_t num_instance_bits = 17;
+constexpr const uint32_t num_avail_bits    = sizeof(uint32_t) * CHAR_BIT;
+
 inline DeviceType devicetype_from_uint32t(const uint32_t in) {
   switch (in) {
     case 0: return DeviceType::Serial;
@@ -96,25 +103,22 @@ inline DeviceType devicetype_from_uint32t(const uint32_t in) {
 }
 
 inline ExecutionSpaceIdentifier identifier_from_devid(const uint32_t in) {
-  // ExecutionSpaceIdentifier out;
-  // out.type = in >> 24;
-  // out.device_id = in >> 17;
-  // out.instance_id = ((uint32_t(-1)) << 17 ) & in;
-  return {devicetype_from_uint32t(in >> 24),
-          (~((uint32_t(-1)) << 24)) & (in >> 17),
-          (~((uint32_t(-1)) << 17)) & in};
+  constexpr const uint32_t shift = num_avail_bits - num_type_bits;
+
+  return {devicetype_from_uint32t(in >> shift), /*First 8 bits*/
+          (~((uint32_t(-1)) << num_device_bits)) &
+              (in >> num_instance_bits),                  /*Next 7 bits */
+          (~((uint32_t(-1)) << num_instance_bits)) & in}; /*Last 17 bits*/
 }
 
 template <typename ExecutionSpace>
 struct DeviceTypeTraits;
 
-constexpr const size_t device_type_bits = 8;
-constexpr const size_t instance_bits    = 24;
 template <typename ExecutionSpace>
 constexpr uint32_t device_id_root() {
   constexpr auto device_id =
       static_cast<uint32_t>(DeviceTypeTraits<ExecutionSpace>::id);
-  return (device_id << instance_bits);
+  return (device_id << num_instance_bits);
 }
 template <typename ExecutionSpace>
 inline uint32_t device_id(ExecutionSpace const& space) noexcept {
diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp
index 3efff98e459e8a8b92983d195a7a4486672bdce4..149c881af5e0cf636907834794f41a1a0dc5a4c5 100644
--- a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp
@@ -177,7 +177,8 @@ SharedAllocationRecord<void, void>::SharedAllocationRecord(
     SharedAllocationRecord<void, void>* arg_root,
 #endif
     SharedAllocationHeader* arg_alloc_ptr, size_t arg_alloc_size,
-    SharedAllocationRecord<void, void>::function_type arg_dealloc)
+    SharedAllocationRecord<void, void>::function_type arg_dealloc,
+    const std::string& label)
     : m_alloc_ptr(arg_alloc_ptr),
       m_alloc_size(arg_alloc_size),
       m_dealloc(arg_dealloc)
@@ -188,7 +189,8 @@ SharedAllocationRecord<void, void>::SharedAllocationRecord(
       m_next(nullptr)
 #endif
       ,
-      m_count(0) {
+      m_count(0),
+      m_label(label) {
   if (nullptr != arg_alloc_ptr) {
 #ifdef KOKKOS_ENABLE_DEBUG
     // Insert into the root double-linked list for tracking
diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp
index 64dfd5d33fb8576b1cb5446843edefaaf6d67422..2f18157ffeb21743a920bc90bc5a033191c192fa 100644
--- a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp
@@ -52,27 +52,6 @@
 #include <cstdint>
 #include <string>
 
-#if defined(KOKKOS_ENABLE_OPENMPTARGET)
-// Base function.
-static constexpr bool kokkos_omp_on_host() { return true; }
-#if defined(KOKKOS_COMPILER_PGI)
-#define KOKKOS_IMPL_IF_ON_HOST if (!__builtin_is_device_code())
-#else
-// Note: OpenMPTarget enforces C++17 at configure time
-#pragma omp begin declare variant match(device = {kind(host)})
-static constexpr bool kokkos_omp_on_host() { return true; }
-#pragma omp end declare variant
-
-#pragma omp begin declare variant match(device = {kind(nohost)})
-static constexpr bool kokkos_omp_on_host() { return false; }
-#pragma omp end declare variant
-
-#define KOKKOS_IMPL_IF_ON_HOST if constexpr (kokkos_omp_on_host())
-#endif
-#else
-#define KOKKOS_IMPL_IF_ON_HOST if (true)
-#endif
-
 namespace Kokkos {
 namespace Impl {
 
@@ -86,8 +65,13 @@ class SharedAllocationHeader {
  private:
   using Record = SharedAllocationRecord<void, void>;
 
+#ifdef KOKKOS_ARCH_VEGA
+  static constexpr unsigned maximum_label_length =
+      (1u << 8 /* 256 */) - sizeof(Record*);
+#else
   static constexpr unsigned maximum_label_length =
       (1u << 7 /* 128 */) - sizeof(Record*);
+#endif
 
   template <class, class>
   friend class SharedAllocationRecord;
@@ -114,8 +98,13 @@ class SharedAllocationHeader {
 template <>
 class SharedAllocationRecord<void, void> {
  protected:
+#ifdef KOKKOS_ARCH_VEGA
+  static_assert(sizeof(SharedAllocationHeader) == (1u << 8 /* 256 */),
+                "sizeof(SharedAllocationHeader) != 256");
+#else
   static_assert(sizeof(SharedAllocationHeader) == (1u << 7 /* 128 */),
                 "sizeof(SharedAllocationHeader) != 128");
+#endif
 
   template <class, class>
   friend class SharedAllocationRecord;
@@ -135,6 +124,7 @@ class SharedAllocationRecord<void, void> {
   SharedAllocationRecord* m_next;
 #endif
   int m_count;
+  std::string m_label;
 
   SharedAllocationRecord(SharedAllocationRecord&&)      = delete;
   SharedAllocationRecord(const SharedAllocationRecord&) = delete;
@@ -149,40 +139,27 @@ class SharedAllocationRecord<void, void> {
       SharedAllocationRecord* arg_root,
 #endif
       SharedAllocationHeader* arg_alloc_ptr, size_t arg_alloc_size,
-      function_type arg_dealloc);
+      function_type arg_dealloc, const std::string& label);
  private:
   static KOKKOS_THREAD_LOCAL int t_tracking_enabled;
 
  public:
   virtual std::string get_label() const { return std::string("Unmanaged"); }
 
-#ifdef KOKKOS_IMPL_ENABLE_OVERLOAD_HOST_DEVICE
-  /* Device tracking_enabled -- always disabled */
-  KOKKOS_IMPL_DEVICE_FUNCTION
-  static int tracking_enabled() { return 0; }
-#endif
-
-  KOKKOS_IMPL_HOST_FUNCTION
-  static int tracking_enabled() {
-    KOKKOS_IMPL_IF_ON_HOST { return t_tracking_enabled; }
-    else {
-      return 0;
-    }
+  static KOKKOS_FUNCTION int tracking_enabled() {
+    KOKKOS_IF_ON_HOST(return t_tracking_enabled;)
+    KOKKOS_IF_ON_DEVICE(return 0;)
   }
 
   /**\brief A host process thread claims and disables the
    *        shared allocation tracking flag.
    */
-  static void tracking_disable() {
-    KOKKOS_IMPL_IF_ON_HOST { t_tracking_enabled = 0; }
-  }
+  static void tracking_disable() { t_tracking_enabled = 0; }
 
   /**\brief A host process thread releases and enables the
    *        shared allocation tracking flag.
    */
-  static void tracking_enable() {
-    KOKKOS_IMPL_IF_ON_HOST { t_tracking_enabled = 1; }
-  }
+  static void tracking_enable() { t_tracking_enabled = 1; }
 
   virtual ~SharedAllocationRecord() = default;
 
@@ -216,25 +193,11 @@ class SharedAllocationRecord<void, void> {
   /* Cannot be 'constexpr' because 'm_count' is volatile */
   int use_count() const { return *static_cast<const volatile int*>(&m_count); }
 
-#ifdef KOKKOS_IMPL_ENABLE_OVERLOAD_HOST_DEVICE
-  /* Device tracking_enabled -- always disabled */
-  KOKKOS_IMPL_DEVICE_FUNCTION
-  static void increment(SharedAllocationRecord*){};
-#endif
-
   /* Increment use count */
-  KOKKOS_IMPL_HOST_FUNCTION
   static void increment(SharedAllocationRecord*);
 
-#ifdef KOKKOS_IMPL_ENABLE_OVERLOAD_HOST_DEVICE
-  /* Device tracking_enabled -- always disabled */
-  KOKKOS_IMPL_DEVICE_FUNCTION
-  static void decrement(SharedAllocationRecord*){};
-#endif
-
   /* Decrement use count. If 1->0 then remove from the tracking list and invoke
    * m_dealloc */
-  KOKKOS_IMPL_HOST_FUNCTION
   static SharedAllocationRecord* decrement(SharedAllocationRecord*);
 
   /* Given a root record and data pointer find the record */
@@ -356,18 +319,14 @@ class SharedAllocationRecord
 
   // Allocate with a zero use count.  Incrementing the use count from zero to
   // one inserts the record into the tracking list.  Decrementing the count from
-  // one to zero removes from the trakcing list and deallocates.
+  // one to zero removes from the tracking list and deallocates.
   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
       const MemorySpace& arg_space, const std::string& arg_label,
       const size_t arg_alloc) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    return new SharedAllocationRecord(arg_space, arg_label, arg_alloc);
-#else
-    (void)arg_space;
-    (void)arg_label;
-    (void)arg_alloc;
-    return (SharedAllocationRecord*)0;
-#endif
+    KOKKOS_IF_ON_HOST(
+        (return new SharedAllocationRecord(arg_space, arg_label, arg_alloc);))
+    KOKKOS_IF_ON_DEVICE(
+        ((void)arg_space; (void)arg_label; (void)arg_alloc; return nullptr;))
   }
 };
 
@@ -390,51 +349,20 @@ union SharedAllocationTracker {
   // pressure on compiler optimization by reducing
   // number of symbols and inline functions.
 
-#if defined(KOKKOS_IMPL_ENABLE_OVERLOAD_HOST_DEVICE)
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED Record::tracking_enabled()
-
-#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_CONDITION \
-  (!(m_record_bits & DO_NOT_DEREF_FLAG))
-#else
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_CONDITION (0)
-#endif
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
-  if (KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_CONDITION)  \
-    KOKKOS_IMPL_IF_ON_HOST Record::increment(m_record);
+#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT          \
+  KOKKOS_IF_ON_HOST((if (!(m_record_bits & DO_NOT_DEREF_FLAG)) { \
+    Record::increment(m_record);                                 \
+  }))
 
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
-  if (KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_CONDITION)  \
-    KOKKOS_IMPL_IF_ON_HOST Record::decrement(m_record);
-
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED Record::tracking_enabled()
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
-  if (!(m_record_bits & DO_NOT_DEREF_FLAG))             \
-    KOKKOS_IMPL_IF_ON_HOST Record::increment(m_record);
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
-  if (!(m_record_bits & DO_NOT_DEREF_FLAG))             \
-    KOKKOS_IMPL_IF_ON_HOST Record::decrement(m_record);
-
-#else
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED 0
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
-
-#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
-
-#endif
+#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT          \
+  KOKKOS_IF_ON_HOST((if (!(m_record_bits & DO_NOT_DEREF_FLAG)) { \
+    Record::decrement(m_record);                                 \
+  }))
 
 #define KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs,               \
                                                         override_tracking) \
   (((!override_tracking) || (rhs.m_record_bits & DO_NOT_DEREF_FLAG) ||     \
-    (!KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED))                      \
+    (!Record::tracking_enabled()))                                         \
        ? rhs.m_record_bits | DO_NOT_DEREF_FLAG                             \
        : rhs.m_record_bits)
 
@@ -467,17 +395,14 @@ union SharedAllocationTracker {
 
   KOKKOS_INLINE_FUNCTION
   int use_count() const {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    Record* const tmp =
-        reinterpret_cast<Record*>(m_record_bits & ~DO_NOT_DEREF_FLAG);
-    return (tmp ? tmp->use_count() : 0);
-#else
-    return 0;
-#endif
+    KOKKOS_IF_ON_HOST((Record* const tmp = reinterpret_cast<Record*>(
+                           m_record_bits & ~DO_NOT_DEREF_FLAG);
+                       return (tmp ? tmp->use_count() : 0);))
+
+    KOKKOS_IF_ON_DEVICE((return 0;))
   }
 
-  KOKKOS_INLINE_FUNCTION
-  bool has_record() const {
+  KOKKOS_INLINE_FUNCTION bool has_record() const {
     return (m_record_bits & (~DO_NOT_DEREF_FLAG)) != 0;
   }
 
@@ -540,7 +465,7 @@ union SharedAllocationTracker {
    *  are the result of deconstructing the
    *  KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS macro.  This
    *  allows the caller to do the check for tracking enabled and managed
-   *  apart from the assignement of the record because the tracking
+   *  apart from the assignment of the record because the tracking
    *  enabled / managed question may be important for other tasks as well
    */
 
@@ -577,7 +502,6 @@ union SharedAllocationTracker {
     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
   }
 
-#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
 #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
 #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
 };
diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc_timpl.hpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc_timpl.hpp
index a6ee1b3f9eb11ddfbfd2c1ce5dd7a213bd25dda9..276217c7d4f78cae7ac925e51d074cf3d60c195f 100644
--- a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc_timpl.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc_timpl.hpp
@@ -104,6 +104,9 @@ void* SharedAllocationRecordCommon<MemorySpace>::reallocate_tracked(
 
   Kokkos::Impl::DeepCopy<MemorySpace, MemorySpace>(
       r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size()));
+  Kokkos::fence(
+      "SharedAllocationRecord<Kokkos::Experimental::HBWSpace, "
+      "void>::reallocate_tracked(): fence after copying data");
 
   record_base_t::increment(r_new);
   record_base_t::decrement(r_old);
@@ -130,7 +133,7 @@ auto SharedAllocationRecordCommon<MemorySpace>::get_record(void* alloc_ptr)
 
 template <class MemorySpace>
 std::string SharedAllocationRecordCommon<MemorySpace>::get_label() const {
-  return std::string(record_base_t::head()->m_label);
+  return record_base_t::m_label;
 }
 
 template <class MemorySpace>
@@ -181,6 +184,9 @@ void HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::print_records(
       if (r->m_alloc_ptr) {
         Kokkos::Impl::DeepCopy<HostSpace, MemorySpace>(
             &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader));
+        Kokkos::fence(
+            "HostInaccessibleSharedAllocationRecordCommon::print_records(): "
+            "fence after copying header to HostSpace");
       } else {
         head.m_label[0] = 0;
       }
@@ -213,6 +219,9 @@ void HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::print_records(
       if (r->m_alloc_ptr) {
         Kokkos::Impl::DeepCopy<HostSpace, MemorySpace>(
             &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader));
+        Kokkos::fence(
+            "HostInaccessibleSharedAllocationRecordCommon::print_records(): "
+            "fence after copying header to HostSpace");
 
         // Formatting dependent on sizeof(uintptr_t)
         const char* format_string;
@@ -253,8 +262,12 @@ auto HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::get_record(
       alloc_ptr ? SharedAllocationHeader::get_header(alloc_ptr) : nullptr;
 
   if (alloc_ptr) {
-    Kokkos::Impl::DeepCopy<HostSpace, MemorySpace>(
-        &head, head_cuda, sizeof(SharedAllocationHeader));
+    typename MemorySpace::execution_space exec_space;
+    Kokkos::Impl::DeepCopy<HostSpace, MemorySpace, decltype(exec_space)>(
+        exec_space, &head, head_cuda, sizeof(SharedAllocationHeader));
+    exec_space.fence(
+        "HostInaccessibleSharedAllocationRecordCommon::get_record(): fence "
+        "after copying header to HostSpace");
   }
 
   derived_t* const record =
@@ -273,12 +286,7 @@ auto HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::get_record(
 template <class MemorySpace>
 std::string
 HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::get_label() const {
-  SharedAllocationHeader header;
-
-  Kokkos::Impl::DeepCopy<Kokkos::HostSpace, MemorySpace>(
-      &header, this->record_base_t::head(), sizeof(SharedAllocationHeader));
-
-  return std::string(header.m_label);
+  return record_base_t::m_label;
 }
 
 }  // end namespace Impl
diff --git a/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp b/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp
index 8ac034e249f1c1d1a4309003ee77c0cbe38682de..f46d89226ceddea11c223087e94a5760f0075d8d 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp
@@ -43,7 +43,6 @@
 */
 
 #include <Kokkos_Macros.hpp>
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
 
 #include <Kokkos_Atomic.hpp>
 #include <impl/Kokkos_Spinwait.hpp>
@@ -65,7 +64,7 @@ void host_thread_yield(const uint32_t i, const WaitMode mode) {
   static constexpr uint32_t sleep_limit = 1 << 13;
   static constexpr uint32_t yield_limit = 1 << 12;
 
-  const int c = Kokkos::log2(i);
+  const int c = int_log2(i);
 
   if (WaitMode::ROOT != mode) {
     if (sleep_limit < i) {
@@ -135,7 +134,3 @@ void host_thread_yield(const uint32_t i, const WaitMode mode) {
 
 }  // namespace Impl
 }  // namespace Kokkos
-
-#else
-void KOKKOS_CORE_SRC_IMPL_SPINWAIT_PREVENT_LINK_ERROR() {}
-#endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp
index 06581052a8f687aeaff85d1368fd271407f0c36e..e9f5d91aa89058fb574339d544e0eb3bef4d66d7 100644
--- a/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp
@@ -314,7 +314,7 @@ class Task : public TaskBase, public FunctorType {
     // If team then only one thread calls destructor.
 
     const bool only_one_thread =
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
+#ifdef __CUDA_ARCH__  // FIXME_CUDA
         0 == threadIdx.x && 0 == threadIdx.y;
 #else
         0 == member->team_rank();
diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp
index caf1d0a84b82e3e6b121476c8cfd0213775dd69f..7cfd696d2c40958fe60167ad7fe5d39091ca1a9e 100644
--- a/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp
@@ -669,7 +669,7 @@ class alignas(16) RunnableTask
     // If team then only one thread calls destructor.
 
     const bool only_one_thread =
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
+#ifdef __CUDA_ARCH__  // FIXME_CUDA
         0 == threadIdx.x && 0 == threadIdx.y;
 #else
         0 == member->team_rank();
diff --git a/packages/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp b/packages/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e734b369b2b70ecd7339aec264dff1a3c2a37994
--- /dev/null
+++ b/packages/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp
@@ -0,0 +1,493 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_IMPL_KOKKOS_TOOLS_GENERIC_HPP
+#define KOKKOS_IMPL_KOKKOS_TOOLS_GENERIC_HPP
+
+#include <impl/Kokkos_Profiling.hpp>
+
+#include <Kokkos_Core_fwd.hpp>
+#include <Kokkos_ExecPolicy.hpp>
+#include <Kokkos_Macros.hpp>
+#include <Kokkos_Tuners.hpp>
+
+namespace Kokkos {
+
+namespace Tools {
+
+namespace Experimental {
+
+namespace Impl {
+
+static std::map<std::string, Kokkos::Tools::Experimental::TeamSizeTuner>
+    team_tuners;
+
+template <int Rank>
+using MDRangeTuningMap =
+    std::map<std::string, Kokkos::Tools::Experimental::MDRangeTuner<Rank>>;
+
+template <int Rank>
+static MDRangeTuningMap<Rank> mdrange_tuners;
+
+// For any policies without a tuning implementation, with a reducer
+template <class ReducerType, class ExecPolicy, class Functor, typename TagType>
+void tune_policy(const size_t, const std::string&, ExecPolicy&, const Functor&,
+                 TagType) {}
+
+// For any policies without a tuning implementation, without a reducer
+template <class ExecPolicy, class Functor, typename TagType>
+void tune_policy(const size_t, const std::string&, ExecPolicy&, const Functor&,
+                 const TagType&) {}
+
+/**
+ * Tuning for parallel_fors and parallel_scans is a fairly simple process.
+ *
+ * Tuning for a parallel_reduce turns out to be a little more complicated.
+ *
+ * If you're tuning a reducer, it might be a complex or a simple reducer
+ * (an example of simple would be one where the join is just "+".
+ *
+ * Unfortunately these two paths are very different in terms of which classes
+ * get instantiated. Thankfully, all of this complexity is encoded in the
+ * ReducerType. If it's a "simple" reducer, this will be Kokkos::InvalidType,
+ * otherwise it'll be something else.
+ *
+ * If the type is complex, for the code to be generally right you _must_
+ * pass an instance of that ReducerType to functions that determine
+ * eligible team sizes. If the type is simple, you can't construct one,
+ * you use the simpler 2-arg formulation of team_size_recommended/max.
+ */
+
+namespace Impl {
+
+struct SimpleTeamSizeCalculator {
+  template <typename Policy, typename Functor, typename Tag>
+  int get_max_team_size(const Policy& policy, const Functor& functor,
+                        const Tag tag) {
+    auto max = policy.team_size_max(functor, tag);
+    return max;
+  }
+  template <typename Policy, typename Functor, typename Tag>
+  int get_recommended_team_size(const Policy& policy, const Functor& functor,
+                                const Tag tag) {
+    auto max = policy.team_size_recommended(functor, tag);
+    return max;
+  }
+  template <typename Policy, typename Functor>
+  int get_mdrange_max_tile_size_product(const Policy& policy,
+                                        const Functor& functor,
+                                        const Kokkos::ParallelForTag&) {
+    using exec_space = typename Policy::execution_space;
+    using driver     = Kokkos::Impl::ParallelFor<Functor, Policy, exec_space>;
+    return driver::max_tile_size_product(policy, functor);
+  }
+  template <typename Policy, typename Functor>
+  int get_mdrange_max_tile_size_product(const Policy& policy,
+                                        const Functor& functor,
+                                        const Kokkos::ParallelReduceTag&) {
+    using exec_space = typename Policy::execution_space;
+    using driver =
+        Kokkos::Impl::ParallelReduce<Functor, Policy, Kokkos::InvalidType,
+                                     exec_space>;
+    return driver::max_tile_size_product(policy, functor);
+  }
+};
+
+// when we have a complex reducer, we need to pass an
+// instance to team_size_recommended/max. Reducers
+// aren't default constructible, but they are
+// constructible from a reference to an
+// instance of their value_type so we construct
+// a value_type and temporary reducer here
+template <typename ReducerType>
+struct ComplexReducerSizeCalculator {
+  template <typename Policy, typename Functor, typename Tag>
+  int get_max_team_size(const Policy& policy, const Functor& functor,
+                        const Tag tag) {
+    using value_type = typename ReducerType::value_type;
+    value_type value;
+    ReducerType reducer_example = ReducerType(value);
+    return policy.team_size_max(functor, reducer_example, tag);
+  }
+  template <typename Policy, typename Functor, typename Tag>
+  int get_recommended_team_size(const Policy& policy, const Functor& functor,
+                                const Tag tag) {
+    using value_type = typename ReducerType::value_type;
+    value_type value;
+    ReducerType reducer_example = ReducerType(value);
+    return policy.team_size_recommended(functor, reducer_example, tag);
+  }
+  template <typename Policy, typename Functor>
+  int get_mdrange_max_tile_size_product(const Policy& policy,
+                                        const Functor& functor,
+                                        const Kokkos::ParallelReduceTag&) {
+    using exec_space = typename Policy::execution_space;
+    using driver =
+        Kokkos::Impl::ParallelReduce<Functor, Policy, ReducerType, exec_space>;
+    return driver::max_tile_size_product(policy, functor);
+  }
+};
+
+}  // namespace Impl
+
+template <class Tuner, class Functor, class TagType,
+          class TuningPermissionFunctor, class Map, class Policy>
+void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy,
+                         const Functor& functor, const TagType& tag,
+                         const TuningPermissionFunctor& should_tune) {
+  if (should_tune(policy)) {
+    std::string label = label_in;
+    if (label_in.empty()) {
+      using policy_type =
+          typename std::remove_reference<decltype(policy)>::type;
+      using work_tag = typename policy_type::work_tag;
+      Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label);
+      label = name.get();
+    }
+    auto tuner_iter = [&]() {
+      auto my_tuner = map.find(label);
+      if (my_tuner == map.end()) {
+        return (map.emplace(label, Tuner(label, policy, functor, tag,
+                                         Impl::SimpleTeamSizeCalculator{}))
+                    .first);
+      }
+      return my_tuner;
+    }();
+    tuner_iter->second.tune(policy);
+  }
+}
+template <class Tuner, class ReducerType, class Functor, class TagType,
+          class TuningPermissionFunctor, class Map, class Policy>
+void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy,
+                         const Functor& functor, const TagType& tag,
+                         const TuningPermissionFunctor& should_tune) {
+  if (should_tune(policy)) {
+    std::string label = label_in;
+    if (label_in.empty()) {
+      using policy_type =
+          typename std::remove_reference<decltype(policy)>::type;
+      using work_tag = typename policy_type::work_tag;
+      Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label);
+      label = name.get();
+    }
+    auto tuner_iter = [&]() {
+      auto my_tuner = map.find(label);
+      if (my_tuner == map.end()) {
+        return (map.emplace(
+                       label,
+                       Tuner(label, policy, functor, tag,
+                             Impl::ComplexReducerSizeCalculator<ReducerType>{}))
+                    .first);
+      }
+      return my_tuner;
+    }();
+    tuner_iter->second.tune(policy);
+  }
+}
+
+// tune a TeamPolicy, without reducer
+template <class Functor, class TagType, class... Properties>
+void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
+                 Kokkos::TeamPolicy<Properties...>& policy,
+                 const Functor& functor, const TagType& tag) {
+  generic_tune_policy<Experimental::TeamSizeTuner>(
+      label_in, team_tuners, policy, functor, tag,
+      [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) {
+        return (candidate_policy.impl_auto_team_size() ||
+                candidate_policy.impl_auto_vector_length());
+      });
+}
+
+// tune a TeamPolicy, with reducer
+template <class ReducerType, class Functor, class TagType, class... Properties>
+void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
+                 Kokkos::TeamPolicy<Properties...>& policy,
+                 const Functor& functor, const TagType& tag) {
+  generic_tune_policy<Experimental::TeamSizeTuner, ReducerType>(
+      label_in, team_tuners, policy, functor, tag,
+      [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) {
+        return (candidate_policy.impl_auto_team_size() ||
+                candidate_policy.impl_auto_vector_length());
+      });
+}
+
+// tune a MDRangePolicy, without reducer
+template <class Functor, class TagType, class... Properties>
+void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
+                 Kokkos::MDRangePolicy<Properties...>& policy,
+                 const Functor& functor, const TagType& tag) {
+  using Policy              = Kokkos::MDRangePolicy<Properties...>;
+  static constexpr int rank = Policy::rank;
+  generic_tune_policy<Experimental::MDRangeTuner<rank>>(
+      label_in, mdrange_tuners<rank>, policy, functor, tag,
+      [](const Policy& candidate_policy) {
+        return candidate_policy.impl_tune_tile_size();
+      });
+}
+
+// tune a MDRangePolicy, with reducer
+template <class ReducerType, class Functor, class TagType, class... Properties>
+void tune_policy(const size_t /**tuning_context*/, const std::string& label_in,
+                 Kokkos::MDRangePolicy<Properties...>& policy,
+                 const Functor& functor, const TagType& tag) {
+  using Policy              = Kokkos::MDRangePolicy<Properties...>;
+  static constexpr int rank = Policy::rank;
+  generic_tune_policy<Experimental::MDRangeTuner<rank>, ReducerType>(
+      label_in, mdrange_tuners<rank>, policy, functor, tag,
+      [](const Policy& candidate_policy) {
+        return candidate_policy.impl_tune_tile_size();
+      });
+}
+
+template <class ReducerType>
+struct ReductionSwitcher {
+  template <class Functor, class TagType, class ExecPolicy>
+  static void tune(const size_t tuning_context, const std::string& label,
+                   ExecPolicy& policy, const Functor& functor,
+                   const TagType& tag) {
+    if (Kokkos::tune_internals()) {
+      tune_policy<ReducerType>(tuning_context, label, policy, functor, tag);
+    }
+  }
+};
+
+template <>
+struct ReductionSwitcher<Kokkos::InvalidType> {
+  template <class Functor, class TagType, class ExecPolicy>
+  static void tune(const size_t tuning_context, const std::string& label,
+                   ExecPolicy& policy, const Functor& functor,
+                   const TagType& tag) {
+    if (Kokkos::tune_internals()) {
+      tune_policy(tuning_context, label, policy, functor, tag);
+    }
+  }
+};
+
+template <class Tuner, class Functor, class TagType,
+          class TuningPermissionFunctor, class Map, class Policy>
+void generic_report_results(const std::string& label_in, Map& map,
+                            Policy& policy, const Functor&, const TagType&,
+                            const TuningPermissionFunctor& should_tune) {
+  if (should_tune(policy)) {
+    std::string label = label_in;
+    if (label_in.empty()) {
+      using policy_type =
+          typename std::remove_reference<decltype(policy)>::type;
+      using work_tag = typename policy_type::work_tag;
+      Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label);
+      label = name.get();
+    }
+    auto tuner_iter = map[label];
+    tuner_iter.end();
+  }
+}
+
+// report results for a policy type we don't tune (do nothing)
+template <class ExecPolicy, class Functor, typename TagType>
+void report_policy_results(const size_t, const std::string&, ExecPolicy&,
+                           const Functor&, const TagType&) {}
+
+// report results for a TeamPolicy
+template <class Functor, class TagType, class... Properties>
+void report_policy_results(const size_t /**tuning_context*/,
+                           const std::string& label_in,
+                           Kokkos::TeamPolicy<Properties...>& policy,
+                           const Functor& functor, const TagType& tag) {
+  generic_report_results<Experimental::TeamSizeTuner>(
+      label_in, team_tuners, policy, functor, tag,
+      [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) {
+        return (candidate_policy.impl_auto_team_size() ||
+                candidate_policy.impl_auto_vector_length());
+      });
+}
+
+// report results for an MDRangePolicy
+template <class Functor, class TagType, class... Properties>
+void report_policy_results(const size_t /**tuning_context*/,
+                           const std::string& label_in,
+                           Kokkos::MDRangePolicy<Properties...>& policy,
+                           const Functor& functor, const TagType& tag) {
+  using Policy              = Kokkos::MDRangePolicy<Properties...>;
+  static constexpr int rank = Policy::rank;
+  generic_report_results<Experimental::MDRangeTuner<rank>>(
+      label_in, mdrange_tuners<rank>, policy, functor, tag,
+      [](const Policy& candidate_policy) {
+        return candidate_policy.impl_tune_tile_size();
+      });
+}
+
+}  // namespace Impl
+
+}  // namespace Experimental
+
+namespace Impl {
+
+template <class ExecPolicy, class FunctorType>
+void begin_parallel_for(ExecPolicy& policy, FunctorType& functor,
+                        const std::string& label, uint64_t& kpID) {
+  if (Kokkos::Tools::profileLibraryLoaded()) {
+    Kokkos::Impl::ParallelConstructName<FunctorType,
+                                        typename ExecPolicy::work_tag>
+        name(label);
+    Kokkos::Tools::beginParallelFor(
+        name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
+        &kpID);
+  }
+#ifdef KOKKOS_ENABLE_TUNING
+  size_t context_id = Kokkos::Tools::Experimental::get_new_context_id();
+  if (Kokkos::tune_internals()) {
+    Experimental::Impl::tune_policy(context_id, label, policy, functor,
+                                    Kokkos::ParallelForTag{});
+  }
+#else
+  (void)functor;
+#endif
+}
+
+template <class ExecPolicy, class FunctorType>
+void end_parallel_for(ExecPolicy& policy, FunctorType& functor,
+                      const std::string& label, uint64_t& kpID) {
+  if (Kokkos::Tools::profileLibraryLoaded()) {
+    Kokkos::Tools::endParallelFor(kpID);
+  }
+#ifdef KOKKOS_ENABLE_TUNING
+  size_t context_id = Kokkos::Tools::Experimental::get_current_context_id();
+  if (Kokkos::tune_internals()) {
+    Experimental::Impl::report_policy_results(
+        context_id, label, policy, functor, Kokkos::ParallelForTag{});
+  }
+#else
+  (void)policy;
+  (void)functor;
+  (void)label;
+#endif
+}
+
+template <class ExecPolicy, class FunctorType>
+void begin_parallel_scan(ExecPolicy& policy, FunctorType& functor,
+                         const std::string& label, uint64_t& kpID) {
+  if (Kokkos::Tools::profileLibraryLoaded()) {
+    Kokkos::Impl::ParallelConstructName<FunctorType,
+                                        typename ExecPolicy::work_tag>
+        name(label);
+    Kokkos::Tools::beginParallelScan(
+        name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
+        &kpID);
+  }
+#ifdef KOKKOS_ENABLE_TUNING
+  size_t context_id = Kokkos::Tools::Experimental::get_new_context_id();
+  if (Kokkos::tune_internals()) {
+    Experimental::Impl::tune_policy(context_id, label, policy, functor,
+                                    Kokkos::ParallelScanTag{});
+  }
+#else
+  (void)functor;
+#endif
+}
+
+template <class ExecPolicy, class FunctorType>
+void end_parallel_scan(ExecPolicy& policy, FunctorType& functor,
+                       const std::string& label, uint64_t& kpID) {
+  if (Kokkos::Tools::profileLibraryLoaded()) {
+    Kokkos::Tools::endParallelScan(kpID);
+  }
+#ifdef KOKKOS_ENABLE_TUNING
+  size_t context_id = Kokkos::Tools::Experimental::get_current_context_id();
+  if (Kokkos::tune_internals()) {
+    Experimental::Impl::report_policy_results(
+        context_id, label, policy, functor, Kokkos::ParallelScanTag{});
+  }
+#else
+  (void)policy;
+  (void)functor;
+  (void)label;
+#endif
+}
+
+template <class ReducerType, class ExecPolicy, class FunctorType>
+void begin_parallel_reduce(ExecPolicy& policy, FunctorType& functor,
+                           const std::string& label, uint64_t& kpID) {
+  if (Kokkos::Tools::profileLibraryLoaded()) {
+    Kokkos::Impl::ParallelConstructName<FunctorType,
+                                        typename ExecPolicy::work_tag>
+        name(label);
+    Kokkos::Tools::beginParallelReduce(
+        name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
+        &kpID);
+  }
+#ifdef KOKKOS_ENABLE_TUNING
+  size_t context_id = Kokkos::Tools::Experimental::get_new_context_id();
+  Experimental::Impl::ReductionSwitcher<ReducerType>::tune(
+      context_id, label, policy, functor, Kokkos::ParallelReduceTag{});
+#else
+  (void)functor;
+#endif
+}
+
+template <class ReducerType, class ExecPolicy, class FunctorType>
+void end_parallel_reduce(ExecPolicy& policy, FunctorType& functor,
+                         const std::string& label, uint64_t& kpID) {
+  if (Kokkos::Tools::profileLibraryLoaded()) {
+    Kokkos::Tools::endParallelReduce(kpID);
+  }
+#ifdef KOKKOS_ENABLE_TUNING
+  size_t context_id = Kokkos::Tools::Experimental::get_current_context_id();
+  if (Kokkos::tune_internals()) {
+    Experimental::Impl::report_policy_results(
+        context_id, label, policy, functor, Kokkos::ParallelReduceTag{});
+  }
+#else
+  (void)policy;
+  (void)functor;
+  (void)label;
+#endif
+}
+
+}  // end namespace Impl
+
+}  // namespace Tools
+
+}  // namespace Kokkos
+
+#endif  // header guard
diff --git a/packages/kokkos/core/src/impl/Kokkos_Traits.hpp b/packages/kokkos/core/src/impl/Kokkos_Traits.hpp
index d88230f5b247829dbf6e8ee79b111cb2d1309118..aa38388acc40b3645b92dc1400c3001f8bf75f75 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Traits.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Traits.hpp
@@ -261,31 +261,6 @@ constexpr unsigned integral_power_of_two(const size_t N) {
                                      : ~0u;
 }
 
-//----------------------------------------------------------------------------
-
-template <size_t N>
-struct is_power_of_two {
-  enum type { value = (N > 0) && !(N & (N - 1)) };
-};
-
-template <size_t N, bool OK = is_power_of_two<N>::value>
-struct power_of_two;
-
-template <size_t N>
-struct power_of_two<N, true> {
-  enum type { value = 1 + power_of_two<(N >> 1), true>::value };
-};
-
-template <>
-struct power_of_two<2, true> {
-  enum type { value = 1 };
-};
-
-template <>
-struct power_of_two<1, true> {
-  enum type { value = 0 };
-};
-
 /** \brief  If power of two then return power,
  *          otherwise return ~0u.
  */
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
index 7adbe4690d7a914a00e37ced88a4c0312a9557a1..fbda3e09318201655c34cd870dfc93a154071360 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
@@ -355,7 +355,8 @@ class ViewMapping<Traits, Kokkos::Array<>> {
 
     using execution_space = typename alloc_prop::execution_space;
     using memory_space    = typename Traits::memory_space;
-    using functor_type    = ViewValueFunctor<execution_space, scalar_type>;
+    using functor_type =
+        ViewValueFunctor<typename Traits::device_type, scalar_type>;
     using record_type =
         Kokkos::Impl::SharedAllocationRecord<memory_space, functor_type>;
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp
index 797b3f584b6234b290a809e7f1c502e04249f058..cc3953c05e3292b1f9f45d0d21d014a06f40ddab 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp
@@ -55,6 +55,18 @@ struct WithoutInitializing_t {};
 struct AllowPadding_t {};
 struct NullSpace_t {};
 
+template <typename>
+struct is_view_ctor_property : public std::false_type {};
+
+template <>
+struct is_view_ctor_property<WithoutInitializing_t> : public std::true_type {};
+
+template <>
+struct is_view_ctor_property<AllowPadding_t> : public std::true_type {};
+
+template <>
+struct is_view_ctor_property<NullSpace_t> : public std::true_type {};
+
 //----------------------------------------------------------------------------
 /**\brief Whether a type can be used for a view label */
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
index 9523118748f09907933764aea6ca94a085e68a9d..09f7af09186966dd797e8c6d6b19b3e735fa4d73 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
@@ -642,25 +642,21 @@ struct SubviewExtents {
     error(buf + n, buf_len - n, domain_rank + 1, range_rank + 1, dim, args...);
   }
 
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
   template <size_t... DimArgs, class... Args>
   KOKKOS_FORCEINLINE_FUNCTION void error(const ViewDimension<DimArgs...>& dim,
                                          Args... args) const {
-    enum { LEN = 1024 };
-    char buffer[LEN];
+    KOKKOS_IF_ON_HOST(
+        (enum {LEN = 1024}; char buffer[LEN];
 
-    const int n = snprintf(buffer, LEN, "Kokkos::subview bounds error (");
-    error(buffer + n, LEN - n, 0, 0, dim, args...);
+         const int n = snprintf(buffer, LEN, "Kokkos::subview bounds error (");
+         error(buffer + n, LEN - n, 0, 0, dim, args...);
 
-    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
-  }
-#else
-  template <size_t... DimArgs, class... Args>
-  KOKKOS_FORCEINLINE_FUNCTION void error(const ViewDimension<DimArgs...>&,
-                                         Args...) const {
-    Kokkos::abort("Kokkos::subview bounds error");
+         Kokkos::Impl::throw_runtime_exception(std::string(buffer));))
+
+    KOKKOS_IF_ON_DEVICE(((void)dim;
+                         Kokkos::abort("Kokkos::subview bounds error");
+                         [](Args...) {}(args...);))
   }
-#endif
 
 #else
 
@@ -2449,8 +2445,10 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> {
   /* Cardinality of the domain index space */
   KOKKOS_INLINE_FUNCTION
   constexpr size_type size() const {
-    return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
-           m_dim.N6 * m_dim.N7;
+    return dimension_type::rank == 0
+               ? 1
+               : m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
+                     m_dim.N5 * m_dim.N6 * m_dim.N7;
   }
 
  private:
@@ -2463,16 +2461,19 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> {
   /* Span of the range space, largest stride * dimension */
   KOKKOS_INLINE_FUNCTION
   constexpr size_type span() const {
-    return size() == size_type(0)
-               ? size_type(0)
-               : Max(m_dim.N0 * m_stride.S0,
-                     Max(m_dim.N1 * m_stride.S1,
-                         Max(m_dim.N2 * m_stride.S2,
-                             Max(m_dim.N3 * m_stride.S3,
-                                 Max(m_dim.N4 * m_stride.S4,
-                                     Max(m_dim.N5 * m_stride.S5,
-                                         Max(m_dim.N6 * m_stride.S6,
-                                             m_dim.N7 * m_stride.S7)))))));
+    return dimension_type::rank == 0
+               ? 1
+               : (size() == size_type(0)
+                      ? size_type(0)
+                      : Max(m_dim.N0 * m_stride.S0,
+                            Max(m_dim.N1 * m_stride.S1,
+                                Max(m_dim.N2 * m_stride.S2,
+                                    Max(m_dim.N3 * m_stride.S3,
+                                        Max(m_dim.N4 * m_stride.S4,
+                                            Max(m_dim.N5 * m_stride.S5,
+                                                Max(m_dim.N6 * m_stride.S6,
+                                                    m_dim.N7 *
+                                                        m_stride.S7))))))));
   }
 
   KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
@@ -2930,10 +2931,11 @@ struct ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> {
       uint64_t kpID = 0;
       if (Kokkos::Profiling::profileLibraryLoaded()) {
         functor_name =
-            (destroy ? "Kokkos::View::destruction [" + name + "]"
-                     : "Kokkos::View::initialization [" + name + "]");
-        Kokkos::Tools::Impl::begin_parallel_for(policy, *this, functor_name,
-                                                kpID);
+            (destroy ? "Kokkos::View::destruction [" + functor_name + "]"
+                     : "Kokkos::View::initialization [" + functor_name + "]");
+        Kokkos::Profiling::beginParallelFor(
+            "Kokkos::View::initialization [" + functor_name + "]",
+            Kokkos::Profiling::Experimental::device_id(space), &kpID);
       }
 
 #ifdef KOKKOS_ENABLE_CUDA
@@ -2947,8 +2949,7 @@ struct ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> {
       closure.execute();
       space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence");
       if (Kokkos::Profiling::profileLibraryLoaded()) {
-        Kokkos::Tools::Impl::end_parallel_for(policy, *this, functor_name,
-                                              kpID);
+        Kokkos::Profiling::endParallelFor(kpID);
       }
     } else {
       for (size_t i = 0; i < n; ++i) operator()(i);
@@ -3027,8 +3028,9 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> {
       std::string functor_name = "Kokkos::View::initialization [" + name + "]";
       uint64_t kpID            = 0;
       if (Kokkos::Profiling::profileLibraryLoaded()) {
-        Kokkos::Tools::Impl::begin_parallel_for(policy, *this, functor_name,
-                                                kpID);
+        Kokkos::Profiling::beginParallelFor(
+            "Kokkos::View::initialization [" + name + "]",
+            Kokkos::Profiling::Experimental::device_id(space), &kpID);
       }
 #ifdef KOKKOS_ENABLE_CUDA
       if (std::is_same<ExecSpace, Kokkos::Cuda>::value) {
@@ -3042,8 +3044,7 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> {
       space.fence(
           "Kokkos::Impl::ViewValueFunctor: Fence after setting values in view");
       if (Kokkos::Profiling::profileLibraryLoaded()) {
-        Kokkos::Tools::Impl::end_parallel_for(policy, *this, functor_name,
-                                              kpID);
+        Kokkos::Profiling::endParallelFor(kpID);
       }
     } else {
       for (size_t i = 0; i < n; ++i) operator()(i);
@@ -3901,8 +3902,6 @@ inline void view_error_operator_bounds(char* buf, int len, const MapType& map,
   view_error_operator_bounds<R + 1>(buf + n, len - n, map, args...);
 }
 
-#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-
 /* Check #3: is the View managed as determined by the MemoryTraits? */
 template <class MapType, bool is_managed = (MapType::is_managed != 0)>
 struct OperatorBoundsErrorOnDevice;
@@ -3958,34 +3957,29 @@ KOKKOS_FUNCTION
   OperatorBoundsErrorOnDevice<Map>::run(map);
 }
 
-#endif  // ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-
 template <class MemorySpace, class ViewType, class MapType, class... Args>
 KOKKOS_INLINE_FUNCTION void view_verify_operator_bounds(
     Kokkos::Impl::ViewTracker<ViewType> const& tracker, const MapType& map,
     Args... args) {
   if (!view_verify_operator_bounds<0>(map, args...)) {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    enum { LEN = 1024 };
-    char buffer[LEN];
-    const std::string label =
-        tracker.m_tracker.template get_label<MemorySpace>();
-    int n =
-        snprintf(buffer, LEN, "View bounds error of view %s (", label.c_str());
-    view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...);
-    Kokkos::Impl::throw_runtime_exception(std::string(buffer));
-#else
-    /* Check #1: is there a SharedAllocationRecord?
-       (we won't use it, but if its not there then there isn't
-        a corresponding SharedAllocationHeader containing a label).
-       This check should cover the case of Views that don't
-       have the Unmanaged trait but were initialized by pointer. */
-    if (tracker.m_tracker.has_record()) {
-      operator_bounds_error_on_device(map);
-    } else {
-      Kokkos::abort("View bounds error");
-    }
-#endif
+    KOKKOS_IF_ON_HOST(
+        (enum {LEN = 1024}; char buffer[LEN];
+         const std::string label =
+             tracker.m_tracker.template get_label<MemorySpace>();
+         int n = snprintf(buffer, LEN, "View bounds error of view %s (",
+                          label.c_str());
+         view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...);
+         Kokkos::Impl::throw_runtime_exception(std::string(buffer));))
+
+    KOKKOS_IF_ON_DEVICE((
+        /* Check #1: is there a SharedAllocationRecord?
+           (we won't use it, but if its not there then there isn't
+            a corresponding SharedAllocationHeader containing a label).
+           This check should cover the case of Views that don't
+           have the Unmanaged trait but were initialized by pointer. */
+        if (tracker.m_tracker.has_record()) {
+          operator_bounds_error_on_device(map);
+        } else { Kokkos::abort("View bounds error"); }))
   }
 }
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp
index 9cfe9d79144dbcf7457b50b300f691a7a8b6504e..fe3651886bd138a0662e4799bcef8950ee4e1e26 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp
@@ -91,30 +91,24 @@ struct ViewTracker {
 
   template <class RT, class... RP>
   KOKKOS_INLINE_FUNCTION void assign(const View<RT, RP...>& vt) noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (view_traits::is_managed &&
-        Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enabled()) {
-      m_tracker.assign_direct(vt.m_track.m_tracker);
-    } else {
-      m_tracker.assign_force_disable(vt.m_track.m_tracker);
-    }
-#else
-    m_tracker.assign_force_disable(vt.m_track.m_tracker);
-#endif
+    KOKKOS_IF_ON_HOST((
+        if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord<
+                                           void, void>::tracking_enabled()) {
+          m_tracker.assign_direct(vt.m_track.m_tracker);
+        } else { m_tracker.assign_force_disable(vt.m_track.m_tracker); }))
+
+    KOKKOS_IF_ON_DEVICE((m_tracker.assign_force_disable(vt.m_track.m_tracker);))
   }
 
-  KOKKOS_INLINE_FUNCTION
-  ViewTracker& operator=(const ViewTracker& rhs) noexcept {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-    if (view_traits::is_managed &&
-        Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enabled()) {
-      m_tracker.assign_direct(rhs.m_tracker);
-    } else {
-      m_tracker.assign_force_disable(rhs.m_tracker);
-    }
-#else
-    m_tracker.assign_force_disable(rhs.m_tracker);
-#endif
+  KOKKOS_INLINE_FUNCTION ViewTracker& operator=(
+      const ViewTracker& rhs) noexcept {
+    KOKKOS_IF_ON_HOST((
+        if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord<
+                                           void, void>::tracking_enabled()) {
+          m_tracker.assign_direct(rhs.m_tracker);
+        } else { m_tracker.assign_force_disable(rhs.m_tracker); }))
+
+    KOKKOS_IF_ON_DEVICE((m_tracker.assign_force_disable(rhs.m_tracker);))
     return *this;
   }
 
diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp
index 1913e508236a6ead07d3a4aaf684eef412f978ba..8551856aa89ae8fa9fddbf0f5a83bdaa25e297b4 100644
--- a/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp
+++ b/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp
@@ -88,9 +88,6 @@
 #define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__
 #endif
 
-#if defined(__NVCC__)
-#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
-#endif
 #else  // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
 #undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
 #endif  // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp
index d964baa8fb0f5e1b105d244740b74e32d1bdd69e..32236e963d563be07c010d6a520a745d2d977eb5 100644
--- a/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp
+++ b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp
@@ -48,26 +48,11 @@
 #include <CL/sycl.hpp>
 
 #ifdef __SYCL_DEVICE_ONLY__
-#ifdef KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF
-namespace Kokkos {
-namespace ImplSYCL {
-template <typename... Args>
-void sink(Args&&... args) {
-  (void)(sizeof...(args));
-}
-}  // namespace ImplSYCL
-}  // namespace Kokkos
-#define KOKKOS_IMPL_DO_NOT_USE_PRINTF(...) \
-  do {                                     \
-    Kokkos::ImplSYCL::sink(__VA_ARGS__);   \
-  } while (0)
-#else
 #define KOKKOS_IMPL_DO_NOT_USE_PRINTF(format, ...)                \
   do {                                                            \
     const __attribute__((opencl_constant)) char fmt[] = (format); \
-    sycl::ONEAPI::experimental::printf(fmt, ##__VA_ARGS__);       \
+    sycl::ext::oneapi::experimental::printf(fmt, ##__VA_ARGS__);  \
   } while (0)
 #endif
-#endif
 
 #endif
diff --git a/packages/kokkos/core/src/traits/Kokkos_ExecutionSpaceTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_ExecutionSpaceTrait.hpp
index e12d1f6a49d37f4f595214be00713c2ccb4166ef..cb2808698a279fbe9718d149d44af20c5a8dd0ef 100644
--- a/packages/kokkos/core/src/traits/Kokkos_ExecutionSpaceTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_ExecutionSpaceTrait.hpp
@@ -66,6 +66,7 @@ struct ExecutionSpaceTrait : TraitSpecificationBase<ExecutionSpaceTrait> {
     static constexpr auto execution_space_is_defaulted = true;
 
     using execution_space = Kokkos::DefaultExecutionSpace;
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class T>
   using trait_matches_specification = Kokkos::is_execution_space<T>;
diff --git a/packages/kokkos/core/src/traits/Kokkos_GraphKernelTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_GraphKernelTrait.hpp
index b57dfbbc07ccc0e2391b2fdb5b6ec577ed552cc2..b16a7777cd598bf4326627346886b65cd22c45fe 100644
--- a/packages/kokkos/core/src/traits/Kokkos_GraphKernelTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_GraphKernelTrait.hpp
@@ -60,6 +60,7 @@ namespace Impl {
 struct GraphKernelTrait : TraitSpecificationBase<GraphKernelTrait> {
   struct base_traits {
     using is_graph_kernel = std::false_type;
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class, class AnalyzeNextTrait>
   struct mixin_matching_trait : AnalyzeNextTrait {
diff --git a/packages/kokkos/core/src/traits/Kokkos_IndexTypeTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_IndexTypeTrait.hpp
index 63446375fbd529e82d71acf2bac5ef12fba238af..57f74d521b8e181ad3a1f3f7896c92d577a16f7c 100644
--- a/packages/kokkos/core/src/traits/Kokkos_IndexTypeTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_IndexTypeTrait.hpp
@@ -67,6 +67,7 @@ struct IndexTypeTrait : TraitSpecificationBase<IndexTypeTrait> {
   struct base_traits {
     static constexpr bool index_type_is_defaulted = true;
     using index_type = dependent_policy_trait_default;
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class IdxType, class AnalyzeNextTrait>
   using mixin_matching_trait = IndexTypePolicyMixin<IdxType, AnalyzeNextTrait>;
diff --git a/packages/kokkos/core/src/traits/Kokkos_IterationPatternTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_IterationPatternTrait.hpp
index b05f3b29e976c503c120c4f59dc4ed81b01822f7..3c8ba47417252f35a31995fe4c671482c11b5756 100644
--- a/packages/kokkos/core/src/traits/Kokkos_IterationPatternTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_IterationPatternTrait.hpp
@@ -65,6 +65,7 @@ struct show_extra_iteration_pattern_erroneously_given_to_execution_policy<
 struct IterationPatternTrait : TraitSpecificationBase<IterationPatternTrait> {
   struct base_traits {
     using iteration_pattern = void;  // TODO set default iteration pattern
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class IterPattern, class AnalyzeNextTrait>
   struct mixin_matching_trait : AnalyzeNextTrait {
diff --git a/packages/kokkos/core/src/traits/Kokkos_LaunchBoundsTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_LaunchBoundsTrait.hpp
index 06836bef8bff6ffc19d470766e0caa0a739a43c2..c20a883ddab7298b76c709e064134e0cefc95366 100644
--- a/packages/kokkos/core/src/traits/Kokkos_LaunchBoundsTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_LaunchBoundsTrait.hpp
@@ -61,6 +61,7 @@ struct LaunchBoundsTrait : TraitSpecificationBase<LaunchBoundsTrait> {
     static constexpr bool launch_bounds_is_defaulted = true;
 
     using launch_bounds = LaunchBounds<>;
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class LaunchBoundParam, class AnalyzeNextTrait>
   struct mixin_matching_trait : AnalyzeNextTrait {
diff --git a/packages/kokkos/core/src/traits/Kokkos_OccupancyControlTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_OccupancyControlTrait.hpp
index 73be14cf8501b3c3bff4a2386e2f75e1ffb00f19..b3328f8120cfce8018b579c3281ea3173cff24da 100644
--- a/packages/kokkos/core/src/traits/Kokkos_OccupancyControlTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_OccupancyControlTrait.hpp
@@ -96,6 +96,7 @@ struct OccupancyControlTrait : TraitSpecificationBase<OccupancyControlTrait> {
     static constexpr occupancy_control impl_get_occupancy_control() {
       return occupancy_control{};
     }
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class OccControl, class AnalyzeNextTrait>
   using mixin_matching_trait =
diff --git a/packages/kokkos/core/src/traits/Kokkos_ScheduleTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_ScheduleTrait.hpp
index 3e578f9060ab22b8707adc8797d401226a52ff44..311ab1a47d798d40f4d916f49d37b60b59f0fb0b 100644
--- a/packages/kokkos/core/src/traits/Kokkos_ScheduleTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_ScheduleTrait.hpp
@@ -66,6 +66,7 @@ struct ScheduleTrait : TraitSpecificationBase<ScheduleTrait> {
     static constexpr auto schedule_type_is_defaulted = true;
 
     using schedule_type = Schedule<Static>;
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class Sched, class AnalyzeNextTrait>
   struct mixin_matching_trait : AnalyzeNextTrait {
diff --git a/packages/kokkos/core/src/traits/Kokkos_Traits_fwd.hpp b/packages/kokkos/core/src/traits/Kokkos_Traits_fwd.hpp
index b8289ca6188846884277ca514db453145f1cb3c6..c6b4fe41d90f618c20b42727195d569b8cc54d9e 100644
--- a/packages/kokkos/core/src/traits/Kokkos_Traits_fwd.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_Traits_fwd.hpp
@@ -45,6 +45,15 @@
 #ifndef KOKKOS_KOKKOS_TRAITS_FWD_HPP
 #define KOKKOS_KOKKOS_TRAITS_FWD_HPP
 
+// Without this the CUDA side does proper EBO while MSVC doesn't
+// leading to mismatched sizes of the driver objects (CudaParallel)
+// leading to illegal memory accesses etc on device
+#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)
+#define KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND char dummy;
+#else
+#define KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
+#endif
+
 namespace Kokkos {
 namespace Impl {
 
diff --git a/packages/kokkos/core/src/traits/Kokkos_WorkItemPropertyTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_WorkItemPropertyTrait.hpp
index 35671d19b02bb72c777b77717beced94d152beb3..edc488a548bb1cbf920344f898e35228d9c7bea3 100644
--- a/packages/kokkos/core/src/traits/Kokkos_WorkItemPropertyTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_WorkItemPropertyTrait.hpp
@@ -59,6 +59,7 @@ namespace Impl {
 struct WorkItemPropertyTrait : TraitSpecificationBase<WorkItemPropertyTrait> {
   struct base_traits {
     using work_item_property = Kokkos::Experimental::WorkItemProperty::None_t;
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class WorkItemProp, class AnalyzeNextTrait>
   struct mixin_matching_trait : AnalyzeNextTrait {
diff --git a/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp
index 424e5c405b70cff9f73ef5756b5dca41e9d3d618..7bd96ab53c784d186d606727d1ea81938f70ab74 100644
--- a/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp
+++ b/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp
@@ -77,6 +77,7 @@ struct _trait_matches_spec_predicate {
 struct WorkTagTrait : TraitSpecificationBase<WorkTagTrait> {
   struct base_traits {
     using work_tag = void;
+    KOKKOS_IMPL_MSVC_NVCC_EBO_WORKAROUND
   };
   template <class WorkTag, class AnalyzeNextTrait>
   struct mixin_matching_trait : AnalyzeNextTrait {
diff --git a/packages/kokkos/core/unit_test/CMakeLists.txt b/packages/kokkos/core/unit_test/CMakeLists.txt
index 89b8ff1e4f0a8004ecd4c2f06d72544123107d03..0d968b89f6793d0be26f700f7ae9d488916c77bf 100644
--- a/packages/kokkos/core/unit_test/CMakeLists.txt
+++ b/packages/kokkos/core/unit_test/CMakeLists.txt
@@ -2,28 +2,32 @@
 # Add test-only library for gtest to be reused by all the subpackages
 #
 
+IF(NOT GTest_FOUND)  # fallback to internal gtest
+  SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
 
-SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
-
-#need here for tribits
-KOKKOS_INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
-KOKKOS_ADD_TEST_LIBRARY(
-  kokkos_gtest
-  HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
-  SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
-)
+  #need here for tribits
+  KOKKOS_INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
+  KOKKOS_ADD_TEST_LIBRARY(
+    kokkos_gtest
+    HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
+    SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
+  )
 
-# avoid deprecation warnings from MSVC
-TARGET_COMPILE_DEFINITIONS(kokkos_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0)
+  TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR})
+  IF((NOT (Kokkos_ENABLE_CUDA AND WIN32)) AND (NOT ("${KOKKOS_CXX_COMPILER_ID}" STREQUAL "Fujitsu")))
+    TARGET_COMPILE_FEATURES(kokkos_gtest PUBLIC cxx_std_14)
+  ENDIF()
 
-TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR})
-IF((NOT (Kokkos_ENABLE_CUDA AND WIN32)) AND (NOT ("${KOKKOS_CXX_COMPILER_ID}" STREQUAL "Fujitsu")))
-  TARGET_COMPILE_FEATURES(kokkos_gtest PUBLIC cxx_std_14)
-ENDIF()
+  # Suppress clang-tidy diagnostics on code that we do not have control over
+  IF(CMAKE_CXX_CLANG_TIDY)
+    SET_TARGET_PROPERTIES(kokkos_gtest PROPERTIES CXX_CLANG_TIDY "")
+  ENDIF()
 
-# Suppress clang-tidy diagnostics on code that we do not have control over
-IF(CMAKE_CXX_CLANG_TIDY)
-  SET_TARGET_PROPERTIES(kokkos_gtest PROPERTIES CXX_CLANG_TIDY "")
+  FIND_PACKAGE(Threads QUIET)
+  IF(TARGET Threads::Threads)
+    SET_TARGET_PROPERTIES(kokkos_gtest PROPERTIES
+                          INTERFACE_LINK_LIBRARIES Threads::Threads)
+  ENDIF()
 ENDIF()
 
 #
@@ -81,12 +85,7 @@ KOKKOS_ADD_EXECUTABLE(
 )
 
 foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL)
-  # Because there is always an exception to the rule
-  if(Tag STREQUAL "Threads")
-    set(DEVICE "PTHREAD")
-  else()
-    string(TOUPPER ${Tag} DEVICE)
-  endif()
+  string(TOUPPER ${Tag} DEVICE)
   string(TOLOWER ${Tag} dir)
 
   if(Kokkos_ENABLE_${DEVICE})
@@ -106,6 +105,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL)
         AtomicOperations_float
         AtomicOperations_complexdouble
         AtomicOperations_complexfloat
+        AtomicOperations_shared
         AtomicViews
         Atomics
         BlockSizeDeduction
@@ -117,6 +117,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL)
         FunctorAnalysis
         Init
         LocalDeepCopy
+        MinMaxClamp
+        MathematicalConstants
         MathematicalFunctions
         MDRange_a
         MDRange_b
@@ -143,6 +145,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL)
         MDRange_d
         MDRange_e
         MDRange_f
+        MDRange_g
         NumericTraits
         Other
         RangePolicy
@@ -152,6 +155,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL)
         Reducers_b
         Reducers_c
         Reducers_d
+        Reducers_e
         Reductions_DeviceView
         Scan
         SharedAlloc
@@ -258,6 +262,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL)
       SubView_c12
       SubView_c13
       SubView_c14
+      WithoutInitializing
       )
       set(file ${dir}/Test${Tag}_${Name}.cpp)
       # Write to a temporary intermediate file and call configure_file to avoid
@@ -331,6 +336,16 @@ if(Kokkos_ENABLE_OPENMPTARGET)
     )
 endif()
 
+# FIXME_OPENMPTARGET - Comment non-passing tests with amdclang++
+IF(KOKKOS_ARCH_VEGA906 OR KOKKOS_ARCH_VEGA908 OR KOKKOS_ARCH_VEGA90A)
+  SET(KOKKOS_AMDGPU_ARCH TRUE)
+ENDIF()
+IF(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_AMDGPU_ARCH)
+  LIST(REMOVE_ITEM OpenMPTarget_SOURCES
+    ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_e.cpp
+  )
+ENDIF()
+
 # FIXME_OPENMPTARGET - Comment non-passing tests with the NVIDIA HPC compiler nvc++
 IF(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
   list(REMOVE_ITEM OpenMPTarget_SOURCES
@@ -364,6 +379,7 @@ IF(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
     ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_b.cpp
     ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_c.cpp
     ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_d.cpp
+    ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_e.cpp
     ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewMapping_b.cpp
     ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamBasic.cpp
     ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Scan.cpp
@@ -423,7 +439,7 @@ if(Kokkos_ENABLE_SERIAL)
   )
 endif()
 
-if(Kokkos_ENABLE_PTHREAD)
+if(Kokkos_ENABLE_THREADS)
   KOKKOS_ADD_EXECUTABLE_AND_TEST(
     UnitTest_Threads
     SOURCES ${Threads_SOURCES}
@@ -514,6 +530,7 @@ if(Kokkos_ENABLE_CUDA)
     SOURCES
       UnitTestMainInit.cpp
       ${Cuda_SOURCES1}
+      cuda/TestCuda_ReducerViewSizeLimit.cpp
     )
 
     KOKKOS_ADD_EXECUTABLE_AND_TEST(
@@ -690,6 +707,7 @@ else()
     default/TestDefaultDeviceType_c3.cpp
     default/TestDefaultDeviceType_d.cpp
     default/TestDefaultDeviceTypeResize.cpp
+    default/TestDefaultDeviceTypeViewAPI.cpp
   )
 endif()
 
@@ -730,7 +748,6 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate
       "PASSED: I am the custom std::terminate handler."
     ALWAYS_FAIL_ON_ZERO_RETURN
 )
-
   if(KOKKOS_ENABLE_TUNING)
     KOKKOS_ADD_EXECUTABLE_AND_TEST(
       UnitTest_TuningBuiltins
@@ -755,13 +772,26 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate
       tools/TestLogicalSpaces.cpp
   )
   endif()
+  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL Intel AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 18.0.0))
   KOKKOS_ADD_EXECUTABLE_AND_TEST(
-    UnitTest_EventCorrectness
+    UnitTest_KokkosP
     SOURCES
+    UnitTestMainInit.cpp
     tools/TestEventCorrectness.cpp
+    tools/TestWithoutInitializing.cpp
+    tools/TestProfilingSection.cpp
   )
+  endif()
   if(KOKKOS_ENABLE_LIBDL)
-
+    KOKKOS_ADD_EXECUTABLE_AND_TEST(
+      UnitTest_ToolIndependence
+      SOURCES
+      tools/TestIndependence.cpp
+    )
+    TARGET_COMPILE_DEFINITIONS(
+      KokkosCore_UnitTest_ToolIndependence PUBLIC
+      KOKKOS_TOOLS_INDEPENDENT_BUILD
+    )
     KOKKOS_ADD_TEST_LIBRARY(
       kokkosprinter-tool SHARED
       SOURCES tools/printing-tool.cpp
@@ -807,7 +837,7 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate
       EXE  ProfilingAllCalls
       TOOL kokkosprinter-tool
       ARGS --kokkos-tools-args="-c test delimit"
-      PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_parse_args:4:KokkosCore_ProfilingAllCalls:-c:test:delimit::.*::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]destination] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_declare_metadata:dogs:good::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::"
+      PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_parse_args:4:KokkosCore_ProfilingAllCalls:-c:test:delimit::.*::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]destination] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::.*kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_declare_metadata:dogs:good::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::"
     )
 
     # Above will test that leading/trailing quotes are stripped bc ctest cmd args is:
@@ -824,7 +854,7 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate
       EXE  ProfilingAllCalls
       ARGS [=[--kokkos-tools-args=-c test delimit]=]
             --kokkos-tools-library=$<TARGET_FILE:kokkosprinter-tool>
-      PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_parse_args:4:KokkosCore_ProfilingAllCalls:-c:test:delimit::.*::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]destination] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_declare_metadata:dogs:good::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::"
+      PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_parse_args:4:KokkosCore_ProfilingAllCalls:-c:test:delimit::.*::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]destination] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::.*kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_declare_metadata:dogs:good::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::"
     )
   endif() #KOKKOS_ENABLE_LIBDL
 if(NOT KOKKOS_HAS_TRILINOS)
@@ -923,6 +953,6 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST(
   ARGS "one 2 THREE"
 )
 
-if (KOKKOS_ENABLE_HEADER_SELF_CONTAINMENT_TESTS AND NOT KOKKOS_HAS_TRILINOS)
+if (KOKKOS_ENABLE_HEADER_SELF_CONTAINMENT_TESTS AND NOT KOKKOS_HAS_TRILINOS AND NOT WIN32)
   add_subdirectory(headers_self_contained)
 endif()
diff --git a/packages/kokkos/core/unit_test/Makefile b/packages/kokkos/core/unit_test/Makefile
index 422628221402586ec4829ad5d8b628cbdd3736b1..570cee0227ffea55119c604eef92c06b60478c50 100644
--- a/packages/kokkos/core/unit_test/Makefile
+++ b/packages/kokkos/core/unit_test/Makefile
@@ -62,7 +62,7 @@ else
    STACK_TRACE_TERMINATE_FILTER :=
 endif
 
-TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other RangePolicy RangePolicyRequire Reductions Reducers_a Reducers_b Reducers_c Reducers_d Reductions_DeviceView Scan SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize
+TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other RangePolicy RangePolicyRequire Reductions Reducers_a Reducers_b Reducers_c Reducers_d Reducers_e Reductions_DeviceView Scan SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize
 
 tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
   tmp2 := $(foreach test, $(TESTS), \
@@ -129,7 +129,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
     OBJ_CUDA += TestCuda_SubView_c13.o
     OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o
     OBJ_CUDA += TestCuda_Reductions_DeviceView.o
-    OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o
+    OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o TestCuda_Reducers_e.o
     OBJ_CUDA += TestCuda_Complex.o
     OBJ_CUDA += TestCuda_AtomicOperations_int.o TestCuda_AtomicOperations_unsignedint.o TestCuda_AtomicOperations_longint.o
     OBJ_CUDA += TestCuda_AtomicOperations_unsignedlongint.o TestCuda_AtomicOperations_longlongint.o TestCuda_AtomicOperations_double.o TestCuda_AtomicOperations_float.o
@@ -155,7 +155,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
     TEST_TARGETS += test-cuda
 endif
 
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
     OBJ_THREADS = UnitTestMainInit.o gtest-all.o
     OBJ_THREADS += TestThreads_Init.o
     OBJ_THREADS += TestThreads_SharedAlloc.o
@@ -173,7 +173,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
     OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o
     OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o
     OBJ_THREADS += TestThreads_Reductions_DeviceView.o
-    OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o
+    OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o TestThreads_Reducers_e.o
     OBJ_THREADS += TestThreads_Complex.o
     OBJ_THREADS += TestThreads_AtomicOperations_int.o TestThreads_AtomicOperations_unsignedint.o TestThreads_AtomicOperations_longint.o
     OBJ_THREADS += TestThreads_AtomicOperations_unsignedlongint.o TestThreads_AtomicOperations_longlongint.o TestThreads_AtomicOperations_double.o TestThreads_AtomicOperations_float.o
@@ -209,7 +209,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
     OBJ_OPENMP += TestOpenMP_SubView_c13.o
     OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o
     OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o
-    OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o
+    OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o TestOpenMP_Reducers_e.o
     OBJ_OPENMP += TestOpenMP_Complex.o
     OBJ_OPENMP += TestOpenMP_AtomicOperations_int.o TestOpenMP_AtomicOperations_unsignedint.o TestOpenMP_AtomicOperations_longint.o
     OBJ_OPENMP += TestOpenMP_AtomicOperations_unsignedlongint.o TestOpenMP_AtomicOperations_longlongint.o TestOpenMP_AtomicOperations_double.o TestOpenMP_AtomicOperations_float.o
@@ -250,7 +250,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
     #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c07.o TestOpenMPTarget_SubView_c08.o TestOpenMPTarget_SubView_c09.o
     #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o
     #OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions
-    OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o
+    OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o TestOpenMPTarget_Reducers_e.o
     #OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o
     OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o
     OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_int.o TestOpenMPTarget_AtomicOperations_unsignedint.o TestOpenMPTarget_AtomicOperations_longint.o
@@ -285,7 +285,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
 
 	OBJ_HIP = UnitTestMainInit.o gtest-all.o
 	OBJ_HIP += TestHIP_Init.o
-	OBJ_HIP += TestHIP_Reducers_a.o TestHIP_Reducers_b.o TestHIP_Reducers_c.o TestHIP_Reducers_d.o
+	OBJ_HIP += TestHIP_Reducers_a.o TestHIP_Reducers_b.o TestHIP_Reducers_c.o TestHIP_Reducers_d.o TestHIP_Reducers_e.o
 	OBJ_HIP += TestHIP_Reductions.o
 	OBJ_HIP += TestHIP_MDRange_a.o TestHIP_MDRange_b.o TestHIP_MDRange_c.o TestHIP_MDRange_d.o TestHIP_MDRange_e.o
 	OBJ_HIP += TestHIP_Spaces.o
@@ -316,7 +316,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
 	OBJ_HPX += TestHPX_SubView_c13.o
 	OBJ_HPX += TestHPX_Reductions.o
 	OBJ_HPX += TestHPX_Scan.o
-	OBJ_HPX += TestHPX_Reducers_a.o TestHPX_Reducers_b.o TestHPX_Reducers_c.o TestHPX_Reducers_d.o
+	OBJ_HPX += TestHPX_Reducers_a.o TestHPX_Reducers_b.o TestHPX_Reducers_c.o TestHPX_Reducers_d.o TestHPX_Reducers_e.o
 	OBJ_HPX += TestHPX_Complex.o
 	OBJ_HPX += TestHPX_AtomicOperations_int.o TestHPX_AtomicOperations_unsignedint.o TestHPX_AtomicOperations_longint.o
 	OBJ_HPX += TestHPX_AtomicOperations_unsignedlongint.o TestHPX_AtomicOperations_longlongint.o TestHPX_AtomicOperations_double.o TestHPX_AtomicOperations_float.o
@@ -356,7 +356,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
     OBJ_SERIAL += TestSerial_SubView_c13.o
     OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o
     OBJ_SERIAL += TestSerial_Reductions_DeviceView.o
-    OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o
+    OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o TestSerial_Reducers_e.o
     OBJ_SERIAL += TestSerial_Complex.o
     OBJ_SERIAL += TestSerial_AtomicOperations_int.o TestSerial_AtomicOperations_unsignedint.o TestSerial_AtomicOperations_longint.o
     OBJ_SERIAL += TestSerial_AtomicOperations_unsignedlongint.o TestSerial_AtomicOperations_longlongint.o TestSerial_AtomicOperations_double.o TestSerial_AtomicOperations_float.o
diff --git a/packages/kokkos/core/unit_test/TestAggregate.hpp b/packages/kokkos/core/unit_test/TestAggregate.hpp
index 3151143a6ff992c30ecee6dd52668c5c77941923..7590c6f1fe091227c2033176690cc18aee5be44f 100644
--- a/packages/kokkos/core/unit_test/TestAggregate.hpp
+++ b/packages/kokkos/core/unit_test/TestAggregate.hpp
@@ -97,11 +97,11 @@ void TestViewAggregate() {
   a32_type x("test", 4, 5);
   a32_flat_type y(x);
 
-  ASSERT_EQ(x.extent(0), 4);
-  ASSERT_EQ(x.extent(1), 5);
-  ASSERT_EQ(y.extent(0), 4);
-  ASSERT_EQ(y.extent(1), 5);
-  ASSERT_EQ(y.extent(2), 32);
+  ASSERT_EQ(x.extent(0), 4u);
+  ASSERT_EQ(x.extent(1), 5u);
+  ASSERT_EQ(y.extent(0), 4u);
+  ASSERT_EQ(y.extent(1), 5u);
+  ASSERT_EQ(y.extent(2), 32u);
 
   // Initialize arrays from brace-init-list as for std::array.
   //
diff --git a/packages/kokkos/core/unit_test/TestAtomicOperations.hpp b/packages/kokkos/core/unit_test/TestAtomicOperations.hpp
index 257ad2e9e5bba73babacd0153ba74f0ab1a2ba15..1ec175710c86cf0748c9a4e3d846369e189e7be6 100644
--- a/packages/kokkos/core/unit_test/TestAtomicOperations.hpp
+++ b/packages/kokkos/core/unit_test/TestAtomicOperations.hpp
@@ -345,6 +345,84 @@ bool IncAtomicTest(T i0) {
   return passed;
 }
 
+//---------------------------------------------------
+//-------------atomic_wrapping_increment-------------
+//---------------------------------------------------
+
+template <class T, class DEVICE_TYPE>
+struct WrappingIncFunctor {
+  using execution_space = DEVICE_TYPE;
+  using type            = Kokkos::View<T, execution_space>;
+
+  type data;
+  T i0;
+  T i1;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int) const {
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_fetch_inc_mod(&data(), (T)i1, desul::MemoryOrderRelaxed(),
+                                desul::MemoryScopeDevice());
+#endif
+  }
+
+  WrappingIncFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {}
+};
+
+template <class T, class execution_space>
+T WrappingIncAtomic(T i0, T i1) {
+  struct InitFunctor<T, execution_space> f_init(i0);
+  typename InitFunctor<T, execution_space>::type data("Data");
+  typename InitFunctor<T, execution_space>::h_type h_data("HData");
+
+  f_init.data = data;
+  Kokkos::parallel_for(1, f_init);
+  execution_space().fence();
+
+  struct WrappingIncFunctor<T, execution_space> f(i0, i1);
+
+  f.data = data;
+  Kokkos::parallel_for(1, f);
+  execution_space().fence();
+
+  Kokkos::deep_copy(h_data, data);
+  T val = h_data();
+
+  return val;
+}
+
+template <class T>
+T WrappingIncAtomicCheck(T i0, T i1) {
+  T* data = new T[1];
+  data[0] = 0;
+
+  // Wraps to 0 when i0 >= i1
+  *data = ((i0 >= i1) ? (T)0 : i0 + (T)1);
+
+  T val = *data;
+  delete[] data;
+
+  return val;
+}
+
+template <class T, class DeviceType>
+bool WrappingIncAtomicTest(T i0, T i1) {
+  T res       = WrappingIncAtomic<T, DeviceType>(i0, i1);
+  T resSerial = WrappingIncAtomicCheck<T>(i0, i1);
+
+  bool passed = true;
+
+  if (resSerial != res) {
+    passed = false;
+
+    std::cout << "Loop<" << typeid(T).name()
+              << ">( test = WrappingIncAtomicTest"
+              << " FAILED : " << resSerial << " != " << res << std::endl;
+  }
+
+  return passed;
+}
+
 //---------------------------------------------------
 //--------------atomic_decrement---------------------
 //---------------------------------------------------
@@ -415,6 +493,85 @@ bool DecAtomicTest(T i0) {
   return passed;
 }
 
+//---------------------------------------------------
+//-------------atomic_wrapping_decrement-------------
+//---------------------------------------------------
+
+template <class T, class DEVICE_TYPE>
+struct WrappingDecFunctor {
+  using execution_space = DEVICE_TYPE;
+  using type            = Kokkos::View<T, execution_space>;
+
+  type data;
+  T i0;
+  T i1;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(int) const {
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    desul::atomic_fetch_dec_mod(&data(), (T)i1, desul::MemoryOrderRelaxed(),
+                                desul::MemoryScopeDevice());
+#endif
+  }
+
+  WrappingDecFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {}
+};
+
+template <class T, class execution_space>
+T WrappingDecAtomic(T i0, T i1) {
+  struct InitFunctor<T, execution_space> f_init(i0);
+  typename InitFunctor<T, execution_space>::type data("Data");
+  typename InitFunctor<T, execution_space>::h_type h_data("HData");
+
+  f_init.data = data;
+  Kokkos::parallel_for(1, f_init);
+  execution_space().fence();
+
+  struct WrappingDecFunctor<T, execution_space> f(i0, i1);
+
+  f.data = data;
+  Kokkos::parallel_for(1, f);
+  execution_space().fence();
+
+  Kokkos::deep_copy(h_data, data);
+  T val = h_data();
+
+  return val;
+}
+
+template <class T>
+T WrappingDecAtomicCheck(T i0, T i1) {
+  T* data = new T[1];
+  data[0] = 0;
+
+  // Wraps to i1 when i0 <= 0
+  // i0 should never be negative
+  *data = ((i0 <= (T)0) ? i1 : i0 - (T)1);
+
+  T val = *data;
+  delete[] data;
+
+  return val;
+}
+
+template <class T, class DeviceType>
+bool WrappingDecAtomicTest(T i0, T i1) {
+  T res       = WrappingDecAtomic<T, DeviceType>(i0, i1);
+  T resSerial = WrappingDecAtomicCheck<T>(i0, i1);
+
+  bool passed = true;
+
+  if (resSerial != res) {
+    passed = false;
+
+    std::cout << "Loop<" << typeid(T).name()
+              << ">( test = WrappingDecAtomicTest"
+              << " FAILED : " << resSerial << " != " << res << std::endl;
+  }
+
+  return passed;
+}
+
 //---------------------------------------------------
 //--------------atomic_fetch_mul---------------------
 //---------------------------------------------------
@@ -1016,6 +1173,16 @@ bool AtomicOperationsTestIntegralType(int i0, int i1, int test) {
   return 0;
 }
 
+template <class T, class DeviceType>
+bool AtomicOperationsTestUnsignedIntegralType(int i0, int i1, int test) {
+  switch (test) {
+    case 1: return WrappingIncAtomicTest<T, DeviceType>((T)i0, (T)i1);
+    case 2: return WrappingDecAtomicTest<T, DeviceType>((T)i0, (T)i1);
+  }
+
+  return 0;
+}
+
 template <class T, class DeviceType>
 bool AtomicOperationsTestNonIntegralType(int i0, int i1, int test) {
   switch (test) {
diff --git a/packages/kokkos/core/unit_test/TestAtomicOperations_shared.hpp b/packages/kokkos/core/unit_test/TestAtomicOperations_shared.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..08f4782ce18e9b9f774aa891f9878da3be6d12ef
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestAtomicOperations_shared.hpp
@@ -0,0 +1,86 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+// FIXME_SYCL This doesn't work yet for SYCL+CUDA
+#if !defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ARCH_INTEL_GPU)
+template <typename ExecutionSpace>
+struct TestSharedAtomicsFunctor {
+  Kokkos::View<int, typename ExecutionSpace::memory_space> m_view;
+
+  TestSharedAtomicsFunctor(
+      Kokkos::View<int, typename ExecutionSpace::memory_space>& view)
+      : m_view(view) {}
+
+  KOKKOS_INLINE_FUNCTION void operator()(
+      const typename Kokkos::TeamPolicy<ExecutionSpace>::member_type t) const {
+    int* x = (int*)t.team_shmem().get_shmem(sizeof(int));
+    Kokkos::single(Kokkos::PerTeam(t), [&]() { *x = 0; });
+    t.team_barrier();
+    Kokkos::atomic_add(x, 1);
+    t.team_barrier();
+    Kokkos::single(Kokkos::PerTeam(t), [&]() { m_view() = *x; });
+  }
+};
+
+TEST(TEST_CATEGORY, atomic_shared) {
+  TEST_EXECSPACE exec;
+  Kokkos::View<int, typename TEST_EXECSPACE::memory_space> view("ref_value");
+  auto team_size =
+      Kokkos::TeamPolicy<TEST_EXECSPACE>(exec, 1, Kokkos::AUTO)
+          .team_size_recommended(TestSharedAtomicsFunctor<TEST_EXECSPACE>(view),
+                                 Kokkos::ParallelForTag{});
+  Kokkos::parallel_for(Kokkos::TeamPolicy<TEST_EXECSPACE>(exec, 1, team_size)
+                           .set_scratch_size(0, Kokkos::PerTeam(8)),
+                       TestSharedAtomicsFunctor<TEST_EXECSPACE>(view));
+  exec.fence("Fence after test kernel");
+  int i = 0;
+  Kokkos::deep_copy(i, view);
+  ASSERT_EQ(i, team_size);
+}
+#endif
+}  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedint.hpp b/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedint.hpp
index c3c6bc9fb38d9dc9af37bc69c29e60d1fd040cc6..75b354c7a3e0e8ea2db6e9b0fc1ef9e89cf941bc 100644
--- a/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedint.hpp
+++ b/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedint.hpp
@@ -73,6 +73,14 @@ TEST(TEST_CATEGORY, atomic_operations_unsigned) {
                  unsigned int, TEST_EXECSPACE>(start, end - i, 12)));
     ASSERT_TRUE((TestAtomicOperations::AtomicOperationsTestIntegralType<
                  unsigned int, TEST_EXECSPACE>(start, end - i, 13)));
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    ASSERT_TRUE(
+        (TestAtomicOperations::AtomicOperationsTestUnsignedIntegralType<
+            unsigned int, TEST_EXECSPACE>(start, end - i, 1)));  // Wrapping Inc
+    ASSERT_TRUE(
+        (TestAtomicOperations::AtomicOperationsTestUnsignedIntegralType<
+            unsigned int, TEST_EXECSPACE>(start, end - i, 2)));  // Wrapping Dec
+#endif
   }
 }
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedlongint.hpp b/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedlongint.hpp
index f3be4bedb794884998639eb9a313db5079bebdd2..d7e90033c378790f7dc0aec6126840988605590a 100644
--- a/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedlongint.hpp
+++ b/packages/kokkos/core/unit_test/TestAtomicOperations_unsignedlongint.hpp
@@ -73,6 +73,14 @@ TEST(TEST_CATEGORY, atomic_operations_unsignedlong) {
                  unsigned long int, TEST_EXECSPACE>(start, end - i, 12)));
     ASSERT_TRUE((TestAtomicOperations::AtomicOperationsTestIntegralType<
                  unsigned long int, TEST_EXECSPACE>(start, end - i, 13)));
+#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
+    ASSERT_TRUE((TestAtomicOperations::AtomicOperationsTestUnsignedIntegralType<
+                 unsigned long int, TEST_EXECSPACE>(start, end - i,
+                                                    1)));  // Wrapping Inc
+    ASSERT_TRUE((TestAtomicOperations::AtomicOperationsTestUnsignedIntegralType<
+                 unsigned long int, TEST_EXECSPACE>(start, end - i,
+                                                    2)));  // Wrapping Dec
+#endif
   }
 }
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestAtomicViews.hpp b/packages/kokkos/core/unit_test/TestAtomicViews.hpp
index e029ad81f576f25333470e6078eee9445abba3ec..88f1aee630add62ec7753b191fb58ba0ca6d7a6a 100644
--- a/packages/kokkos/core/unit_test/TestAtomicViews.hpp
+++ b/packages/kokkos/core/unit_test/TestAtomicViews.hpp
@@ -194,19 +194,19 @@ class TestAtomicViewAPI {
 
     dx = dView0("dx");
     dy = dView0("dy");
-    ASSERT_EQ(dx.use_count(), size_t(1));
-    ASSERT_EQ(dy.use_count(), size_t(1));
+    ASSERT_EQ(dx.use_count(), 1);
+    ASSERT_EQ(dy.use_count(), 1);
 
     ax = dx;
     ay = dy;
-    ASSERT_EQ(dx.use_count(), size_t(2));
-    ASSERT_EQ(dy.use_count(), size_t(2));
+    ASSERT_EQ(dx.use_count(), 2);
+    ASSERT_EQ(dy.use_count(), 2);
     ASSERT_EQ(dx.use_count(), ax.use_count());
 
     az = ax;
-    ASSERT_EQ(dx.use_count(), size_t(3));
-    ASSERT_EQ(ax.use_count(), size_t(3));
-    ASSERT_EQ(az.use_count(), size_t(3));
+    ASSERT_EQ(dx.use_count(), 3);
+    ASSERT_EQ(ax.use_count(), 3);
+    ASSERT_EQ(az.use_count(), 3);
     ASSERT_EQ(az.use_count(), ax.use_count());
   }
 
@@ -216,33 +216,33 @@ class TestAtomicViewAPI {
 
     dx = dView4("dx", N0);
     dy = dView4("dy", N0);
-    ASSERT_EQ(dx.use_count(), size_t(1));
-    ASSERT_EQ(dy.use_count(), size_t(1));
+    ASSERT_EQ(dx.use_count(), 1);
+    ASSERT_EQ(dy.use_count(), 1);
 
     ax = dx;
     ay = dy;
-    ASSERT_EQ(dx.use_count(), size_t(2));
-    ASSERT_EQ(dy.use_count(), size_t(2));
+    ASSERT_EQ(dx.use_count(), 2);
+    ASSERT_EQ(dy.use_count(), 2);
     ASSERT_EQ(dx.use_count(), ax.use_count());
 
     dView4_unmanaged unmanaged_dx = dx;
-    ASSERT_EQ(dx.use_count(), size_t(2));
+    ASSERT_EQ(dx.use_count(), 2);
 
     az = ax;
-    ASSERT_EQ(dx.use_count(), size_t(3));
-    ASSERT_EQ(ax.use_count(), size_t(3));
-    ASSERT_EQ(az.use_count(), size_t(3));
+    ASSERT_EQ(dx.use_count(), 3);
+    ASSERT_EQ(ax.use_count(), 3);
+    ASSERT_EQ(az.use_count(), 3);
     ASSERT_EQ(az.use_count(), ax.use_count());
 
     aView4_unmanaged unmanaged_ax = ax;
-    ASSERT_EQ(ax.use_count(), size_t(3));
+    ASSERT_EQ(ax.use_count(), 3);
 
     aView4_unmanaged unmanaged_ax_from_ptr_dx = aView4_unmanaged(
         dx.data(), dx.extent(0), dx.extent(1), dx.extent(2), dx.extent(3));
-    ASSERT_EQ(ax.use_count(), size_t(3));
+    ASSERT_EQ(ax.use_count(), 3);
 
     const_aView4 const_ax = ax;
-    ASSERT_EQ(ax.use_count(), size_t(4));
+    ASSERT_EQ(ax.use_count(), 4);
     ASSERT_EQ(const_ax.use_count(), ax.use_count());
 
     ASSERT_NE(ax.data(), nullptr);
diff --git a/packages/kokkos/core/unit_test/TestCTestDevice.cpp b/packages/kokkos/core/unit_test/TestCTestDevice.cpp
index b2ee79b856b0b995bb86b39d8f7fedb4548c5a7d..247981c5ce5ed5d486084a77ca290b170a91e1ab 100644
--- a/packages/kokkos/core/unit_test/TestCTestDevice.cpp
+++ b/packages/kokkos/core/unit_test/TestCTestDevice.cpp
@@ -24,12 +24,12 @@ int unsetenv(const char *name) { return _putenv_s(name, ""); }
 
 // Needed because https://github.com/google/googletest/issues/952 has not been
 // resolved
-#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) \
-  EXPECT_THROW(                                            \
-      try { stmt; } catch (const etype &ex) {              \
-        EXPECT_EQ(whatstring, std::string(ex.what()));     \
-        throw;                                             \
-      },                                                   \
+#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring)      \
+  EXPECT_THROW(                                                 \
+      try { stmt; } catch (const etype &ex) {                   \
+        EXPECT_EQ(std::string(ex.what()).find(whatstring), 0u); \
+        throw;                                                  \
+      },                                                        \
       etype)
 
 class ctest_environment : public ::testing::Test {
@@ -80,54 +80,43 @@ TEST_F(ctest_environment, invalid_rank) {
   EXPECT_THROW_WITH_MESSAGE(
       Kokkos::Impl::get_ctest_gpu("10"), std::runtime_error,
       "Error: local rank 10 is outside the bounds of resource groups provided "
-      "by"
-      " CTest. Raised by Kokkos::Impl::get_ctest_gpu().\nTraceback "
-      "functionality"
-      " not available\n");
+      "by CTest.");
 }
 
 TEST_F(ctest_environment, no_type_str) {
   EXPECT_THROW_WITH_MESSAGE(
       Kokkos::Impl::get_ctest_gpu("0"), std::runtime_error,
       "Error: CTEST_RESOURCE_GROUP_0 is not specified. Raised by "
-      "Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not "
-      "available\n");
+      "Kokkos::Impl::get_ctest_gpu().");
 }
 
 TEST_F(ctest_environment, missing_type) {
   EXPECT_THROW_WITH_MESSAGE(
       Kokkos::Impl::get_ctest_gpu("1"), std::runtime_error,
       "Error: device type 'gpus' not included in CTEST_RESOURCE_GROUP_1. "
-      "Raised "
-      "by Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not available"
-      "\n");
+      "Raised by Kokkos::Impl::get_ctest_gpu().");
   EXPECT_THROW_WITH_MESSAGE(
       Kokkos::Impl::get_ctest_gpu("2"), std::runtime_error,
       "Error: device type 'gpus' not included in CTEST_RESOURCE_GROUP_2. "
-      "Raised "
-      "by Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not available"
-      "\n");
+      "Raised by Kokkos::Impl::get_ctest_gpu().");
 }
 
 TEST_F(ctest_environment, no_id_str) {
   EXPECT_THROW_WITH_MESSAGE(
       Kokkos::Impl::get_ctest_gpu("3"), std::runtime_error,
       "Error: CTEST_RESOURCE_GROUP_3_GPUS is not specified. Raised by "
-      "Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not "
-      "available\n");
+      "Kokkos::Impl::get_ctest_gpu().");
 }
 
 TEST_F(ctest_environment, invalid_id_str) {
   EXPECT_THROW_WITH_MESSAGE(
       Kokkos::Impl::get_ctest_gpu("4"), std::runtime_error,
       "Error: invalid value of CTEST_RESOURCE_GROUP_4_GPUS: 'id:2'. Raised by "
-      "Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not "
-      "available\n");
+      "Kokkos::Impl::get_ctest_gpu().");
   EXPECT_THROW_WITH_MESSAGE(
       Kokkos::Impl::get_ctest_gpu("5"), std::runtime_error,
       "Error: invalid value of CTEST_RESOURCE_GROUP_5_GPUS: 'slots:1,id:2'. "
-      "Raised by Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not "
-      "available\n");
+      "Raised by Kokkos::Impl::get_ctest_gpu().");
 }
 
 TEST_F(ctest_environment, good) {
diff --git a/packages/kokkos/core/unit_test/TestComplex.hpp b/packages/kokkos/core/unit_test/TestComplex.hpp
index be0c1e50d7013efc177f427b481a4b67b1441744..513fb6aeeef5f405642cc7e0560d6bcbf3a5af35 100644
--- a/packages/kokkos/core/unit_test/TestComplex.hpp
+++ b/packages/kokkos/core/unit_test/TestComplex.hpp
@@ -48,6 +48,11 @@
 
 namespace Test {
 
+#ifdef KOKKOS_COMPILER_NVHPC
+// warning: 'long double' is treated as 'double' in device code
+#pragma diag_suppress 20208
+#endif
+
 // Test construction and assignment
 
 template <class ExecSpace>
@@ -348,7 +353,9 @@ struct TestComplexSpecialFunctions {
     r = std::acosh(a);
     ASSERT_FLOAT_EQ(h_results(13).real(), r.real());
     ASSERT_FLOAT_EQ(h_results(13).imag(), r.imag());
-    r = std::atanh(a);
+    // atanh
+    // Work around a bug in gcc 5.3.1 where the compiler cannot compute atanh
+    r = {0.163481616851666003, 1.27679502502111284};
     ASSERT_FLOAT_EQ(h_results(14).real(), r.real());
     ASSERT_FLOAT_EQ(h_results(14).imag(), r.imag());
     r = std::asin(a);
@@ -357,7 +364,9 @@ struct TestComplexSpecialFunctions {
     r = std::acos(a);
     ASSERT_FLOAT_EQ(h_results(16).real(), r.real());
     ASSERT_FLOAT_EQ(h_results(16).imag(), r.imag());
-    r = std::atan(a);
+    // atan
+    // Work around a bug in gcc 5.3.1 where the compiler cannot compute atan
+    r = {1.380543138238714, 0.2925178131625636};
     ASSERT_FLOAT_EQ(h_results(17).real(), r.real());
     ASSERT_FLOAT_EQ(h_results(17).imag(), r.imag());
 #endif
@@ -459,6 +468,7 @@ TEST(TEST_CATEGORY, complex_issue_3865) {
   TestBugPowAndLogComplex<TEST_EXECSPACE>();
 }
 
+#ifdef KOKKOS_ENABLE_OPENMPTARGET  // FIXME_OPENMPTARGET
 TEST(TEST_CATEGORY, complex_issue_3867) {
   ASSERT_EQ(Kokkos::pow(Kokkos::complex<double>(2., 1.), 3.),
             Kokkos::pow(Kokkos::complex<double>(2., 1.), 3));
@@ -514,6 +524,7 @@ TEST(TEST_CATEGORY, complex_issue_3867) {
 
 #undef CHECK_POW_COMPLEX_PROMOTION
 }
+#endif
 
 TEST(TEST_CATEGORY, complex_operations_arithmetic_types_overloads) {
 #define STATIC_ASSERT(cond) static_assert(cond, "")
diff --git a/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp b/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp
index f487a015fbf261f85bf2b8a0b4755dadcefe2f32..73db630b305b59a44bff3431dbbac87a8375b626 100644
--- a/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp
+++ b/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp
@@ -1,4 +1,5 @@
 #include <Kokkos_Core.hpp>
+#include <cstddef>
 
 namespace Test {
 
@@ -62,7 +63,7 @@ struct TestDeepCopy {
     reset_a_copy_and_b(a_char_copy, b_char);
 
     {
-      int check = compare_equal(a_char_copy, a_char);
+      size_t check = compare_equal(a_char_copy, a_char);
       ASSERT_EQ(check, a_char.extent(0));
     }
 
diff --git a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
index 90e485998ec08ba98716185298860fb4c407daf2..7ffa5aaddc899d1c0612eba85473eb64d9c55d5b 100644
--- a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
+++ b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
@@ -86,11 +86,19 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device,
     int nthreads = 3;
 
 #ifdef KOKKOS_ENABLE_OPENMP
-    if (omp_get_max_threads() < 3) nthreads = omp_get_max_threads();
+    if (omp_get_max_threads() < nthreads) {
+      nthreads = omp_get_max_threads();
+    }
+#elif defined(KOKKOS_ENABLE_HPX)
+    const auto concurrency = std::thread::hardware_concurrency();
+    if (concurrency < nthreads) {
+      nthreads = concurrency;
+    }
 #endif
 
     if (Kokkos::hwloc::available()) {
-      if (Kokkos::hwloc::get_available_threads_per_core() < 3)
+      if (Kokkos::hwloc::get_available_threads_per_core() <
+          static_cast<unsigned>(nthreads))
         nthreads = Kokkos::hwloc::get_available_threads_per_core() *
                    Kokkos::hwloc::get_available_numa_count();
     }
@@ -153,13 +161,19 @@ Kokkos::InitArguments init_initstruct(bool do_threads, bool do_numa,
     int nthreads = 3;
 
 #ifdef KOKKOS_ENABLE_OPENMP
-    if (omp_get_max_threads() < 3) {
+    if (omp_get_max_threads() < nthreads) {
       nthreads = omp_get_max_threads();
     }
+#elif defined(KOKKOS_ENABLE_HPX)
+    const auto concurrency = std::thread::hardware_concurrency();
+    if (concurrency < nthreads) {
+      nthreads = concurrency;
+    }
 #endif
 
     if (Kokkos::hwloc::available()) {
-      if (Kokkos::hwloc::get_available_threads_per_core() < 3) {
+      if (Kokkos::hwloc::get_available_threads_per_core() <
+          static_cast<unsigned>(nthreads)) {
         nthreads = Kokkos::hwloc::get_available_threads_per_core() *
                    Kokkos::hwloc::get_available_numa_count();
       }
diff --git a/packages/kokkos/core/unit_test/TestHalfConversion.hpp b/packages/kokkos/core/unit_test/TestHalfConversion.hpp
index 992f56cc6b833882676f71817bae3b6bd03631d6..872e3ef8bdf40faaa677f7cb1805ebf224e5fa15 100644
--- a/packages/kokkos/core/unit_test/TestHalfConversion.hpp
+++ b/packages/kokkos/core/unit_test/TestHalfConversion.hpp
@@ -55,8 +55,6 @@ void test_half_conversion_type() {
   T b                            = Kokkos::Experimental::cast_from_half<T>(a);
   ASSERT_LT((double(b - base) / double(base)), epsilon);
 
-// TODO: Remove ifndef once https://github.com/kokkos/kokkos/pull/3480 merges
-#ifndef KOKKOS_ENABLE_SYCL
 #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
   Kokkos::View<T> b_v("b_v");
   Kokkos::parallel_for(
@@ -69,7 +67,28 @@ void test_half_conversion_type() {
   Kokkos::deep_copy(b, b_v);
   ASSERT_LT((double(b - base) / double(base)), epsilon);
 #endif  // KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
-#endif  // KOKKOS_ENABLE_SYCL
+}
+
+template <class T>
+void test_bhalf_conversion_type() {
+  double epsilon = KOKKOS_BHALF_T_IS_FLOAT ? 0.0000003 : 0.0003;
+  T base         = static_cast<T>(3.3);
+  Kokkos::Experimental::bhalf_t a = Kokkos::Experimental::cast_to_bhalf(base);
+  T b                             = Kokkos::Experimental::cast_from_bhalf<T>(a);
+  ASSERT_LT((double(b - base) / double(base)), epsilon);
+
+#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
+  Kokkos::View<T> b_v("b_v");
+  Kokkos::parallel_for(
+      "TestHalfConversion", 1, KOKKOS_LAMBDA(int) {
+        Kokkos::Experimental::bhalf_t d_a =
+            Kokkos::Experimental::cast_to_bhalf(base);
+        b_v() = Kokkos::Experimental::cast_from_bhalf<T>(d_a);
+      });
+
+  Kokkos::deep_copy(b, b_v);
+  ASSERT_LT((double(b - base) / double(base)), epsilon);
+#endif  // KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
 }
 
 void test_half_conversion() {
@@ -85,7 +104,22 @@ void test_half_conversion() {
   test_half_conversion_type<unsigned long long>();
 }
 
+void test_bhalf_conversion() {
+  test_bhalf_conversion_type<float>();
+  test_bhalf_conversion_type<double>();
+  test_bhalf_conversion_type<short>();
+  test_bhalf_conversion_type<int>();
+  test_bhalf_conversion_type<long>();
+  test_bhalf_conversion_type<long long>();
+  test_bhalf_conversion_type<unsigned short>();
+  test_bhalf_conversion_type<unsigned int>();
+  test_bhalf_conversion_type<unsigned long>();
+  test_bhalf_conversion_type<unsigned long long>();
+}
+
 TEST(TEST_CATEGORY, half_conversion) { test_half_conversion(); }
 
+TEST(TEST_CATEGORY, bhalf_conversion) { test_bhalf_conversion(); }
+
 }  // namespace Test
 #endif
diff --git a/packages/kokkos/core/unit_test/TestHalfOperators.hpp b/packages/kokkos/core/unit_test/TestHalfOperators.hpp
index c4cf8a745701897a2a36c38540a94149650b5e2d..543ae506ee251de8c8136cf06b8e08ed1d4e2f53 100644
--- a/packages/kokkos/core/unit_test/TestHalfOperators.hpp
+++ b/packages/kokkos/core/unit_test/TestHalfOperators.hpp
@@ -45,10 +45,9 @@
 
 #ifndef TESTHALFOPERATOR_HPP_
 #define TESTHALFOPERATOR_HPP_
-// TODO: Remove ifndef once https://github.com/kokkos/kokkos/pull/3480 merges
-#ifndef KOKKOS_ENABLE_SYCL
 namespace Test {
-#define FP16_EPSILON 0.0009765625F
+#define FP16_EPSILON 0.0009765625F  // 1/2^10
+#define BF16_EPSILON 0.0078125F     // 1/2^7
 using namespace Kokkos::Experimental;
 using ExecutionSpace = TEST_EXECSPACE;
 using ScalarType     = double;
@@ -56,6 +55,10 @@ using ViewType       = Kokkos::View<ScalarType*, ExecutionSpace>;
 using ViewTypeHost   = Kokkos::View<ScalarType*, Kokkos::HostSpace>;
 KOKKOS_FUNCTION
 const half_t& accept_ref(const half_t& a) { return a; }
+#if !KOKKOS_BHALF_T_IS_FLOAT
+KOKKOS_FUNCTION
+const bhalf_t& accept_ref(const bhalf_t& a) { return a; }
+#endif  // !KOKKOS_BHALF_T_IS_FLOAT
 
 enum OP_TESTS {
   ASSIGN,
@@ -269,18 +272,21 @@ enum OP_TESTS {
   N_OP_TESTS
 };
 
-template <class view_type>
+template <class view_type, class half_type>
 struct Functor_TestHalfVolatileOperators {
-  volatile half_t h_lhs, h_rhs;
+  volatile half_type h_lhs, h_rhs;
   view_type actual_lhs, expected_lhs;
   double d_lhs, d_rhs;
-  Functor_TestHalfVolatileOperators(volatile half_t lhs = half_t(0),
-                                    volatile half_t rhs = half_t(0))
+  Functor_TestHalfVolatileOperators(volatile half_type lhs = half_type(0),
+                                    volatile half_type rhs = half_type(0))
       : h_lhs(lhs), h_rhs(rhs) {
     actual_lhs   = view_type("actual_lhs", N_OP_TESTS);
     expected_lhs = view_type("expected_lhs", N_OP_TESTS);
-    d_lhs        = cast_from_half<double>(h_lhs);
-    d_rhs        = cast_from_half<double>(h_rhs);
+    half_type nv_tmp;
+    nv_tmp = h_lhs;
+    d_lhs  = static_cast<double>(nv_tmp);
+    nv_tmp = h_rhs;
+    d_rhs  = static_cast<double>(nv_tmp);
     if (std::is_same<view_type, ViewTypeHost>::value) {
       auto run_on_host = *this;
       run_on_host(0);
@@ -292,7 +298,8 @@ struct Functor_TestHalfVolatileOperators {
 
   KOKKOS_FUNCTION
   void operator()(int) const {
-    volatile half_t tmp_lhs;
+    volatile half_type tmp_lhs;
+    half_type nv_tmp;
 
     // Initialze output views to catch missing test invocations
     for (int i = 0; i < N_OP_TESTS; ++i) {
@@ -300,8 +307,8 @@ struct Functor_TestHalfVolatileOperators {
       expected_lhs(i) = -1;
     }
 
-    tmp_lhs              = h_lhs;
-    actual_lhs(ASSIGN)   = cast_from_half<double>(tmp_lhs);
+    nv_tmp               = h_lhs;
+    actual_lhs(ASSIGN)   = static_cast<double>(nv_tmp);
     expected_lhs(ASSIGN) = d_lhs;
 
     actual_lhs(LT)   = h_lhs < h_rhs;
@@ -324,42 +331,47 @@ struct Functor_TestHalfVolatileOperators {
 
     tmp_lhs = h_lhs;
     tmp_lhs += h_rhs;
-    actual_lhs(CADD_H_H)   = cast_from_half<double>(tmp_lhs);
+    nv_tmp                 = tmp_lhs;
+    actual_lhs(CADD_H_H)   = static_cast<double>(nv_tmp);
     expected_lhs(CADD_H_H) = d_lhs;
     expected_lhs(CADD_H_H) += d_rhs;
 
     tmp_lhs = h_lhs;
     tmp_lhs -= h_rhs;
-    actual_lhs(CSUB_H_H)   = cast_from_half<double>(tmp_lhs);
+    nv_tmp                 = tmp_lhs;
+    actual_lhs(CSUB_H_H)   = static_cast<double>(nv_tmp);
     expected_lhs(CSUB_H_H) = d_lhs;
     expected_lhs(CSUB_H_H) -= d_rhs;
 
     tmp_lhs = h_lhs;
     tmp_lhs *= h_rhs;
-    actual_lhs(CMUL_H_H)   = cast_from_half<double>(tmp_lhs);
+    nv_tmp                 = tmp_lhs;
+    actual_lhs(CMUL_H_H)   = static_cast<double>(nv_tmp);
     expected_lhs(CMUL_H_H) = d_lhs;
     expected_lhs(CMUL_H_H) *= d_rhs;
 
     tmp_lhs = h_lhs;
     tmp_lhs /= h_rhs;
-    actual_lhs(CDIV_H_H)   = cast_from_half<double>(tmp_lhs);
+    nv_tmp                 = tmp_lhs;
+    actual_lhs(CDIV_H_H)   = static_cast<double>(nv_tmp);
     expected_lhs(CDIV_H_H) = d_lhs;
     expected_lhs(CDIV_H_H) /= d_rhs;
   }
 };
 
-template <class view_type>
+template <class view_type, class half_type>
 struct Functor_TestHalfOperators {
-  half_t h_lhs, h_rhs;
+  half_type h_lhs, h_rhs;
   double d_lhs, d_rhs;
   view_type actual_lhs, expected_lhs;
 
-  Functor_TestHalfOperators(half_t lhs = half_t(0), half_t rhs = half_t(0))
+  Functor_TestHalfOperators(half_type lhs = half_type(0),
+                            half_type rhs = half_type(0))
       : h_lhs(lhs), h_rhs(rhs) {
     actual_lhs   = view_type("actual_lhs", N_OP_TESTS);
     expected_lhs = view_type("expected_lhs", N_OP_TESTS);
-    d_lhs        = cast_from_half<double>(h_lhs);
-    d_rhs        = cast_from_half<double>(h_rhs);
+    d_lhs        = static_cast<double>(h_lhs);
+    d_rhs        = static_cast<double>(h_rhs);
 
     if (std::is_same<view_type, ViewTypeHost>::value) {
       auto run_on_host = *this;
@@ -377,13 +389,13 @@ struct Functor_TestHalfOperators {
     auto sum = static_cast<LhsType>(h_lhs) + static_cast<RhsType>(h_rhs);
     actual_lhs(op_test_idx) = static_cast<double>(sum);
 
-    if (std::is_same<RhsType, half_t>::value &&
-        std::is_same<LhsType, half_t>::value) {
+    if (std::is_same<RhsType, half_type>::value &&
+        std::is_same<LhsType, half_type>::value) {
       expected_lhs(op_test_idx) = d_lhs + d_rhs;
     } else {
-      if (std::is_same<LhsType, half_t>::value)
+      if (std::is_same<LhsType, half_type>::value)
         expected_lhs(op_test_idx) = d_lhs + static_cast<RhsType>(d_rhs);
-      if (std::is_same<RhsType, half_t>::value)
+      if (std::is_same<RhsType, half_type>::value)
         expected_lhs(op_test_idx) = static_cast<LhsType>(d_lhs) + d_rhs;
     }
 
@@ -397,13 +409,13 @@ struct Functor_TestHalfOperators {
     auto result = static_cast<LhsType>(h_lhs) - static_cast<RhsType>(h_rhs);
     actual_lhs(op_test_idx) = static_cast<double>(result);
 
-    if (std::is_same<RhsType, half_t>::value &&
-        std::is_same<LhsType, half_t>::value) {
+    if (std::is_same<RhsType, half_type>::value &&
+        std::is_same<LhsType, half_type>::value) {
       expected_lhs(op_test_idx) = d_lhs - d_rhs;
     } else {
-      if (std::is_same<LhsType, half_t>::value)
+      if (std::is_same<LhsType, half_type>::value)
         expected_lhs(op_test_idx) = d_lhs - static_cast<RhsType>(d_rhs);
-      if (std::is_same<RhsType, half_t>::value)
+      if (std::is_same<RhsType, half_type>::value)
         expected_lhs(op_test_idx) = static_cast<LhsType>(d_lhs) - d_rhs;
     }
 
@@ -417,13 +429,13 @@ struct Functor_TestHalfOperators {
     auto result = static_cast<LhsType>(h_lhs) * static_cast<RhsType>(h_rhs);
     actual_lhs(op_test_idx) = static_cast<double>(result);
 
-    if (std::is_same<RhsType, half_t>::value &&
-        std::is_same<LhsType, half_t>::value) {
+    if (std::is_same<RhsType, half_type>::value &&
+        std::is_same<LhsType, half_type>::value) {
       expected_lhs(op_test_idx) = d_lhs * d_rhs;
     } else {
-      if (std::is_same<LhsType, half_t>::value)
+      if (std::is_same<LhsType, half_type>::value)
         expected_lhs(op_test_idx) = d_lhs * static_cast<RhsType>(d_rhs);
-      if (std::is_same<RhsType, half_t>::value)
+      if (std::is_same<RhsType, half_type>::value)
         expected_lhs(op_test_idx) = static_cast<LhsType>(d_lhs) * d_rhs;
     }
 
@@ -437,13 +449,13 @@ struct Functor_TestHalfOperators {
     auto result = static_cast<LhsType>(h_lhs) / static_cast<RhsType>(h_rhs);
     actual_lhs(op_test_idx) = static_cast<double>(result);
 
-    if (std::is_same<RhsType, half_t>::value &&
-        std::is_same<LhsType, half_t>::value) {
+    if (std::is_same<RhsType, half_type>::value &&
+        std::is_same<LhsType, half_type>::value) {
       expected_lhs(op_test_idx) = d_lhs / d_rhs;
     } else {
-      if (std::is_same<LhsType, half_t>::value)
+      if (std::is_same<LhsType, half_type>::value)
         expected_lhs(op_test_idx) = d_lhs / static_cast<RhsType>(d_rhs);
-      if (std::is_same<RhsType, half_t>::value)
+      if (std::is_same<RhsType, half_type>::value)
         expected_lhs(op_test_idx) = static_cast<LhsType>(d_lhs) / d_rhs;
     }
 
@@ -454,10 +466,14 @@ struct Functor_TestHalfOperators {
 
   KOKKOS_FUNCTION
   void operator()(int) const {
-    half_t tmp_lhs, tmp2_lhs, *tmp_ptr;
+    half_type tmp_lhs, tmp2_lhs, *tmp_ptr;
     double tmp_d_lhs;
     float tmp_s_lhs;
-    using half_impl_type = Kokkos::Impl::half_impl_t::type;
+#if !defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
+    using half_impl_type = typename half_type::impl_type;
+#else
+    using half_impl_type = half_type;
+#endif  // !defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
     half_impl_type half_tmp;
 
     // Initialze output views to catch missing test invocations
@@ -467,54 +483,55 @@ struct Functor_TestHalfOperators {
     }
 
     tmp_lhs              = h_lhs;
-    actual_lhs(ASSIGN)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(ASSIGN)   = static_cast<double>(tmp_lhs);
     expected_lhs(ASSIGN) = d_lhs;
 
     tmp_lhs  = 0;
     tmp2_lhs = tmp_lhs           = h_lhs;
-    actual_lhs(ASSIGN_CHAINED)   = cast_from_half<double>(tmp2_lhs);
+    actual_lhs(ASSIGN_CHAINED)   = static_cast<double>(tmp2_lhs);
     expected_lhs(ASSIGN_CHAINED) = d_lhs;
 
-    actual_lhs(UNA)   = cast_from_half<double>(+h_lhs);
+    actual_lhs(UNA)   = static_cast<double>(+h_lhs);
     expected_lhs(UNA) = +d_lhs;
 
-    actual_lhs(UNS)   = cast_from_half<double>(-h_lhs);
+    actual_lhs(UNS)   = static_cast<double>(-h_lhs);
     expected_lhs(UNS) = -d_lhs;
 
     tmp_lhs                  = h_lhs;
     tmp_d_lhs                = d_lhs;
-    actual_lhs(PREFIX_INC)   = cast_from_half<double>(++tmp_lhs);
+    actual_lhs(PREFIX_INC)   = static_cast<double>(++tmp_lhs);
     expected_lhs(PREFIX_INC) = ++tmp_d_lhs;
 
-    actual_lhs(PREFIX_DEC)   = cast_from_half<double>(--tmp_lhs);
+    actual_lhs(PREFIX_DEC)   = static_cast<double>(--tmp_lhs);
     expected_lhs(PREFIX_DEC) = --tmp_d_lhs;
 
     // if (h_lhs != tmp_lhs) {
     //  printf("tmp_lhs = %f, h_lhs = %f\n", __half2float(tmp_lhs),
-    //  __half2float(h_lhs)); Kokkos::abort("Error in half_t prefix operators");
+    //  __half2float(h_lhs)); Kokkos::abort("Error in half_type prefix
+    //  operators");
     //}
 
-    actual_lhs(POSTFIX_INC)   = cast_from_half<double>(tmp_lhs++);
+    actual_lhs(POSTFIX_INC)   = static_cast<double>(tmp_lhs++);
     expected_lhs(POSTFIX_INC) = tmp_d_lhs++;
 
-    actual_lhs(POSTFIX_DEC)   = cast_from_half<double>(tmp_lhs--);
+    actual_lhs(POSTFIX_DEC)   = static_cast<double>(tmp_lhs--);
     expected_lhs(POSTFIX_DEC) = tmp_d_lhs--;
 
     // if (h_lhs != tmp_lhs) {
     //  printf("tmp_lhs = %f, h_lhs = %f\n", __half2float(tmp_lhs),
-    //  __half2float(h_lhs)); Kokkos::abort("Error in half_t postfix
+    //  __half2float(h_lhs)); Kokkos::abort("Error in half_type postfix
     //  operators");
     //}
 
     tmp_lhs = h_lhs;
     tmp_lhs += h_rhs;
-    actual_lhs(CADD_H_H)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CADD_H_H)   = static_cast<double>(tmp_lhs);
     expected_lhs(CADD_H_H) = d_lhs;
     expected_lhs(CADD_H_H) += d_rhs;
 
     tmp_lhs = h_lhs;
     tmp_lhs += static_cast<float>(d_rhs);
-    actual_lhs(CADD_H_S)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CADD_H_S)   = static_cast<double>(tmp_lhs);
     expected_lhs(CADD_H_S) = d_lhs;
     expected_lhs(CADD_H_S) += d_rhs;
 
@@ -526,7 +543,7 @@ struct Functor_TestHalfOperators {
 
     tmp_lhs = static_cast<double>(h_lhs);
     tmp_lhs += static_cast<double>(d_rhs);
-    actual_lhs(CADD_H_D)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CADD_H_D)   = static_cast<double>(tmp_lhs);
     expected_lhs(CADD_H_D) = d_lhs;
     expected_lhs(CADD_H_D) += d_rhs;
 
@@ -538,13 +555,13 @@ struct Functor_TestHalfOperators {
 
     tmp_lhs = h_lhs;
     tmp_lhs -= h_rhs;
-    actual_lhs(CSUB_H_H)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CSUB_H_H)   = static_cast<double>(tmp_lhs);
     expected_lhs(CSUB_H_H) = d_lhs;
     expected_lhs(CSUB_H_H) -= d_rhs;
 
     tmp_lhs = h_lhs;
     tmp_lhs -= static_cast<float>(d_rhs);
-    actual_lhs(CSUB_H_S)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CSUB_H_S)   = static_cast<double>(tmp_lhs);
     expected_lhs(CSUB_H_S) = d_lhs;
     expected_lhs(CSUB_H_S) -= d_rhs;
 
@@ -568,13 +585,13 @@ struct Functor_TestHalfOperators {
 
     tmp_lhs = h_lhs;
     tmp_lhs *= h_rhs;
-    actual_lhs(CMUL_H_H)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CMUL_H_H)   = static_cast<double>(tmp_lhs);
     expected_lhs(CMUL_H_H) = d_lhs;
     expected_lhs(CMUL_H_H) *= d_rhs;
 
     tmp_lhs = h_lhs;
     tmp_lhs *= static_cast<float>(d_rhs);
-    actual_lhs(CMUL_H_S)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CMUL_H_S)   = static_cast<double>(tmp_lhs);
     expected_lhs(CMUL_H_S) = d_lhs;
     expected_lhs(CMUL_H_S) *= d_rhs;
 
@@ -598,13 +615,13 @@ struct Functor_TestHalfOperators {
 
     tmp_lhs = h_lhs;
     tmp_lhs /= h_rhs;
-    actual_lhs(CDIV_H_H)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CDIV_H_H)   = static_cast<double>(tmp_lhs);
     expected_lhs(CDIV_H_H) = d_lhs;
     expected_lhs(CDIV_H_H) /= d_rhs;
 
     tmp_lhs = h_lhs;
     tmp_lhs /= static_cast<float>(d_rhs);
-    actual_lhs(CDIV_H_S)   = cast_from_half<double>(tmp_lhs);
+    actual_lhs(CDIV_H_S)   = static_cast<double>(tmp_lhs);
     expected_lhs(CDIV_H_S) = d_lhs;
     expected_lhs(CDIV_H_S) /= d_rhs;
 
@@ -626,28 +643,30 @@ struct Functor_TestHalfOperators {
     expected_lhs(CDIV_D_H) = d_lhs;
     expected_lhs(CDIV_D_H) /= d_rhs;
 
-    test_add<half_t, half_t, half_t>(ADD_H_H, ADD_H_H_SZ);
-    test_add<float, half_t, float>(ADD_S_H, ADD_S_H_SZ);
-    test_add<double, half_t, double>(ADD_D_H, ADD_D_H_SZ);
-    test_add<short int, half_t, half_t>(ADD_SI_H, ADD_SI_H_SZ);
-    test_add<int, half_t, half_t>(ADD_I_H, ADD_I_H_SZ);
-    test_add<long int, half_t, half_t>(ADD_LI_H, ADD_LI_H_SZ);
-    test_add<long long int, half_t, half_t>(ADD_LLI_H, ADD_LLI_H_SZ);
-    test_add<half_t, float, float>(ADD_H_S, ADD_H_S_SZ);
-    test_add<half_t, double, double>(ADD_H_D, ADD_H_D_SZ);
-    test_add<half_t, short int, half_t>(ADD_H_SI, ADD_H_SI_SZ);
-    test_add<half_t, int, half_t>(ADD_H_I, ADD_H_I_SZ);
-    test_add<half_t, long int, half_t>(ADD_H_LI, ADD_H_LI_SZ);
-    test_add<half_t, long long int, half_t>(ADD_H_LLI, ADD_H_LLI_SZ);
-
-    // Check for potential overflow due to negative half_t -> unsigned integral
-    // cast
+    test_add<half_type, half_type, half_type>(ADD_H_H, ADD_H_H_SZ);
+    test_add<float, half_type, float>(ADD_S_H, ADD_S_H_SZ);
+    test_add<double, half_type, double>(ADD_D_H, ADD_D_H_SZ);
+    test_add<short int, half_type, half_type>(ADD_SI_H, ADD_SI_H_SZ);
+    test_add<int, half_type, half_type>(ADD_I_H, ADD_I_H_SZ);
+    test_add<long int, half_type, half_type>(ADD_LI_H, ADD_LI_H_SZ);
+    test_add<long long int, half_type, half_type>(ADD_LLI_H, ADD_LLI_H_SZ);
+    test_add<half_type, float, float>(ADD_H_S, ADD_H_S_SZ);
+    test_add<half_type, double, double>(ADD_H_D, ADD_H_D_SZ);
+    test_add<half_type, short int, half_type>(ADD_H_SI, ADD_H_SI_SZ);
+    test_add<half_type, int, half_type>(ADD_H_I, ADD_H_I_SZ);
+    test_add<half_type, long int, half_type>(ADD_H_LI, ADD_H_LI_SZ);
+    test_add<half_type, long long int, half_type>(ADD_H_LLI, ADD_H_LLI_SZ);
+
+    // Check for potential overflow due to negative half_type -> unsigned
+    // integral cast
     if (h_lhs >= 0) {
-      test_add<unsigned short int, half_t, half_t>(ADD_USI_H, ADD_USI_H_SZ);
-      test_add<unsigned int, half_t, half_t>(ADD_UI_H, ADD_UI_H_SZ);
-      test_add<unsigned long int, half_t, half_t>(ADD_ULI_H, ADD_ULI_H_SZ);
-      test_add<unsigned long long int, half_t, half_t>(ADD_ULLI_H,
-                                                       ADD_ULLI_H_SZ);
+      test_add<unsigned short int, half_type, half_type>(ADD_USI_H,
+                                                         ADD_USI_H_SZ);
+      test_add<unsigned int, half_type, half_type>(ADD_UI_H, ADD_UI_H_SZ);
+      test_add<unsigned long int, half_type, half_type>(ADD_ULI_H,
+                                                        ADD_ULI_H_SZ);
+      test_add<unsigned long long int, half_type, half_type>(ADD_ULLI_H,
+                                                             ADD_ULLI_H_SZ);
     } else {
       actual_lhs(ADD_USI_H)     = expected_lhs(ADD_USI_H);
       actual_lhs(ADD_USI_H_SZ)  = expected_lhs(ADD_USI_H_SZ);
@@ -659,14 +678,16 @@ struct Functor_TestHalfOperators {
       actual_lhs(ADD_ULLI_H_SZ) = expected_lhs(ADD_ULLI_H_SZ);
     }
 
-    // Check for potential overflow due to negative half_t -> unsigned integral
-    // cast
+    // Check for potential overflow due to negative half_type -> unsigned
+    // integral cast
     if (h_rhs >= 0) {
-      test_add<half_t, unsigned short int, half_t>(ADD_H_USI, ADD_H_USI_SZ);
-      test_add<half_t, unsigned int, half_t>(ADD_H_UI, ADD_H_UI_SZ);
-      test_add<half_t, unsigned long int, half_t>(ADD_H_ULI, ADD_H_ULI_SZ);
-      test_add<half_t, unsigned long long int, half_t>(ADD_H_ULLI,
-                                                       ADD_H_ULLI_SZ);
+      test_add<half_type, unsigned short int, half_type>(ADD_H_USI,
+                                                         ADD_H_USI_SZ);
+      test_add<half_type, unsigned int, half_type>(ADD_H_UI, ADD_H_UI_SZ);
+      test_add<half_type, unsigned long int, half_type>(ADD_H_ULI,
+                                                        ADD_H_ULI_SZ);
+      test_add<half_type, unsigned long long int, half_type>(ADD_H_ULLI,
+                                                             ADD_H_ULLI_SZ);
     } else {
       actual_lhs(ADD_H_USI)     = expected_lhs(ADD_H_USI);
       actual_lhs(ADD_H_USI_SZ)  = expected_lhs(ADD_H_USI_SZ);
@@ -678,28 +699,30 @@ struct Functor_TestHalfOperators {
       actual_lhs(ADD_H_ULLI_SZ) = expected_lhs(ADD_H_ULLI_SZ);
     }
 
-    test_sub<half_t, half_t, half_t>(SUB_H_H, SUB_H_H_SZ);
-    test_sub<float, half_t, float>(SUB_S_H, SUB_S_H_SZ);
-    test_sub<double, half_t, double>(SUB_D_H, SUB_D_H_SZ);
-    test_sub<short int, half_t, half_t>(SUB_SI_H, SUB_SI_H_SZ);
-    test_sub<int, half_t, half_t>(SUB_I_H, SUB_I_H_SZ);
-    test_sub<long int, half_t, half_t>(SUB_LI_H, SUB_LI_H_SZ);
-    test_sub<long long int, half_t, half_t>(SUB_LLI_H, SUB_LLI_H_SZ);
-    test_sub<half_t, float, float>(SUB_H_S, SUB_H_S_SZ);
-    test_sub<half_t, double, double>(SUB_H_D, SUB_H_D_SZ);
-    test_sub<half_t, short int, half_t>(SUB_H_SI, SUB_H_SI_SZ);
-    test_sub<half_t, int, half_t>(SUB_H_I, SUB_H_I_SZ);
-    test_sub<half_t, long int, half_t>(SUB_H_LI, SUB_H_LI_SZ);
-    test_sub<half_t, long long int, half_t>(SUB_H_LLI, SUB_H_LLI_SZ);
-
-    // Check for potential overflow due to negative half_t -> unsigned integral
-    // cast
-    if (h_lhs >= half_t(0)) {
-      test_sub<unsigned short int, half_t, half_t>(SUB_USI_H, SUB_USI_H_SZ);
-      test_sub<unsigned int, half_t, half_t>(SUB_UI_H, SUB_UI_H_SZ);
-      test_sub<unsigned long int, half_t, half_t>(SUB_ULI_H, SUB_ULI_H_SZ);
-      test_sub<unsigned long long int, half_t, half_t>(SUB_ULLI_H,
-                                                       SUB_ULLI_H_SZ);
+    test_sub<half_type, half_type, half_type>(SUB_H_H, SUB_H_H_SZ);
+    test_sub<float, half_type, float>(SUB_S_H, SUB_S_H_SZ);
+    test_sub<double, half_type, double>(SUB_D_H, SUB_D_H_SZ);
+    test_sub<short int, half_type, half_type>(SUB_SI_H, SUB_SI_H_SZ);
+    test_sub<int, half_type, half_type>(SUB_I_H, SUB_I_H_SZ);
+    test_sub<long int, half_type, half_type>(SUB_LI_H, SUB_LI_H_SZ);
+    test_sub<long long int, half_type, half_type>(SUB_LLI_H, SUB_LLI_H_SZ);
+    test_sub<half_type, float, float>(SUB_H_S, SUB_H_S_SZ);
+    test_sub<half_type, double, double>(SUB_H_D, SUB_H_D_SZ);
+    test_sub<half_type, short int, half_type>(SUB_H_SI, SUB_H_SI_SZ);
+    test_sub<half_type, int, half_type>(SUB_H_I, SUB_H_I_SZ);
+    test_sub<half_type, long int, half_type>(SUB_H_LI, SUB_H_LI_SZ);
+    test_sub<half_type, long long int, half_type>(SUB_H_LLI, SUB_H_LLI_SZ);
+
+    // Check for potential overflow due to negative half_type -> unsigned
+    // integral cast
+    if (h_lhs >= half_type(0)) {
+      test_sub<unsigned short int, half_type, half_type>(SUB_USI_H,
+                                                         SUB_USI_H_SZ);
+      test_sub<unsigned int, half_type, half_type>(SUB_UI_H, SUB_UI_H_SZ);
+      test_sub<unsigned long int, half_type, half_type>(SUB_ULI_H,
+                                                        SUB_ULI_H_SZ);
+      test_sub<unsigned long long int, half_type, half_type>(SUB_ULLI_H,
+                                                             SUB_ULLI_H_SZ);
     } else {
       actual_lhs(SUB_USI_H)     = expected_lhs(SUB_USI_H);
       actual_lhs(SUB_USI_H_SZ)  = expected_lhs(SUB_USI_H_SZ);
@@ -711,14 +734,16 @@ struct Functor_TestHalfOperators {
       actual_lhs(SUB_ULLI_H_SZ) = expected_lhs(SUB_ULLI_H_SZ);
     }
 
-    // Check for potential overflow due to negative half_t -> unsigned integral
-    // cast
-    if (h_rhs >= half_t(0)) {
-      test_sub<half_t, unsigned short int, half_t>(SUB_H_USI, SUB_H_USI_SZ);
-      test_sub<half_t, unsigned int, half_t>(SUB_H_UI, SUB_H_UI_SZ);
-      test_sub<half_t, unsigned long int, half_t>(SUB_H_ULI, SUB_H_ULI_SZ);
-      test_sub<half_t, unsigned long long int, half_t>(SUB_H_ULLI,
-                                                       SUB_H_ULLI_SZ);
+    // Check for potential overflow due to negative half_type -> unsigned
+    // integral cast
+    if (h_rhs >= half_type(0)) {
+      test_sub<half_type, unsigned short int, half_type>(SUB_H_USI,
+                                                         SUB_H_USI_SZ);
+      test_sub<half_type, unsigned int, half_type>(SUB_H_UI, SUB_H_UI_SZ);
+      test_sub<half_type, unsigned long int, half_type>(SUB_H_ULI,
+                                                        SUB_H_ULI_SZ);
+      test_sub<half_type, unsigned long long int, half_type>(SUB_H_ULLI,
+                                                             SUB_H_ULLI_SZ);
     } else {
       actual_lhs(SUB_H_USI)     = expected_lhs(SUB_H_USI);
       actual_lhs(SUB_H_USI_SZ)  = expected_lhs(SUB_H_USI_SZ);
@@ -730,28 +755,30 @@ struct Functor_TestHalfOperators {
       actual_lhs(SUB_H_ULLI_SZ) = expected_lhs(SUB_H_ULLI_SZ);
     }
 
-    test_mul<half_t, half_t, half_t>(MUL_H_H, MUL_H_H_SZ);
-    test_mul<float, half_t, float>(MUL_S_H, MUL_S_H_SZ);
-    test_mul<double, half_t, double>(MUL_D_H, MUL_D_H_SZ);
-    test_mul<short int, half_t, half_t>(MUL_SI_H, MUL_SI_H_SZ);
-    test_mul<int, half_t, half_t>(MUL_I_H, MUL_I_H_SZ);
-    test_mul<long int, half_t, half_t>(MUL_LI_H, MUL_LI_H_SZ);
-    test_mul<long long int, half_t, half_t>(MUL_LLI_H, MUL_LLI_H_SZ);
-    test_mul<half_t, float, float>(MUL_H_S, MUL_H_S_SZ);
-    test_mul<half_t, double, double>(MUL_H_D, MUL_H_D_SZ);
-    test_mul<half_t, short int, half_t>(MUL_H_SI, MUL_H_SI_SZ);
-    test_mul<half_t, int, half_t>(MUL_H_I, MUL_H_I_SZ);
-    test_mul<half_t, long int, half_t>(MUL_H_LI, MUL_H_LI_SZ);
-    test_mul<half_t, long long int, half_t>(MUL_H_LLI, MUL_H_LLI_SZ);
-
-    // Check for potential overflow due to negative half_t -> unsigned integral
-    // cast
-    if (h_lhs >= half_t(0)) {
-      test_mul<unsigned short int, half_t, half_t>(MUL_USI_H, MUL_USI_H_SZ);
-      test_mul<unsigned int, half_t, half_t>(MUL_UI_H, MUL_UI_H_SZ);
-      test_mul<unsigned long int, half_t, half_t>(MUL_ULI_H, MUL_ULI_H_SZ);
-      test_mul<unsigned long long int, half_t, half_t>(MUL_ULLI_H,
-                                                       MUL_ULLI_H_SZ);
+    test_mul<half_type, half_type, half_type>(MUL_H_H, MUL_H_H_SZ);
+    test_mul<float, half_type, float>(MUL_S_H, MUL_S_H_SZ);
+    test_mul<double, half_type, double>(MUL_D_H, MUL_D_H_SZ);
+    test_mul<short int, half_type, half_type>(MUL_SI_H, MUL_SI_H_SZ);
+    test_mul<int, half_type, half_type>(MUL_I_H, MUL_I_H_SZ);
+    test_mul<long int, half_type, half_type>(MUL_LI_H, MUL_LI_H_SZ);
+    test_mul<long long int, half_type, half_type>(MUL_LLI_H, MUL_LLI_H_SZ);
+    test_mul<half_type, float, float>(MUL_H_S, MUL_H_S_SZ);
+    test_mul<half_type, double, double>(MUL_H_D, MUL_H_D_SZ);
+    test_mul<half_type, short int, half_type>(MUL_H_SI, MUL_H_SI_SZ);
+    test_mul<half_type, int, half_type>(MUL_H_I, MUL_H_I_SZ);
+    test_mul<half_type, long int, half_type>(MUL_H_LI, MUL_H_LI_SZ);
+    test_mul<half_type, long long int, half_type>(MUL_H_LLI, MUL_H_LLI_SZ);
+
+    // Check for potential overflow due to negative half_type -> unsigned
+    // integral cast
+    if (h_lhs >= half_type(0)) {
+      test_mul<unsigned short int, half_type, half_type>(MUL_USI_H,
+                                                         MUL_USI_H_SZ);
+      test_mul<unsigned int, half_type, half_type>(MUL_UI_H, MUL_UI_H_SZ);
+      test_mul<unsigned long int, half_type, half_type>(MUL_ULI_H,
+                                                        MUL_ULI_H_SZ);
+      test_mul<unsigned long long int, half_type, half_type>(MUL_ULLI_H,
+                                                             MUL_ULLI_H_SZ);
     } else {
       actual_lhs(MUL_USI_H)     = expected_lhs(MUL_USI_H);
       actual_lhs(MUL_UI_H)      = expected_lhs(MUL_UI_H);
@@ -763,14 +790,16 @@ struct Functor_TestHalfOperators {
       actual_lhs(MUL_ULLI_H_SZ) = expected_lhs(MUL_ULLI_H_SZ);
     }
 
-    // Check for potential overflow due to negative half_t -> unsigned integral
-    // cast
-    if (h_rhs >= half_t(0)) {
-      test_mul<half_t, unsigned short int, half_t>(MUL_H_USI, MUL_H_USI_SZ);
-      test_mul<half_t, unsigned int, half_t>(MUL_H_UI, MUL_H_UI_SZ);
-      test_mul<half_t, unsigned long int, half_t>(MUL_H_ULI, MUL_H_ULI_SZ);
-      test_mul<half_t, unsigned long long int, half_t>(MUL_H_ULLI,
-                                                       MUL_H_ULLI_SZ);
+    // Check for potential overflow due to negative half_type -> unsigned
+    // integral cast
+    if (h_rhs >= half_type(0)) {
+      test_mul<half_type, unsigned short int, half_type>(MUL_H_USI,
+                                                         MUL_H_USI_SZ);
+      test_mul<half_type, unsigned int, half_type>(MUL_H_UI, MUL_H_UI_SZ);
+      test_mul<half_type, unsigned long int, half_type>(MUL_H_ULI,
+                                                        MUL_H_ULI_SZ);
+      test_mul<half_type, unsigned long long int, half_type>(MUL_H_ULLI,
+                                                             MUL_H_ULLI_SZ);
     } else {
       actual_lhs(MUL_H_USI)     = expected_lhs(MUL_H_USI);
       actual_lhs(MUL_H_UI)      = expected_lhs(MUL_H_UI);
@@ -782,22 +811,23 @@ struct Functor_TestHalfOperators {
       actual_lhs(MUL_H_ULLI_SZ) = expected_lhs(MUL_H_ULLI_SZ);
     }
 
-    test_div<half_t, half_t, half_t>(DIV_H_H, DIV_H_H_SZ);
-    test_div<float, half_t, float>(DIV_S_H, DIV_S_H_SZ);
-    test_div<double, half_t, double>(DIV_D_H, DIV_D_H_SZ);
-    test_div<short int, half_t, half_t>(DIV_SI_H, DIV_SI_H_SZ);
-    test_div<int, half_t, half_t>(DIV_I_H, DIV_I_H_SZ);
-    test_div<long int, half_t, half_t>(DIV_LI_H, DIV_LI_H_SZ);
-    test_div<long long int, half_t, half_t>(DIV_LLI_H, DIV_LLI_H_SZ);
-    test_div<half_t, float, float>(DIV_H_S, DIV_H_S_SZ);
-    test_div<half_t, double, double>(DIV_H_D, DIV_H_D_SZ);
-
-    // Check for division by zero due to truncation by half_t -> integral cast
-    if (h_rhs >= half_t(1) || h_rhs <= half_t(-1)) {
-      test_div<half_t, short int, half_t>(DIV_H_SI, DIV_H_SI_SZ);
-      test_div<half_t, int, half_t>(DIV_H_I, DIV_H_I_SZ);
-      test_div<half_t, long int, half_t>(DIV_H_LI, DIV_H_LI_SZ);
-      test_div<half_t, long long int, half_t>(DIV_H_LLI, DIV_H_LLI_SZ);
+    test_div<half_type, half_type, half_type>(DIV_H_H, DIV_H_H_SZ);
+    test_div<float, half_type, float>(DIV_S_H, DIV_S_H_SZ);
+    test_div<double, half_type, double>(DIV_D_H, DIV_D_H_SZ);
+    test_div<short int, half_type, half_type>(DIV_SI_H, DIV_SI_H_SZ);
+    test_div<int, half_type, half_type>(DIV_I_H, DIV_I_H_SZ);
+    test_div<long int, half_type, half_type>(DIV_LI_H, DIV_LI_H_SZ);
+    test_div<long long int, half_type, half_type>(DIV_LLI_H, DIV_LLI_H_SZ);
+    test_div<half_type, float, float>(DIV_H_S, DIV_H_S_SZ);
+    test_div<half_type, double, double>(DIV_H_D, DIV_H_D_SZ);
+
+    // Check for division by zero due to truncation by half_type -> integral
+    // cast
+    if (h_rhs >= half_type(1) || h_rhs <= half_type(-1)) {
+      test_div<half_type, short int, half_type>(DIV_H_SI, DIV_H_SI_SZ);
+      test_div<half_type, int, half_type>(DIV_H_I, DIV_H_I_SZ);
+      test_div<half_type, long int, half_type>(DIV_H_LI, DIV_H_LI_SZ);
+      test_div<half_type, long long int, half_type>(DIV_H_LLI, DIV_H_LLI_SZ);
     } else {
       actual_lhs(DIV_H_SI)     = expected_lhs(DIV_H_SI);
       actual_lhs(DIV_H_I)      = expected_lhs(DIV_H_I);
@@ -809,14 +839,16 @@ struct Functor_TestHalfOperators {
       actual_lhs(DIV_H_LLI_SZ) = expected_lhs(DIV_H_LLI_SZ);
     }
 
-    // Check for potential overflow due to negative half_t -> unsigned integral
-    // cast
-    if (h_lhs >= half_t(0)) {
-      test_div<unsigned short int, half_t, half_t>(DIV_USI_H, DIV_USI_H_SZ);
-      test_div<unsigned int, half_t, half_t>(DIV_UI_H, DIV_UI_H_SZ);
-      test_div<unsigned long int, half_t, half_t>(DIV_ULI_H, DIV_ULI_H_SZ);
-      test_div<unsigned long long int, half_t, half_t>(DIV_ULLI_H,
-                                                       DIV_ULLI_H_SZ);
+    // Check for potential overflow due to negative half_type -> unsigned
+    // integral cast
+    if (h_lhs >= half_type(0)) {
+      test_div<unsigned short int, half_type, half_type>(DIV_USI_H,
+                                                         DIV_USI_H_SZ);
+      test_div<unsigned int, half_type, half_type>(DIV_UI_H, DIV_UI_H_SZ);
+      test_div<unsigned long int, half_type, half_type>(DIV_ULI_H,
+                                                        DIV_ULI_H_SZ);
+      test_div<unsigned long long int, half_type, half_type>(DIV_ULLI_H,
+                                                             DIV_ULLI_H_SZ);
     } else {
       actual_lhs(DIV_USI_H)     = expected_lhs(DIV_USI_H);
       actual_lhs(DIV_UI_H)      = expected_lhs(DIV_UI_H);
@@ -828,13 +860,16 @@ struct Functor_TestHalfOperators {
       actual_lhs(DIV_ULLI_H_SZ) = expected_lhs(DIV_ULLI_H_SZ);
     }
 
-    // Check for division by zero due to truncation by half_t -> integral cast
-    if (h_rhs >= half_t(1)) {
-      test_div<half_t, unsigned short int, half_t>(DIV_H_USI, DIV_H_USI_SZ);
-      test_div<half_t, unsigned int, half_t>(DIV_H_UI, DIV_H_UI_SZ);
-      test_div<half_t, unsigned long int, half_t>(DIV_H_ULI, DIV_H_ULI_SZ);
-      test_div<half_t, unsigned long long int, half_t>(DIV_H_ULLI,
-                                                       DIV_H_ULLI_SZ);
+    // Check for division by zero due to truncation by half_type -> integral
+    // cast
+    if (h_rhs >= half_type(1)) {
+      test_div<half_type, unsigned short int, half_type>(DIV_H_USI,
+                                                         DIV_H_USI_SZ);
+      test_div<half_type, unsigned int, half_type>(DIV_H_UI, DIV_H_UI_SZ);
+      test_div<half_type, unsigned long int, half_type>(DIV_H_ULI,
+                                                        DIV_H_ULI_SZ);
+      test_div<half_type, unsigned long long int, half_type>(DIV_H_ULLI,
+                                                             DIV_H_ULLI_SZ);
     } else {
       actual_lhs(DIV_H_USI)     = expected_lhs(DIV_H_USI);
       actual_lhs(DIV_H_USI_SZ)  = expected_lhs(DIV_H_USI_SZ);
@@ -850,10 +885,10 @@ struct Functor_TestHalfOperators {
     actual_lhs(NEG)   = static_cast<double>(!h_lhs);
     expected_lhs(NEG) = !d_lhs;
 
-    actual_lhs(AND)   = static_cast<double>(half_t(0) && h_lhs);
+    actual_lhs(AND)   = static_cast<double>(half_type(0) && h_lhs);
     expected_lhs(AND) = double(0) && d_lhs;
 
-    actual_lhs(OR)   = static_cast<double>(h_lhs || half_t(1));
+    actual_lhs(OR)   = static_cast<double>(h_lhs || half_type(1));
     expected_lhs(OR) = d_lhs || double(1);
 
     actual_lhs(EQ)   = h_lhs == h_rhs;
@@ -877,21 +912,21 @@ struct Functor_TestHalfOperators {
     // actual_lhs(TW)   = h_lhs <=> h_rhs;  // Need C++20?
     // expected_lhs(TW) = d_lhs <=> d_rhs;  // Need C++20?
 
-    actual_lhs(PASS_BY_REF)   = cast_from_half<double>(accept_ref(h_lhs));
+    actual_lhs(PASS_BY_REF)   = static_cast<double>(accept_ref(h_lhs));
     expected_lhs(PASS_BY_REF) = d_lhs;
 
-    half_tmp = cast_from_half<float>(h_lhs);
+    half_tmp = static_cast<float>(h_lhs);
     tmp_ptr  = &(tmp_lhs = half_tmp);
     if (tmp_ptr != &tmp_lhs)
-      Kokkos::abort("Error in half_t address-of operator");
-    actual_lhs(AO_IMPL_HALF)   = cast_from_half<double>(*tmp_ptr);
+      Kokkos::abort("Error in half_type address-of operator");
+    actual_lhs(AO_IMPL_HALF)   = static_cast<double>(*tmp_ptr);
     expected_lhs(AO_IMPL_HALF) = d_lhs;
 
     tmp2_lhs = h_lhs;
     tmp_ptr  = &(tmp_lhs = tmp2_lhs);
     if (tmp_ptr != &tmp_lhs)
-      Kokkos::abort("Error in half_t address-of operator");
-    actual_lhs(AO_HALF_T)   = cast_from_half<double>(tmp_ptr[0]);
+      Kokkos::abort("Error in half_type address-of operator");
+    actual_lhs(AO_HALF_T)   = static_cast<double>(tmp_ptr[0]);
     expected_lhs(AO_HALF_T) = d_lhs;
 
     // TODO: Check upcasting and downcasting in large expressions involving
@@ -899,10 +934,17 @@ struct Functor_TestHalfOperators {
   }
 };
 
-void __test_half_operators(half_t h_lhs, half_t h_rhs) {
-  double epsilon = KOKKOS_HALF_T_IS_FLOAT ? FLT_EPSILON : FP16_EPSILON;
-  Functor_TestHalfOperators<ViewType> f_device(h_lhs, h_rhs);  // Run on device
-  Functor_TestHalfOperators<ViewTypeHost> f_host(h_lhs, h_rhs);  // Run on host
+template <class half_type>
+void __test_half_operators(half_type h_lhs, half_type h_rhs) {
+  double epsilon = FLT_EPSILON;
+
+  if (std::is_same<half_type, Kokkos::Experimental::half_t>::value)
+    epsilon = FP16_EPSILON;
+  if (std::is_same<half_type, Kokkos::Experimental::bhalf_t>::value)
+    epsilon = BF16_EPSILON;
+
+  Functor_TestHalfOperators<ViewType, half_type> f_device(h_lhs, h_rhs);
+  Functor_TestHalfOperators<ViewTypeHost, half_type> f_host(h_lhs, h_rhs);
   typename ViewType::HostMirror f_device_actual_lhs =
       Kokkos::create_mirror_view(f_device.actual_lhs);
   typename ViewType::HostMirror f_device_expected_lhs =
@@ -920,11 +962,12 @@ void __test_half_operators(half_t h_lhs, half_t h_rhs) {
   }
 
   // Test partial volatile support
-  volatile half_t _h_lhs = h_lhs;
-  volatile half_t _h_rhs = h_rhs;
-  Functor_TestHalfVolatileOperators<ViewType> f_volatile_device(_h_lhs, _h_rhs);
-  Functor_TestHalfVolatileOperators<ViewTypeHost> f_volatile_host(_h_lhs,
-                                                                  _h_rhs);
+  volatile half_type _h_lhs = h_lhs;
+  volatile half_type _h_rhs = h_rhs;
+  Functor_TestHalfVolatileOperators<ViewType, half_type> f_volatile_device(
+      _h_lhs, _h_rhs);
+  Functor_TestHalfVolatileOperators<ViewTypeHost, half_type> f_volatile_host(
+      _h_lhs, _h_rhs);
 
   ExecutionSpace().fence();
   Kokkos::deep_copy(f_device_actual_lhs, f_device.actual_lhs);
@@ -944,12 +987,12 @@ void __test_half_operators(half_t h_lhs, half_t h_rhs) {
 
   // is_trivially_copyable is false with the addition of explicit
   // copy constructors that are required for supporting reductions
-  // ASSERT_TRUE(std::is_trivially_copyable<half_t>::value);
+  // ASSERT_TRUE(std::is_trivially_copyable<half_type>::value);
 
   constexpr size_t n       = 2;
-  constexpr size_t n_bytes = sizeof(half_t) * n;
-  const half_t h_arr0 = half_t(0x89ab), h_arr1 = half_t(0xcdef);
-  half_t h_arr[n];
+  constexpr size_t n_bytes = sizeof(half_type) * n;
+  const half_type h_arr0 = half_type(0x89ab), h_arr1 = half_type(0xcdef);
+  half_type h_arr[n];
   char c_arr[n_bytes], *h_arr_ptr = nullptr;
   size_t i;
 
@@ -970,13 +1013,24 @@ void test_half_operators() {
   for (int i = -3; i < 2; i++) {
     // printf("%f OP %f\n", float(h_lhs + cast_to_half(i + 1)), float(h_rhs +
     // cast_to_half(i)));
-    __test_half_operators(h_lhs + cast_to_half(i + 1), h_rhs + cast_to_half(i));
+    __test_half_operators<half_t>(h_lhs + cast_to_half(i + 1),
+                                  h_rhs + cast_to_half(i));
     // TODO: __test_half_operators(h_lhs + cast_to_half(i + 1), half_t(0));
     // TODO: __test_half_operators(half_t(0), h_rhs + cast_to_half(i));
   }
 }
 
+void test_bhalf_operators() {
+  bhalf_t h_lhs = bhalf_t(0.23458), h_rhs = bhalf_t(0.67898);
+  for (int i = -2; i < 2; i++) {
+    // printf("%f OP %f\n", float(h_lhs + cast_to_bhalf(i + 1)), float(h_rhs +
+    // cast_to_bhalf(i)));
+    __test_half_operators<bhalf_t>(h_lhs + cast_to_bhalf(i + 1),
+                                   h_rhs + cast_to_bhalf(i));
+  }
+}
+
 TEST(TEST_CATEGORY, half_operators) { test_half_operators(); }
+TEST(TEST_CATEGORY, bhalf_operators) { test_bhalf_operators(); }
 }  // namespace Test
-#endif  // KOKKOS_ENABLE_SYCL
 #endif  // TESTHALFOPERATOR_HPP_
diff --git a/packages/kokkos/core/unit_test/TestMDRange.hpp b/packages/kokkos/core/unit_test/TestMDRange.hpp
index 57461be714cde62bdd9b370c834759b91a13da92..5ff87f8d9afed9fb4e8aebaadecf1788087a0870 100644
--- a/packages/kokkos/core/unit_test/TestMDRange.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange.hpp
@@ -124,6 +124,13 @@ struct TestMDRange_ReduceArray_2D {
       parallel_for(range_init, functor);  // Init the view to 3's
 
       double sums[array_size];
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
+      double *sums_ptr = sums;
+      parallel_reduce(range, functor, sums_ptr);
+      ASSERT_EQ(sums[0], 6 * N0 * N1);
+      ASSERT_EQ(sums[1], 3 * N0 * N1);
+#endif
+      Kokkos::fence("Fence before accessing result on the host");
       parallel_reduce(range, functor, sums);
 
       // Check output
diff --git a/packages/kokkos/core/unit_test/TestMDRange_g.hpp b/packages/kokkos/core/unit_test/TestMDRange_g.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c097b2961d4c6f8bf7561d043e359ab635b8a0d2
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestMDRange_g.hpp
@@ -0,0 +1,111 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+//#include <gtest/gtest.h>
+
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+template <typename View>
+struct SumView {
+  const View m_view;
+  KOKKOS_FUNCTION void operator()(const int i, const int j, int& update) const {
+    update += m_view(i, j);
+  }
+
+  SumView(View view) : m_view(view) {}
+
+  int run() {
+    int sum_view = 0;
+    Kokkos::parallel_reduce(
+        Kokkos::MDRangePolicy<typename View::execution_space, Kokkos::Rank<2>>(
+            {0, 0}, {m_view.extent(0), m_view.extent(1)}),
+        *this, sum_view);
+    return sum_view;
+  }
+};
+
+template <typename ExecutionSpace>
+struct TestMDRangeLargeDeepCopy {
+  static void run() {
+    ExecutionSpace exec;
+    using MemorySpace = typename ExecutionSpace::memory_space;
+    // FIXME_SYCL
+#ifdef KOKKOS_ENABLE_SYCL
+    const int s = 13;
+#else
+    const int s = 45;
+#endif
+    const int step_sizes[2] = {1, 10000};
+    Kokkos::View<int**, MemorySpace> view("v", s * step_sizes[0],
+                                          (s + 1) * step_sizes[1]);
+    Kokkos::deep_copy(exec, view, 1);
+    for (int step = 2; step < view.extent_int(0); ++step) {
+      auto subview =
+          Kokkos::subview(view, std::make_pair(0, (step + 1) * step_sizes[0]),
+                          std::make_pair(0, (step + 2) * step_sizes[1]));
+      Kokkos::View<int**, MemorySpace> subview_copy(
+          "subview_copy", subview.extent(0), subview.extent(1));
+      Kokkos::deep_copy(TEST_EXECSPACE{}, subview_copy, subview);
+      exec.fence();
+
+      SumView<decltype(subview)> sum_subview(subview);
+      int total_subview = sum_subview.run();
+      SumView<decltype(subview_copy)> sum_subview_copy(subview_copy);
+      int total_subview_copy = sum_subview_copy.run();
+
+      ASSERT_EQ(total_subview, total_subview_copy);
+    }
+  }
+};
+
+// Check that deep_copy with a large range for a dimension different from the
+// first one works successfully. There was a problem with this in the Cuda
+// backend.
+TEST(TEST_CATEGORY, mdrange_large_deep_copy) {
+  TestMDRangeLargeDeepCopy<TEST_EXECSPACE>::run();
+}
+
+}  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestMathematicalConstants.hpp b/packages/kokkos/core/unit_test/TestMathematicalConstants.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2043d727074eb81c8962748061f93bab27cd9d7e
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestMathematicalConstants.hpp
@@ -0,0 +1,151 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+
+template <class T>
+KOKKOS_FUNCTION T *take_address_of(T &arg) {
+  return &arg;
+}
+
+template <class T>
+KOKKOS_FUNCTION void take_by_value(T) {}
+
+#if defined(KOKKOS_ENABLE_CXX17)
+#define DEFINE_MATH_CONSTANT_TRAIT(TRAIT)                          \
+  template <class T>                                               \
+  struct TRAIT {                                                   \
+    static constexpr T value = Kokkos::Experimental::TRAIT##_v<T>; \
+  }
+#else
+#define DEFINE_MATH_CONSTANT_TRAIT(TRAIT)                                    \
+  template <class>                                                           \
+  struct TRAIT;                                                              \
+  template <>                                                                \
+  struct TRAIT<float> {                                                      \
+    static constexpr float value = Kokkos::Experimental::TRAIT##_v<float>;   \
+  };                                                                         \
+  template <>                                                                \
+  struct TRAIT<double> {                                                     \
+    static constexpr double value = Kokkos::Experimental::TRAIT##_v<double>; \
+  };                                                                         \
+  template <>                                                                \
+  struct TRAIT<long double> {                                                \
+    static constexpr long double value =                                     \
+        Kokkos::Experimental::TRAIT##_v<long double>;                        \
+  };                                                                         \
+  constexpr float TRAIT<float>::value;                                       \
+  constexpr double TRAIT<double>::value;                                     \
+  constexpr long double TRAIT<long double>::value
+#endif
+
+DEFINE_MATH_CONSTANT_TRAIT(e);
+DEFINE_MATH_CONSTANT_TRAIT(log2e);
+DEFINE_MATH_CONSTANT_TRAIT(log10e);
+DEFINE_MATH_CONSTANT_TRAIT(pi);
+DEFINE_MATH_CONSTANT_TRAIT(inv_pi);
+DEFINE_MATH_CONSTANT_TRAIT(inv_sqrtpi);
+DEFINE_MATH_CONSTANT_TRAIT(ln2);
+DEFINE_MATH_CONSTANT_TRAIT(ln10);
+DEFINE_MATH_CONSTANT_TRAIT(sqrt2);
+DEFINE_MATH_CONSTANT_TRAIT(sqrt3);
+DEFINE_MATH_CONSTANT_TRAIT(inv_sqrt3);
+DEFINE_MATH_CONSTANT_TRAIT(egamma);
+DEFINE_MATH_CONSTANT_TRAIT(phi);
+
+template <class Space, class Trait>
+struct TestMathematicalConstants {
+  using T = std::decay_t<decltype(Trait::value)>;
+
+  TestMathematicalConstants() { run(); }
+
+  void run() const {
+    int errors = 0;
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<Space, Trait>(0, 1), *this,
+                            errors);
+    ASSERT_EQ(errors, 0);
+    (void)take_address_of(Trait::value);  // use on host
+  }
+
+  KOKKOS_FUNCTION void operator()(Trait, int, int &) const { use_on_device(); }
+
+  KOKKOS_FUNCTION void use_on_device() const {
+#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_OPENMPTARGET)
+    take_by_value(Trait::value);
+#else
+    (void)take_address_of(Trait::value);
+#endif
+  }
+};
+
+#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \
+    defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET)
+#define TEST_MATH_CONSTANT(TRAIT)                               \
+  TEST(TEST_CATEGORY, mathematical_constants_##TRAIT) {         \
+    TestMathematicalConstants<TEST_EXECSPACE, TRAIT<float>>();  \
+    TestMathematicalConstants<TEST_EXECSPACE, TRAIT<double>>(); \
+  }
+#else
+#define TEST_MATH_CONSTANT(TRAIT)                                    \
+  TEST(TEST_CATEGORY, mathematical_constants_##TRAIT) {              \
+    TestMathematicalConstants<TEST_EXECSPACE, TRAIT<float>>();       \
+    TestMathematicalConstants<TEST_EXECSPACE, TRAIT<double>>();      \
+    TestMathematicalConstants<TEST_EXECSPACE, TRAIT<long double>>(); \
+  }
+#endif
+
+TEST_MATH_CONSTANT(e)
+TEST_MATH_CONSTANT(log2e)
+TEST_MATH_CONSTANT(log10e)
+TEST_MATH_CONSTANT(pi)
+TEST_MATH_CONSTANT(inv_pi)
+TEST_MATH_CONSTANT(inv_sqrtpi)
+TEST_MATH_CONSTANT(ln2)
+TEST_MATH_CONSTANT(ln10)
+TEST_MATH_CONSTANT(sqrt2)
+TEST_MATH_CONSTANT(sqrt3)
+TEST_MATH_CONSTANT(inv_sqrt3)
+TEST_MATH_CONSTANT(egamma)
+TEST_MATH_CONSTANT(phi)
diff --git a/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp
index b38871afaaf6a277f6080e34f1a81aac31f6fb93..0e1514a33f4d95733bc88f486cff8a5026411116 100644
--- a/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp
+++ b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp
@@ -59,6 +59,12 @@
 #define MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
 #endif
 
+// WORKAROUND icpx changing default FP model when optimization level is >= 1
+// using -fp-model=precise works too
+#if defined(__INTEL_LLVM_COMPILER)
+#define KOKKOS_IMPL_WORKAROUND_INTEL_LLVM_DEFAULT_FLOATING_POINT_MODEL
+#endif
+
 // clang-format off
 template <class>
 struct math_unary_function_return_type;
@@ -342,7 +348,13 @@ DEFINE_UNARY_FUNCTION_EVAL(asinh, 4);
 DEFINE_UNARY_FUNCTION_EVAL(acosh, 2);
 DEFINE_UNARY_FUNCTION_EVAL(atanh, 2);
 
+#if defined(__APPLE__)
+// Apple's standard library implementation seems to have a poor implementation
+DEFINE_UNARY_FUNCTION_EVAL(erf, 5);
+#else
 DEFINE_UNARY_FUNCTION_EVAL(erf, 2);
+#endif
+
 DEFINE_UNARY_FUNCTION_EVAL(erfc, 5);
 // has a larger error due to some impls doing integer exact.
 // We cast always to double leading to larger difference when comparing our
@@ -415,8 +427,6 @@ struct TestMathUnaryFunction : FloatingPointComparison {
   Arg val_[N];
   Ret res_[N];
   TestMathUnaryFunction(const Arg (&val)[N]) {
-    std::cout << math_function_name<Func>::name << "("
-              << type_helper<Arg>::name() << ")\n";
     std::copy(val, val + N, val_);
     std::transform(val, val + N, res_,
                    [](auto x) { return Func::eval_std(x); });
@@ -425,7 +435,9 @@ struct TestMathUnaryFunction : FloatingPointComparison {
   void run() {
     int errors = 0;
     Kokkos::parallel_reduce(Kokkos::RangePolicy<Space>(0, N), *this, errors);
-    ASSERT_EQ(errors, 0);
+    ASSERT_EQ(errors, 0) << "Failed check no error for "
+                         << math_function_name<Func>::name << "("
+                         << type_helper<Arg>::name() << ")";
   }
   KOKKOS_FUNCTION void operator()(int i, int& e) const {
     bool ar = compare(Func::eval(val_[i]), res_[i], Func::ulp_factor());
@@ -456,15 +468,15 @@ struct TestMathBinaryFunction : FloatingPointComparison {
   Ret res_;
   TestMathBinaryFunction(Arg1 val1, Arg2 val2)
       : val1_(val1), val2_(val2), res_(Func::eval_std(val1, val2)) {
-    std::cout << math_function_name<Func>::name << "("
-              << type_helper<Arg1>::name() << ", " << type_helper<Arg2>::name()
-              << ")\n";
     run();
   }
   void run() {
     int errors = 0;
     Kokkos::parallel_reduce(Kokkos::RangePolicy<Space>(0, 1), *this, errors);
-    ASSERT_EQ(errors, 0);
+    ASSERT_EQ(errors, 0) << "Failed check no error for "
+                         << math_function_name<Func>::name << "("
+                         << type_helper<Arg1>::name() << ", "
+                         << type_helper<Arg2>::name() << ")";
   }
   KOKKOS_FUNCTION void operator()(int, int& e) const {
     bool ar = compare(Func::eval(val1_, val2_), res_, Func::ulp_factor());
@@ -917,9 +929,7 @@ struct TestAbsoluteValueFunction {
     using Kokkos::Experimental::isinf;
     using Kokkos::Experimental::isnan;
     if (abs(-0.) != 0.
-    // WORKAROUND icpx changing default FP model when optimization level is >= 1
-    // using -fp-model=precise works too
-#ifndef __INTEL_LLVM_COMPILER
+#ifndef KOKKOS_IMPL_WORKAROUND_INTEL_LLVM_DEFAULT_FLOATING_POINT_MODEL
         || !isinf(abs(-INFINITY)) || !isnan(abs(-NAN))
 #endif
     ) {
@@ -942,3 +952,73 @@ struct TestAbsoluteValueFunction {
 TEST(TEST_CATEGORY, mathematical_functions_absolute_value) {
   TestAbsoluteValueFunction<TEST_EXECSPACE>();
 }
+
+template <class Space>
+struct TestIsNaN {
+  TestIsNaN() { run(); }
+  void run() const {
+    int errors = 0;
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<Space>(0, 1), *this, errors);
+    ASSERT_EQ(errors, 0);
+  }
+  KOKKOS_FUNCTION void operator()(int, int& e) const {
+    using Kokkos::Experimental::isnan;
+    using Kokkos::Experimental::quiet_NaN;
+    using Kokkos::Experimental::signaling_NaN;
+    if (isnan(1) || isnan(INT_MAX)) {
+      ++e;
+      KOKKOS_IMPL_DO_NOT_USE_PRINTF("failed isnan(integral)\n");
+    }
+    if (isnan(2.f)
+#ifndef KOKKOS_IMPL_WORKAROUND_INTEL_LLVM_DEFAULT_FLOATING_POINT_MODEL
+        || !isnan(quiet_NaN<float>::value) ||
+        !isnan(signaling_NaN<float>::value)
+#endif
+
+    ) {
+      ++e;
+      KOKKOS_IMPL_DO_NOT_USE_PRINTF("failed isnan(float)\n");
+    }
+    if (isnan(3.)
+#ifndef KOKKOS_IMPL_WORKAROUND_INTEL_LLVM_DEFAULT_FLOATING_POINT_MODEL
+        || !isnan(quiet_NaN<double>::value) ||
+        !isnan(signaling_NaN<double>::value)
+#endif
+    ) {
+      ++e;
+      KOKKOS_IMPL_DO_NOT_USE_PRINTF("failed isnan(double)\n");
+    }
+#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
+    if (isnan(4.l)
+#ifndef KOKKOS_IMPL_WORKAROUND_INTEL_LLVM_DEFAULT_FLOATING_POINT_MODEL
+        || !isnan(quiet_NaN<long double>::value) ||
+        !isnan(signaling_NaN<long double>::value)
+#endif
+    ) {
+      ++e;
+      KOKKOS_IMPL_DO_NOT_USE_PRINTF("failed isnan(long double)\n");
+    }
+#endif
+    // special values
+    if (isnan(INFINITY)
+#ifndef KOKKOS_IMPL_WORKAROUND_INTEL_LLVM_DEFAULT_FLOATING_POINT_MODEL
+        || !isnan(NAN)
+#endif
+    ) {
+      ++e;
+      KOKKOS_IMPL_DO_NOT_USE_PRINTF(
+          "failed isnan(floating_point) special values\n");
+    }
+
+    static_assert(std::is_same<decltype(isnan(1)), bool>::value, "");
+    static_assert(std::is_same<decltype(isnan(2.f)), bool>::value, "");
+    static_assert(std::is_same<decltype(isnan(3.)), bool>::value, "");
+#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
+    static_assert(std::is_same<decltype(isnan(4.l)), bool>::value, "");
+#endif
+  }
+};
+
+TEST(TEST_CATEGORY, mathematical_functions_isnan) {
+  TestIsNaN<TEST_EXECSPACE>();
+}
diff --git a/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp b/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp
index 2d9b4db6bdef50c48a7010d907fb9abf02e05c35..45d8bd08ab270818f26ee755f2e29de3037065e4 100644
--- a/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp
+++ b/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp
@@ -1857,10 +1857,14 @@ TEST(TEST_CATEGORY, mathspecialfunc_expint1) {
   test.testit();
 }
 
+// FIXME_OPENMPTARGET: This unit test fails with a misaligned address error at
+// runtime with LLVM/13.
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
 TEST(TEST_CATEGORY, mathspecialfunc_errorfunc) {
   TestComplexErrorFunction<TEST_EXECSPACE> test;
   test.testit();
 }
+#endif
 
 TEST(TEST_CATEGORY, mathspecialfunc_cbesselj0y0) {
   TestComplexBesselJ0Y0Function<TEST_EXECSPACE> test;
diff --git a/packages/kokkos/core/unit_test/TestMinMaxClamp.hpp b/packages/kokkos/core/unit_test/TestMinMaxClamp.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..abf24ef97cbbf6a203450eab72c2499ec9d25842
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestMinMaxClamp.hpp
@@ -0,0 +1,333 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+
+// FIXME C++17
+#define STATIC_ASSERT(cond) static_assert(cond, "");
+
+namespace Test {
+template <class T>
+struct Greater {
+  KOKKOS_FUNCTION constexpr bool operator()(T const& lhs, T const& rhs) {
+    return lhs > rhs;
+  }
+};
+
+struct PairIntCompareFirst {
+  int first;
+  int second;
+
+ private:
+  friend KOKKOS_FUNCTION constexpr bool operator<(
+      PairIntCompareFirst const& lhs, PairIntCompareFirst const& rhs) {
+    return lhs.first < rhs.first;
+  }
+};
+}  // namespace Test
+
+// ----------------------------------------------------------
+// test max()
+// ----------------------------------------------------------
+TEST(TEST_CATEGORY, max) {
+  namespace KE = Kokkos::Experimental;
+
+  int a = 1;
+  int b = 2;
+  EXPECT_TRUE(KE::max(a, b) == 2);
+
+  a = 3;
+  b = 1;
+  EXPECT_TRUE(KE::max(a, b) == 3);
+
+  STATIC_ASSERT(KE::max(1, 2) == 2);
+  STATIC_ASSERT(KE::max(1, 2, ::Test::Greater<int>{}) == 1);
+
+  EXPECT_TRUE(KE::max({3.f, -1.f, 0.f}) == 3.f);
+
+  STATIC_ASSERT(KE::max({3, -1, 0}) == 3);
+  STATIC_ASSERT(KE::max({3, -1, 0}, ::Test::Greater<int>{}) == -1);
+
+  STATIC_ASSERT(KE::max({
+                            ::Test::PairIntCompareFirst{255, 0},
+                            ::Test::PairIntCompareFirst{255, 1},
+                            ::Test::PairIntCompareFirst{0, 2},
+                            ::Test::PairIntCompareFirst{0, 3},
+                            ::Test::PairIntCompareFirst{255, 4},
+                            ::Test::PairIntCompareFirst{0, 5},
+                        })
+                    .second == 0);  // leftmost element
+}
+
+template <class ViewType>
+struct StdAlgoMinMaxOpsTestMax {
+  ViewType m_view;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int& ind) const {
+    namespace KE = Kokkos::Experimental;
+    auto v1      = 10.;
+    if (KE::max(v1, m_view(ind)) == 10.) {
+      m_view(ind) = 6.;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  StdAlgoMinMaxOpsTestMax(ViewType aIn) : m_view(aIn) {}
+};
+
+TEST(TEST_CATEGORY, max_within_parfor) {
+  namespace KE = Kokkos::Experimental;
+
+  using view_t = Kokkos::View<double*>;
+  view_t a("a", 10);
+
+  StdAlgoMinMaxOpsTestMax<view_t> fnc(a);
+  Kokkos::parallel_for(a.extent(0), fnc);
+  auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_DOUBLE_EQ(a_h(0), 6.);
+  }
+}
+
+// ----------------------------------------------------------
+// test min()
+// ----------------------------------------------------------
+TEST(TEST_CATEGORY, min) {
+  namespace KE = Kokkos::Experimental;
+
+  int a = 1;
+  int b = 2;
+  EXPECT_TRUE(KE::min(a, b) == 1);
+
+  a = 3;
+  b = 2;
+  EXPECT_TRUE(KE::min(a, b) == 2);
+
+  STATIC_ASSERT(KE::min(3.f, 2.f) == 2.f);
+  STATIC_ASSERT(KE::min(3.f, 2.f, ::Test::Greater<int>{}) == 3.f);
+
+  EXPECT_TRUE(KE::min({3.f, -1.f, 0.f}) == -1.f);
+
+  STATIC_ASSERT(KE::min({3, -1, 0}) == -1);
+  STATIC_ASSERT(KE::min({3, -1, 0}, ::Test::Greater<int>{}) == 3);
+
+  STATIC_ASSERT(KE::min({
+                            ::Test::PairIntCompareFirst{255, 0},
+                            ::Test::PairIntCompareFirst{255, 1},
+                            ::Test::PairIntCompareFirst{0, 2},
+                            ::Test::PairIntCompareFirst{0, 3},
+                            ::Test::PairIntCompareFirst{255, 4},
+                            ::Test::PairIntCompareFirst{0, 5},
+                        })
+                    .second == 2);  // leftmost element
+}
+
+template <class ViewType>
+struct StdAlgoMinMaxOpsTestMin {
+  ViewType m_view;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int& ind) const {
+    namespace KE = Kokkos::Experimental;
+    auto v1      = 10.;
+    if (KE::min(v1, m_view(ind)) == 0.) {
+      m_view(ind) = 8.;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  StdAlgoMinMaxOpsTestMin(ViewType aIn) : m_view(aIn) {}
+};
+
+TEST(TEST_CATEGORY, min_within_parfor) {
+  namespace KE = Kokkos::Experimental;
+  using view_t = Kokkos::View<double*>;
+  view_t a("a", 10);
+
+  StdAlgoMinMaxOpsTestMin<view_t> fnc(a);
+  Kokkos::parallel_for(a.extent(0), fnc);
+  auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_DOUBLE_EQ(a_h(0), 8.);
+  }
+}
+
+// ----------------------------------------------------------
+// test minmax()
+// ----------------------------------------------------------
+TEST(TEST_CATEGORY, minmax) {
+  namespace KE  = Kokkos::Experimental;
+  int a         = 1;
+  int b         = 2;
+  const auto& r = KE::minmax(a, b);
+  EXPECT_TRUE(r.first == 1);
+  EXPECT_TRUE(r.second == 2);
+
+  a              = 3;
+  b              = 2;
+  const auto& r2 = KE::minmax(a, b);
+  EXPECT_TRUE(r2.first == 2);
+  EXPECT_TRUE(r2.second == 3);
+
+  STATIC_ASSERT((Kokkos::pair<float, float>(KE::minmax(3.f, 2.f)) ==
+                 Kokkos::make_pair(2.f, 3.f)));
+  STATIC_ASSERT(
+      (Kokkos::pair<float, float>(KE::minmax(
+           3.f, 2.f, ::Test::Greater<int>{})) == Kokkos::make_pair(3.f, 2.f)));
+
+  EXPECT_TRUE(KE::minmax({3.f, -1.f, 0.f}) == Kokkos::make_pair(-1.f, 3.f));
+
+  STATIC_ASSERT(KE::minmax({3, -1, 0}) == Kokkos::make_pair(-1, 3));
+  STATIC_ASSERT(KE::minmax({3, -1, 0}, ::Test::Greater<int>{}) ==
+                Kokkos::make_pair(3, -1));
+
+  STATIC_ASSERT(KE::minmax({
+                               ::Test::PairIntCompareFirst{255, 0},
+                               ::Test::PairIntCompareFirst{255, 1},
+                               ::Test::PairIntCompareFirst{0, 2},
+                               ::Test::PairIntCompareFirst{0, 3},
+                               ::Test::PairIntCompareFirst{255, 4},
+                               ::Test::PairIntCompareFirst{0, 5},
+                           })
+                    .first.second == 2);  // leftmost
+  STATIC_ASSERT(KE::minmax({
+                               ::Test::PairIntCompareFirst{255, 0},
+                               ::Test::PairIntCompareFirst{255, 1},
+                               ::Test::PairIntCompareFirst{0, 2},
+                               ::Test::PairIntCompareFirst{0, 3},
+                               ::Test::PairIntCompareFirst{255, 4},
+                               ::Test::PairIntCompareFirst{0, 5},
+                           })
+                    .second.second == 4);  // rightmost
+}
+
+template <class ViewType>
+struct StdAlgoMinMaxOpsTestMinMax {
+  ViewType m_view;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int& ind) const {
+    namespace KE  = Kokkos::Experimental;
+    auto v1       = 7.;
+    const auto& r = KE::minmax(v1, m_view(ind));
+    m_view(ind)   = (double)(r.first - r.second);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  StdAlgoMinMaxOpsTestMinMax(ViewType aIn) : m_view(aIn) {}
+};
+
+TEST(TEST_CATEGORY, minmax_within_parfor) {
+  namespace KE = Kokkos::Experimental;
+  using view_t = Kokkos::View<double*>;
+  view_t a("a", 10);
+
+  StdAlgoMinMaxOpsTestMinMax<view_t> fnc(a);
+  Kokkos::parallel_for(a.extent(0), fnc);
+  auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_DOUBLE_EQ(a_h(0), -7.);
+  }
+}
+
+// ----------------------------------------------------------
+// test clamp()
+// ----------------------------------------------------------
+TEST(TEST_CATEGORY, clamp) {
+  namespace KE = Kokkos::Experimental;
+
+  int a         = 1;
+  int b         = 2;
+  int c         = 19;
+  const auto& r = KE::clamp(a, b, c);
+  EXPECT_TRUE(&r == &b);
+  EXPECT_TRUE(r == b);
+
+  a              = 5;
+  b              = -2;
+  c              = 3;
+  const auto& r2 = KE::clamp(a, b, c);
+  EXPECT_TRUE(&r2 == &c);
+  EXPECT_TRUE(r2 == c);
+
+  a              = 5;
+  b              = -2;
+  c              = 7;
+  const auto& r3 = KE::clamp(a, b, c);
+  EXPECT_TRUE(&r3 == &a);
+  EXPECT_TRUE(r3 == a);
+}
+
+template <class ViewType>
+struct StdAlgoMinMaxOpsTestClamp {
+  ViewType m_view;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int& ind) const {
+    namespace KE  = Kokkos::Experimental;
+    m_view(ind)   = 10.;
+    const auto b  = -2.;
+    const auto c  = 3.;
+    const auto& r = KE::clamp(m_view(ind), b, c);
+    m_view(ind)   = (double)(r);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  StdAlgoMinMaxOpsTestClamp(ViewType aIn) : m_view(aIn) {}
+};
+
+TEST(TEST_CATEGORY, clamp_within_parfor) {
+  namespace KE = Kokkos::Experimental;
+  using view_t = Kokkos::View<double*>;
+  view_t a("a", 10);
+
+  StdAlgoMinMaxOpsTestClamp<view_t> fnc(a);
+  Kokkos::parallel_for(a.extent(0), fnc);
+  auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
+  for (std::size_t i = 0; i < a.extent(0); ++i) {
+    EXPECT_DOUBLE_EQ(a_h(0), 3.);
+  }
+}
diff --git a/packages/kokkos/core/unit_test/TestNumericTraits.hpp b/packages/kokkos/core/unit_test/TestNumericTraits.hpp
index cb69cb83211e7b82f941e544b0498da6df737cf4..52989aa5dd0ebe49993f6a59e8bba6323c8228ac 100644
--- a/packages/kokkos/core/unit_test/TestNumericTraits.hpp
+++ b/packages/kokkos/core/unit_test/TestNumericTraits.hpp
@@ -81,6 +81,8 @@ struct FiniteMin { template <class T> using trait = Kokkos::Experimental::finite
 struct FiniteMax { template <class T> using trait = Kokkos::Experimental::finite_max<T>; };
 struct RoundError { template <class T> using trait = Kokkos::Experimental::round_error<T>; };
 struct NormMin { template <class T> using trait = Kokkos::Experimental::norm_min<T>; };
+struct DenormMin { template <class T> using trait = Kokkos::Experimental::denorm_min<T>; };
+struct ReciprocalOverflowThreshold { template <class T> using trait = Kokkos::Experimental::reciprocal_overflow_threshold<T>; };
 struct Digits { template <class T> using trait = Kokkos::Experimental::digits<T>; };
 struct Digits10 { template <class T> using trait = Kokkos::Experimental::digits10<T>; };
 struct MaxDigits10 { template <class T> using trait = Kokkos::Experimental::max_digits10<T>; };
@@ -89,6 +91,8 @@ struct MinExponent { template <class T> using trait = Kokkos::Experimental::min_
 struct MaxExponent { template <class T> using trait = Kokkos::Experimental::max_exponent<T>; };
 struct MinExponent10 { template <class T> using trait = Kokkos::Experimental::min_exponent10<T>; };
 struct MaxExponent10 { template <class T> using trait = Kokkos::Experimental::max_exponent10<T>; };
+struct QuietNaN { template <class T> using trait = Kokkos::Experimental::quiet_NaN<T>; };
+struct SignalingNaN { template <class T> using trait = Kokkos::Experimental::signaling_NaN<T>; };
 // clang-format on
 
 template <class T>
@@ -149,10 +153,23 @@ struct TestNumericTraits {
     use_on_device();
   }
 
+  KOKKOS_FUNCTION void operator()(ReciprocalOverflowThreshold, int,
+                                  int& e) const {
+    using Kokkos::Experimental::reciprocal_overflow_threshold;
+    auto const inv = 1 / reciprocal_overflow_threshold<T>::value;
+    if (inv + inv == inv && inv != 0) {
+      KOKKOS_IMPL_DO_NOT_USE_PRINTF(
+          "inverse of reciprocal overflow threshold is inf\n");
+      ++e;
+    }
+    use_on_device();
+  }
+
   // clang-format off
   KOKKOS_FUNCTION void operator()(FiniteMax, int, int&) const { use_on_device(); }
   KOKKOS_FUNCTION void operator()(RoundError, int, int&) const { use_on_device(); }
   KOKKOS_FUNCTION void operator()(NormMin, int, int&) const { use_on_device(); }
+  KOKKOS_FUNCTION void operator()(DenormMin, int, int&) const { use_on_device(); }
   KOKKOS_FUNCTION void operator()(Digits, int, int&) const { use_on_device(); }
   KOKKOS_FUNCTION void operator()(Digits10, int, int&) const { use_on_device(); }
   KOKKOS_FUNCTION void operator()(MaxDigits10, int, int&) const { use_on_device(); }
@@ -162,6 +179,30 @@ struct TestNumericTraits {
   KOKKOS_FUNCTION void operator()(MinExponent10, int, int&) const { use_on_device(); }
   KOKKOS_FUNCTION void operator()(MaxExponent10, int, int&) const { use_on_device(); }
   // clang-format on
+  KOKKOS_FUNCTION void operator()(QuietNaN, int, int& e) const {
+#ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC
+    using Kokkos::Experimental::quiet_NaN;
+    constexpr auto nan  = quiet_NaN<T>::value;
+    constexpr auto zero = T(0);
+    e += (int)!(nan != nan);
+    e += (int)!(nan != zero);
+#else
+    (void)e;
+#endif
+    use_on_device();
+  }
+  KOKKOS_FUNCTION void operator()(SignalingNaN, int, int& e) const {
+#ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC
+    using Kokkos::Experimental::signaling_NaN;
+    constexpr auto nan  = signaling_NaN<T>::value;
+    constexpr auto zero = T(0);
+    e += (int)!(nan != nan);
+    e += (int)!(nan != zero);
+#else
+    (void)e;
+#endif
+    use_on_device();
+  }
 
   KOKKOS_FUNCTION void use_on_device() const {
 #if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_OPENMPTARGET)
@@ -196,6 +237,11 @@ struct TestNumericTraits<
 };
 #endif
 
+#ifdef KOKKOS_COMPILER_NVHPC
+// warning: 'long double' is treated as 'double' in device code
+#pragma diag_suppress 20208
+#endif
+
 TEST(TEST_CATEGORY, numeric_traits_infinity) {
   TestNumericTraits<TEST_EXECSPACE, float, Infinity>();
   TestNumericTraits<TEST_EXECSPACE, double, Infinity>();
@@ -224,6 +270,18 @@ TEST(TEST_CATEGORY, numeric_traits_norm_min) {
   TestNumericTraits<TEST_EXECSPACE, long double, NormMin>();
 }
 
+TEST(TEST_CATEGORY, numeric_traits_denorm_min) {
+  TestNumericTraits<TEST_EXECSPACE, float, DenormMin>();
+  TestNumericTraits<TEST_EXECSPACE, double, DenormMin>();
+  TestNumericTraits<TEST_EXECSPACE, long double, DenormMin>();
+}
+
+TEST(TEST_CATEGORY, numeric_traits_reciprocal_overflow_threshold) {
+  TestNumericTraits<TEST_EXECSPACE, float, ReciprocalOverflowThreshold>();
+  TestNumericTraits<TEST_EXECSPACE, double, ReciprocalOverflowThreshold>();
+  TestNumericTraits<TEST_EXECSPACE, long double, ReciprocalOverflowThreshold>();
+}
+
 TEST(TEST_CATEGORY, numeric_traits_finite_min_max) {
   TestNumericTraits<TEST_EXECSPACE, char, FiniteMin>();
   TestNumericTraits<TEST_EXECSPACE, char, FiniteMax>();
@@ -338,6 +396,15 @@ TEST(TEST_CATEGORY, numeric_traits_min_max_exponent10) {
   TestNumericTraits<TEST_EXECSPACE, long double, MaxExponent10>();
 }
 
+TEST(TEST_CATEGORY, numeric_traits_quiet_and_signaling_nan) {
+  TestNumericTraits<TEST_EXECSPACE, float, QuietNaN>();
+  TestNumericTraits<TEST_EXECSPACE, float, SignalingNaN>();
+  TestNumericTraits<TEST_EXECSPACE, double, QuietNaN>();
+  TestNumericTraits<TEST_EXECSPACE, double, SignalingNaN>();
+  TestNumericTraits<TEST_EXECSPACE, long double, QuietNaN>();
+  TestNumericTraits<TEST_EXECSPACE, long double, SignalingNaN>();
+}
+
 namespace NumericTraitsSFINAE {
 
 struct HasNoSpecialization {};
@@ -355,6 +422,9 @@ CHECK_TRAIT_IS_SFINAE_FRIENDLY(finite_max)
 CHECK_TRAIT_IS_SFINAE_FRIENDLY(epsilon)
 CHECK_TRAIT_IS_SFINAE_FRIENDLY(round_error)
 CHECK_TRAIT_IS_SFINAE_FRIENDLY(norm_min)
+CHECK_TRAIT_IS_SFINAE_FRIENDLY(denorm_min)
+CHECK_TRAIT_IS_SFINAE_FRIENDLY(quiet_NaN)
+CHECK_TRAIT_IS_SFINAE_FRIENDLY(signaling_NaN)
 
 CHECK_TRAIT_IS_SFINAE_FRIENDLY(digits)
 CHECK_TRAIT_IS_SFINAE_FRIENDLY(digits10)
@@ -417,6 +487,11 @@ CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(long double, epsilon);
 CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(float, round_error);
 CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(double, round_error);
 CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(long double, round_error);
+CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(float, denorm_min);
+CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(double, denorm_min);
+CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(long double, denorm_min);
+// NOTE reciprocal_overflow_threshold purposefully omitted since it does not
+// exist in std::numeric_limits
 // clang-format off
 static_assert(Kokkos::Experimental::norm_min<float      >::value == std::numeric_limits<      float>::min(), "");
 static_assert(Kokkos::Experimental::norm_min<double     >::value == std::numeric_limits<     double>::min(), "");
@@ -516,3 +591,108 @@ CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_CONSTANT(long double, max_exponent10);
 
 #undef CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION
 #undef CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_CONSTANT
+
+#define CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(T, TRAIT)             \
+  static_assert(Kokkos::Experimental::TRAIT<T>::value !=                       \
+                    Kokkos::Experimental::TRAIT<T>::value,                     \
+                "");                                                           \
+  static_assert(                                                               \
+      std::numeric_limits<T>::TRAIT() != std::numeric_limits<T>::TRAIT(), ""); \
+  static_assert(Kokkos::Experimental::TRAIT<T>::value !=                       \
+                    std::numeric_limits<T>::TRAIT(),                           \
+                "")
+
+// Workaround compiler issue error: expression must have a constant value
+// See kokkos/kokkos#4574
+#ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC
+CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(float, quiet_NaN);
+CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(double, quiet_NaN);
+CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(long double, quiet_NaN);
+CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(float, signaling_NaN);
+CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(double, signaling_NaN);
+CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(long double, signaling_NaN);
+#endif
+
+#undef CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION
+
+#define CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(T, TRAIT)              \
+  static_assert(Kokkos::Experimental::TRAIT<T const>::value ==          \
+                    Kokkos::Experimental::TRAIT<T>::value,              \
+                "");                                                    \
+  static_assert(Kokkos::Experimental::TRAIT<T volatile>::value ==       \
+                    Kokkos::Experimental::TRAIT<T>::value,              \
+                "");                                                    \
+  static_assert(Kokkos::Experimental::TRAIT<T const volatile>::value == \
+                    Kokkos::Experimental::TRAIT<T>::value,              \
+                "")
+
+#define CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(TRAIT) \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(float, TRAIT);              \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(double, TRAIT);             \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(long double, TRAIT)
+
+#define CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_INTEGRAL(TRAIT)      \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(bool, TRAIT);              \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(char, TRAIT);              \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(signed char, TRAIT);       \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(unsigned char, TRAIT);     \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(short, TRAIT);             \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(unsigned short, TRAIT);    \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(int, TRAIT);               \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(unsigned int, TRAIT);      \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(long int, TRAIT);          \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(unsigned long int, TRAIT); \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(long long int, TRAIT);     \
+  CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES(unsigned long long int, TRAIT)
+
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(infinity);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(finite_min);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_INTEGRAL(finite_min);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(finite_max);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_INTEGRAL(finite_max);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(epsilon);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(round_error);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(norm_min);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(
+    reciprocal_overflow_threshold);
+
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(digits);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_INTEGRAL(digits);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(digits10);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_INTEGRAL(digits10);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(max_digits10);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(radix);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_INTEGRAL(radix);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(min_exponent);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(min_exponent10);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(max_exponent);
+CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(max_exponent10);
+
+#undef CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_INTEGRAL
+#undef CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT
+#undef CHECK_INSTANTIATED_ON_CV_QUALIFIED_TYPES
+
+#define CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES(T, TRAIT)          \
+  static_assert(Kokkos::Experimental::TRAIT<T>::value !=                \
+                    Kokkos::Experimental::TRAIT<T>::value,              \
+                "");                                                    \
+  static_assert(Kokkos::Experimental::TRAIT<T const>::value !=          \
+                    Kokkos::Experimental::TRAIT<T>::value,              \
+                "");                                                    \
+  static_assert(Kokkos::Experimental::TRAIT<T volatile>::value !=       \
+                    Kokkos::Experimental::TRAIT<T>::value,              \
+                "");                                                    \
+  static_assert(Kokkos::Experimental::TRAIT<T const volatile>::value != \
+                    Kokkos::Experimental::TRAIT<T>::value,              \
+                "")
+
+#define CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(TRAIT) \
+  CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES(float, TRAIT);              \
+  CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES(double, TRAIT);             \
+  CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES(long double, TRAIT)
+
+CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(quiet_NaN);
+CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(signaling_NaN);
+
+#undef CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT
+#undef CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES
diff --git a/packages/kokkos/core/unit_test/TestOther.hpp b/packages/kokkos/core/unit_test/TestOther.hpp
index 2d298d30784160ec4b65652864003985e2d488c4..c068d250cdb61435cf774fdc3eab19957b521a5e 100644
--- a/packages/kokkos/core/unit_test/TestOther.hpp
+++ b/packages/kokkos/core/unit_test/TestOther.hpp
@@ -50,5 +50,8 @@
 #include <TestCXX11.hpp>
 
 #include <TestViewCtorPropEmbeddedDim.hpp>
+// with VS 16.11.3 and CUDA 11.4.2 getting cudafe stackoverflow crash
+#if !(defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA))
 #include <TestViewLayoutTiled.hpp>
 #endif
+#endif
diff --git a/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp b/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp
index d75d78b31f08f3d6234a174053630abace0d781a..26eb22670134c96b1ace40e15777027fa9a7262b 100644
--- a/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp
+++ b/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp
@@ -288,37 +288,42 @@ class TestRangePolicyConstruction {
     }
   }
   void test_runtime_parameters() {
-    using policy_t = Kokkos::RangePolicy<>;
+    using policy_t     = Kokkos::RangePolicy<>;
+    using index_t      = policy_t::index_type;
+    index_t work_begin = 5;
+    index_t work_end   = 15;
+    index_t chunk_size = 10;
     {
-      policy_t p(5, 15);
-      ASSERT_EQ(p.begin(), 5);
-      ASSERT_EQ(p.end(), 15);
+      policy_t p(work_begin, work_end);
+      ASSERT_EQ(p.begin(), work_begin);
+      ASSERT_EQ(p.end(), work_end);
     }
     {
-      policy_t p(Kokkos::DefaultExecutionSpace(), 5, 15);
-      ASSERT_EQ(p.begin(), 5);
-      ASSERT_EQ(p.end(), 15);
+      policy_t p(Kokkos::DefaultExecutionSpace(), work_begin, work_end);
+      ASSERT_EQ(p.begin(), work_begin);
+      ASSERT_EQ(p.end(), work_end);
     }
     {
-      policy_t p(5, 15, Kokkos::ChunkSize(10));
-      ASSERT_EQ(p.begin(), 5);
-      ASSERT_EQ(p.end(), 15);
-      ASSERT_EQ(p.chunk_size(), 10);
+      policy_t p(work_begin, work_end, Kokkos::ChunkSize(chunk_size));
+      ASSERT_EQ(p.begin(), work_begin);
+      ASSERT_EQ(p.end(), work_end);
+      ASSERT_EQ(p.chunk_size(), chunk_size);
     }
     {
-      policy_t p(Kokkos::DefaultExecutionSpace(), 5, 15, Kokkos::ChunkSize(10));
-      ASSERT_EQ(p.begin(), 5);
-      ASSERT_EQ(p.end(), 15);
-      ASSERT_EQ(p.chunk_size(), 10);
+      policy_t p(Kokkos::DefaultExecutionSpace(), work_begin, work_end,
+                 Kokkos::ChunkSize(chunk_size));
+      ASSERT_EQ(p.begin(), work_begin);
+      ASSERT_EQ(p.end(), work_end);
+      ASSERT_EQ(p.chunk_size(), chunk_size);
     }
     {
       policy_t p;
-      ASSERT_EQ(p.begin(), 0);
-      ASSERT_EQ(p.end(), 0);
-      p = policy_t(5, 15, Kokkos::ChunkSize(10));
-      ASSERT_EQ(p.begin(), 5);
-      ASSERT_EQ(p.end(), 15);
-      ASSERT_EQ(p.chunk_size(), 10);
+      ASSERT_EQ(p.begin(), index_t(0));
+      ASSERT_EQ(p.end(), index_t(0));
+      p = policy_t(work_begin, work_end, Kokkos::ChunkSize(chunk_size));
+      ASSERT_EQ(p.begin(), work_begin);
+      ASSERT_EQ(p.end(), work_end);
+      ASSERT_EQ(p.chunk_size(), chunk_size);
     }
   }
 };
@@ -580,88 +585,85 @@ class TestTeamPolicyConstruction {
     policy_t p1(league_size, team_size);
     ASSERT_EQ(p1.league_size(), league_size);
     ASSERT_EQ(p1.team_size(), team_size);
-// FIXME_SYCL implement chunk_size
-#ifndef KOKKOS_ENABLE_SYCL
     ASSERT_GT(p1.chunk_size(), 0);
-#endif
-    ASSERT_EQ(p1.scratch_size(0), 0);
+    ASSERT_EQ(size_t(p1.scratch_size(0)), 0u);
 
     policy_t p2 = p1.set_chunk_size(chunk_size);
     ASSERT_EQ(p1.league_size(), league_size);
     ASSERT_EQ(p1.team_size(), team_size);
     ASSERT_EQ(p1.chunk_size(), chunk_size);
-    ASSERT_EQ(p1.scratch_size(0), 0);
+    ASSERT_EQ(size_t(p1.scratch_size(0)), 0u);
 
     ASSERT_EQ(p2.league_size(), league_size);
     ASSERT_EQ(p2.team_size(), team_size);
     ASSERT_EQ(p2.chunk_size(), chunk_size);
-    ASSERT_EQ(p2.scratch_size(0), 0);
+    ASSERT_EQ(size_t(p2.scratch_size(0)), 0u);
 
     policy_t p3 = p2.set_scratch_size(0, Kokkos::PerTeam(per_team_scratch));
     ASSERT_EQ(p2.league_size(), league_size);
     ASSERT_EQ(p2.team_size(), team_size);
     ASSERT_EQ(p2.chunk_size(), chunk_size);
-    ASSERT_EQ(p2.scratch_size(0), per_team_scratch);
+    ASSERT_EQ(size_t(p2.scratch_size(0)), size_t(per_team_scratch));
     ASSERT_EQ(p3.league_size(), league_size);
     ASSERT_EQ(p3.team_size(), team_size);
     ASSERT_EQ(p3.chunk_size(), chunk_size);
-    ASSERT_EQ(p3.scratch_size(0), per_team_scratch);
+    ASSERT_EQ(size_t(p3.scratch_size(0)), size_t(per_team_scratch));
 
     policy_t p4 = p2.set_scratch_size(0, Kokkos::PerThread(per_thread_scratch));
     ASSERT_EQ(p2.league_size(), league_size);
     ASSERT_EQ(p2.team_size(), team_size);
     ASSERT_EQ(p2.chunk_size(), chunk_size);
-    ASSERT_EQ(p2.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p2.scratch_size(0)), size_t(scratch_size));
     ASSERT_EQ(p4.league_size(), league_size);
     ASSERT_EQ(p4.team_size(), team_size);
     ASSERT_EQ(p4.chunk_size(), chunk_size);
-    ASSERT_EQ(p4.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p4.scratch_size(0)), size_t(scratch_size));
 
     policy_t p5 = p2.set_scratch_size(0, Kokkos::PerThread(per_thread_scratch),
                                       Kokkos::PerTeam(per_team_scratch));
     ASSERT_EQ(p2.league_size(), league_size);
     ASSERT_EQ(p2.team_size(), team_size);
     ASSERT_EQ(p2.chunk_size(), chunk_size);
-    ASSERT_EQ(p2.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p2.scratch_size(0)), size_t(scratch_size));
     ASSERT_EQ(p5.league_size(), league_size);
     ASSERT_EQ(p5.team_size(), team_size);
     ASSERT_EQ(p5.chunk_size(), chunk_size);
-    ASSERT_EQ(p5.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p5.scratch_size(0)), size_t(scratch_size));
 
     policy_t p6 = p2.set_scratch_size(0, Kokkos::PerTeam(per_team_scratch),
                                       Kokkos::PerThread(per_thread_scratch));
     ASSERT_EQ(p2.league_size(), league_size);
     ASSERT_EQ(p2.team_size(), team_size);
     ASSERT_EQ(p2.chunk_size(), chunk_size);
-    ASSERT_EQ(p2.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p2.scratch_size(0)), size_t(scratch_size));
     ASSERT_EQ(p6.league_size(), league_size);
     ASSERT_EQ(p6.team_size(), team_size);
     ASSERT_EQ(p6.chunk_size(), chunk_size);
-    ASSERT_EQ(p6.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p6.scratch_size(0)), size_t(scratch_size));
 
     policy_t p7 = p3.set_scratch_size(0, Kokkos::PerTeam(per_team_scratch),
                                       Kokkos::PerThread(per_thread_scratch));
     ASSERT_EQ(p3.league_size(), league_size);
     ASSERT_EQ(p3.team_size(), team_size);
     ASSERT_EQ(p3.chunk_size(), chunk_size);
-    ASSERT_EQ(p3.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p3.scratch_size(0)), size_t(scratch_size));
     ASSERT_EQ(p7.league_size(), league_size);
     ASSERT_EQ(p7.team_size(), team_size);
     ASSERT_EQ(p7.chunk_size(), chunk_size);
-    ASSERT_EQ(p7.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p7.scratch_size(0)), size_t(scratch_size));
 
     policy_t p8;  // default constructed
     ASSERT_EQ(p8.league_size(), 0);
-    ASSERT_EQ(p8.scratch_size(0), 0);
+    ASSERT_EQ(size_t(p8.scratch_size(0)), 0u);
     p8 = p3;  // call assignment operator
     ASSERT_EQ(p3.league_size(), league_size);
     ASSERT_EQ(p3.team_size(), team_size);
     ASSERT_EQ(p3.chunk_size(), chunk_size);
-    ASSERT_EQ(p3.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p3.scratch_size(0)), size_t(scratch_size));
     ASSERT_EQ(p8.league_size(), league_size);
     ASSERT_EQ(p8.team_size(), team_size);
     ASSERT_EQ(p8.chunk_size(), chunk_size);
-    ASSERT_EQ(p8.scratch_size(0), scratch_size);
+    ASSERT_EQ(size_t(p8.scratch_size(0)), size_t(scratch_size));
   }
 
   void test_run_time_parameters() {
@@ -716,6 +718,7 @@ TEST(TEST_CATEGORY, policy_converting_constructor_from_other_policy) {
       Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>{});
 }
 
+#ifndef KOKKOS_COMPILER_NVHPC       // FIXME_NVHPC
 #ifndef KOKKOS_ENABLE_OPENMPTARGET  // FIXME_OPENMPTARGET
 TEST(TEST_CATEGORY_DEATH, policy_bounds_unsafe_narrowing_conversions) {
   using Policy = Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>,
@@ -729,6 +732,7 @@ TEST(TEST_CATEGORY_DEATH, policy_bounds_unsafe_narrowing_conversions) {
       "unsafe narrowing conversion");
 }
 #endif
+#endif
 
 template <class Policy>
 void test_prefer_desired_occupancy(Policy const& policy) {
@@ -790,6 +794,8 @@ struct static_assert_dummy_policy_must_be_size_of_desired_occupancy<
     sizeof(Kokkos::Experimental::DesiredOccupancy),
     sizeof(Kokkos::Experimental::DesiredOccupancy)> {};
 
+// EBO failure with VS 16.11.3 and CUDA 11.4.2
+#if !(defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA))
 TEST(TEST_CATEGORY, desired_occupancy_empty_base_optimization) {
   DummyPolicy<TEST_EXECSPACE> const policy{};
   static_assert(sizeof(decltype(policy)) == 1, "");
@@ -807,6 +813,7 @@ TEST(TEST_CATEGORY, desired_occupancy_empty_base_optimization) {
       _assert2{};
   (void)&_assert2;  // avoid unused variable warning
 }
+#endif
 
 template <typename Policy>
 void test_desired_occupancy_converting_constructors(Policy const& policy) {
diff --git a/packages/kokkos/core/unit_test/TestRealloc.hpp b/packages/kokkos/core/unit_test/TestRealloc.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2b3e1ac3738223f1eaaa03d79a7ed757ee064d5a
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestRealloc.hpp
@@ -0,0 +1,164 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef TESTREALLOC_HPP_
+#define TESTREALLOC_HPP_
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+
+namespace TestViewRealloc {
+
+struct Default {};
+struct WithoutInitializing {};
+
+template <typename View, typename... Args>
+inline void realloc_dispatch(Default, View& v, Args&&... args) {
+  Kokkos::realloc(v, std::forward<Args>(args)...);
+}
+
+template <typename View, typename... Args>
+inline void realloc_dispatch(WithoutInitializing, View& v, Args&&... args) {
+  Kokkos::realloc(Kokkos::WithoutInitializing, v, std::forward<Args>(args)...);
+}
+
+template <class DeviceType, class Tag = Default>
+void impl_testRealloc() {
+  const size_t sizes[8] = {2, 3, 4, 5, 6, 7, 8, 9};
+
+  // Check #904 fix (no reallocation if dimensions didn't change).
+  {
+    using view_type = Kokkos::View<int*, DeviceType>;
+    view_type view_1d("view_1d", sizes[0]);
+    const int* oldPointer = view_1d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_1d, sizes[0]);
+    const int* newPointer = view_1d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+  {
+    using view_type = Kokkos::View<int**, DeviceType>;
+    view_type view_2d("view_2d", sizes[0], sizes[1]);
+    const int* oldPointer = view_2d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_2d, sizes[0], sizes[1]);
+    const int* newPointer = view_2d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+  {
+    using view_type = Kokkos::View<int***, DeviceType>;
+    view_type view_3d("view_3d", sizes[0], sizes[1], sizes[2]);
+    const int* oldPointer = view_3d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_3d, sizes[0], sizes[1], sizes[2]);
+    const int* newPointer = view_3d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+  {
+    using view_type = Kokkos::View<int****, DeviceType>;
+    view_type view_4d("view_4d", sizes[0], sizes[1], sizes[2], sizes[3]);
+    const int* oldPointer = view_4d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_4d, sizes[0], sizes[1], sizes[2], sizes[3]);
+    const int* newPointer = view_4d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+  {
+    using view_type = Kokkos::View<int*****, DeviceType>;
+    view_type view_5d("view_5d", sizes[0], sizes[1], sizes[2], sizes[3],
+                      sizes[4]);
+    const int* oldPointer = view_5d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_5d, sizes[0], sizes[1], sizes[2], sizes[3],
+                     sizes[4]);
+    const int* newPointer = view_5d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+  {
+    using view_type = Kokkos::View<int******, DeviceType>;
+    view_type view_6d("view_6d", sizes[0], sizes[1], sizes[2], sizes[3],
+                      sizes[4], sizes[5]);
+    const int* oldPointer = view_6d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_6d, sizes[0], sizes[1], sizes[2], sizes[3],
+                     sizes[4], sizes[5]);
+    const int* newPointer = view_6d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+  {
+    using view_type = Kokkos::View<int*******, DeviceType>;
+    view_type view_7d("view_7d", sizes[0], sizes[1], sizes[2], sizes[3],
+                      sizes[4], sizes[5], sizes[6]);
+    const int* oldPointer = view_7d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_7d, sizes[0], sizes[1], sizes[2], sizes[3],
+                     sizes[4], sizes[5], sizes[6]);
+    const int* newPointer = view_7d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+  {
+    using view_type = Kokkos::View<int********, DeviceType>;
+    view_type view_8d("view_8d", sizes[0], sizes[1], sizes[2], sizes[3],
+                      sizes[4], sizes[5], sizes[6], sizes[7]);
+    const int* oldPointer = view_8d.data();
+    EXPECT_TRUE(oldPointer != nullptr);
+    realloc_dispatch(Tag{}, view_8d, sizes[0], sizes[1], sizes[2], sizes[3],
+                     sizes[4], sizes[5], sizes[6], sizes[7]);
+    const int* newPointer = view_8d.data();
+    EXPECT_TRUE(oldPointer == newPointer);
+  }
+}
+
+template <class DeviceType>
+void testRealloc() {
+  {
+    impl_testRealloc<DeviceType>();  // with data initialization
+  }
+  {
+    impl_testRealloc<DeviceType,
+                     WithoutInitializing>();  // without data initialization
+  }
+}
+
+}  // namespace TestViewRealloc
+#endif  // TESTREALLOC_HPP_
diff --git a/packages/kokkos/core/unit_test/TestReduce.hpp b/packages/kokkos/core/unit_test/TestReduce.hpp
index 81e063f83e3ae4fba46f525756c262cb851d2068..161b21615febdd9064c94482429ddd5495514952 100644
--- a/packages/kokkos/core/unit_test/TestReduce.hpp
+++ b/packages/kokkos/core/unit_test/TestReduce.hpp
@@ -520,6 +520,7 @@ class TestReduceDynamicView {
       // Test result to host pointer:
 
       std::string str("TestKernelReduce");
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
       if (count % 2 == 0) {
         Kokkos::parallel_reduce(nw, functor_type(nw, count),
                                 host_result.data());
@@ -528,6 +529,20 @@ class TestReduceDynamicView {
                                 host_result.data());
       }
 
+      for (unsigned j = 0; j < count; ++j) {
+        const uint64_t correct = 0 == j % 3 ? nw : nsum;
+        ASSERT_EQ(host_result(j), (ScalarType)correct);
+        host_result(j) = 0;
+      }
+#endif
+
+      if (count % 2 == 0) {
+        Kokkos::parallel_reduce(nw, functor_type(nw, count), host_result);
+      } else {
+        Kokkos::parallel_reduce(str, nw, functor_type(nw, count), host_result);
+      }
+      Kokkos::fence("Fence before accessing result on the host");
+
       for (unsigned j = 0; j < count; ++j) {
         const uint64_t correct = 0 == j % 3 ? nw : nsum;
         ASSERT_EQ(host_result(j), (ScalarType)correct);
@@ -539,10 +554,12 @@ class TestReduceDynamicView {
 
 }  // namespace
 
+// FIXME_SYCL
 // FIXME_OPENMPTARGET : The feature works with LLVM/13 on NVIDIA
 // architectures. The jenkins currently tests with LLVM/12.
-#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_CLANG) && \
-    (KOKKOS_COMPILER_CLANG >= 1300)
+#if !defined(KOKKOS_ENABLE_SYCL) &&          \
+    (!defined(KOKKOS_ENABLE_OPENMPTARGET) || \
+     defined(KOKKOS_COMPILER_CLANG) && (KOKKOS_COMPILER_CLANG >= 1300))
 TEST(TEST_CATEGORY, int64_t_reduce) {
   TestReduce<int64_t, TEST_EXECSPACE>(0);
   TestReduce<int64_t, TEST_EXECSPACE>(1000000);
@@ -585,9 +602,9 @@ TEST(TEST_CATEGORY, int_combined_reduce) {
                           Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw),
                           functor_type(nw), result1, result2, result3);
 
-  ASSERT_EQ(nw, result1);
-  ASSERT_EQ(nsum, result2);
-  ASSERT_EQ(nsum, result3);
+  ASSERT_EQ(nw, uint64_t(result1));
+  ASSERT_EQ(nsum, uint64_t(result2));
+  ASSERT_EQ(nsum, uint64_t(result3));
 }
 
 TEST(TEST_CATEGORY, mdrange_combined_reduce) {
@@ -606,9 +623,9 @@ TEST(TEST_CATEGORY, mdrange_combined_reduce) {
                                                              {{nw, 1, 1}}),
       functor_type(nw), result1, result2, result3);
 
-  ASSERT_EQ(nw, result1);
-  ASSERT_EQ(nsum, result2);
-  ASSERT_EQ(nsum, result3);
+  ASSERT_EQ(nw, uint64_t(result1));
+  ASSERT_EQ(nsum, uint64_t(result2));
+  ASSERT_EQ(nsum, uint64_t(result3));
 }
 
 TEST(TEST_CATEGORY, int_combined_reduce_mixed) {
@@ -629,9 +646,9 @@ TEST(TEST_CATEGORY, int_combined_reduce_mixed) {
                           functor_type(nw), result1_v, result2,
                           Kokkos::Sum<int64_t, Kokkos::HostSpace>{result3_v});
 
-  ASSERT_EQ(nw, result1_v());
-  ASSERT_EQ(nsum, result2);
-  ASSERT_EQ(nsum, result3_v());
+  ASSERT_EQ(int64_t(nw), result1_v());
+  ASSERT_EQ(int64_t(nsum), result2);
+  ASSERT_EQ(int64_t(nsum), result3_v());
 }
 #endif
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestReducers.hpp b/packages/kokkos/core/unit_test/TestReducers.hpp
index 0d5f7fe7ba538524e0119c950f01469c7aa48a83..7584227945e8f8fc35c398c05507351de4deab66 100644
--- a/packages/kokkos/core/unit_test/TestReducers.hpp
+++ b/packages/kokkos/core/unit_test/TestReducers.hpp
@@ -314,10 +314,7 @@ struct TestReducers {
 
       Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, 0), f,
                               reducer_scalar);
-// Zero length reduction not yet supported
-#ifndef KOKKOS_ENABLE_OPENMPTARGET
       ASSERT_EQ(sum_scalar, init);
-#endif
 
       Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), f,
                               reducer_scalar);
@@ -340,10 +337,7 @@ struct TestReducers {
                               reducer_view);
       Kokkos::fence();
       Scalar sum_view_scalar = sum_view();
-// Zero length reduction not yet supported
-#ifndef KOKKOS_ENABLE_OPENMPTARGET
       ASSERT_EQ(sum_view_scalar, init);
-#endif
 
       Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), f,
                               reducer_view);
@@ -355,8 +349,6 @@ struct TestReducers {
       ASSERT_EQ(sum_view_view, reference_sum);
     }
 
-    // Reduction to device view not yet supported
-#ifndef KOKKOS_ENABLE_OPENMPTARGET
     {
       Kokkos::View<Scalar, typename ExecSpace::memory_space> sum_view("View");
       Kokkos::deep_copy(sum_view, Scalar(1));
@@ -375,7 +367,6 @@ struct TestReducers {
       Kokkos::deep_copy(sum_view_scalar, sum_view);
       ASSERT_EQ(sum_view_scalar, reference_sum);
     }
-#endif
   }
 
   static void test_prod(int N) {
@@ -400,10 +391,7 @@ struct TestReducers {
       Kokkos::Prod<Scalar> reducer_scalar(prod_scalar);
       Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, 0), f,
                               reducer_scalar);
-// Zero length reduction not yet supported
-#ifndef KOKKOS_ENABLE_OPENMPTARGET
       ASSERT_EQ(prod_scalar, init);
-#endif
 
       Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), f,
                               reducer_scalar);
@@ -426,10 +414,7 @@ struct TestReducers {
                               reducer_view);
       Kokkos::fence();
       Scalar prod_view_scalar = prod_view();
-// Zero length reduction not yet supported
-#ifndef KOKKOS_ENABLE_OPENMPTARGET
       ASSERT_EQ(prod_view_scalar, init);
-#endif
 
       Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), f,
                               reducer_view);
@@ -441,8 +426,6 @@ struct TestReducers {
       ASSERT_EQ(prod_view_view, reference_prod);
     }
 
-    // Reduction to device view not yet supported
-#ifndef KOKKOS_ENABLE_OPENMPTARGET
     {
       Kokkos::View<Scalar, typename ExecSpace::memory_space> prod_view("View");
       Kokkos::deep_copy(prod_view, Scalar(0));
@@ -461,7 +444,6 @@ struct TestReducers {
       Kokkos::deep_copy(prod_view_scalar, prod_view);
       ASSERT_EQ(prod_view_scalar, reference_prod);
     }
-#endif
   }
 
   static void test_min(int N) {
@@ -1016,10 +998,10 @@ struct TestReducers {
     test_minloc(10003);
     test_max(10007);
     test_maxloc(10007);
-    // FIXME_OPENMPTARGET - The minmaxloc test fails in the Release and
-    // RelWithDebInfo builds for the OPENMPTARGET backend but passes in Debug
-    // mode.
-#if !defined(KOKKOS_ENABLE_OPENMPTARGET)
+#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_CLANG) && \
+    (KOKKOS_COMPILER_CLANG < 1300)
+    // FIXME_OPENMPTARGET - The minmaxloc test fails llvm <= 13 version.
+#else
     test_minmaxloc(10007);
 #endif
   }
@@ -1034,10 +1016,10 @@ struct TestReducers {
     test_minloc(10003);
     test_max(10007);
     test_maxloc(10007);
-    // FIXME_OPENMPTARGET - The minmaxloc test fails in the Release and
-    // RelWithDebInfo builds for the OPENMPTARGET backend but passes in Debug
-    // mode.
-#if !defined(KOKKOS_ENABLE_OPENMPTARGET)
+#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_CLANG) && \
+    (KOKKOS_COMPILER_CLANG < 1300)
+    // FIXME_OPENMPTARGET - The minmaxloc test fails llvm <= 13 version.
+#else
     test_minmaxloc(10007);
 #endif
     test_BAnd(35);
@@ -1050,6 +1032,11 @@ struct TestReducers {
     test_sum(10001);
     test_prod(35);
   }
+
+  static void execute_bool() {
+    test_LAnd(10001);
+    test_LOr(35);
+  }
 };
 
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestReducers_d.hpp b/packages/kokkos/core/unit_test/TestReducers_d.hpp
index 2d5802cdd4fcde24e8ac1dfe0f8d42ba9eaf396b..67f30e6cf238a8c3362012e110bd716cde1820fe 100644
--- a/packages/kokkos/core/unit_test/TestReducers_d.hpp
+++ b/packages/kokkos/core/unit_test/TestReducers_d.hpp
@@ -79,6 +79,52 @@ TEST(TEST_CATEGORY, reducers_half_t) {
   TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(25);
 }
 
+// TODO: File a bug report for this?
+// This fails on the CUDA-11.0-NVCC-C++17-RDC CI check.
+// TEST(TEST_CATEGORY, openmp_cuda11_reduction_bug_with_bhalf_t) {
+//  using ThisTestType = Kokkos::Experimental::bhalf_t;
+//  TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(50);
+//  TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(51);
+//  // For some reason commenting out reductions of 52,53,54,55 causes
+//  // the reduction of 56 to fail on OpenMP with Cuda/11.0
+//  //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(52);
+//  //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(53);
+//  //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(54);
+//  //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(55);
+//  TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(56);
+//}
+
+TEST(TEST_CATEGORY, reducers_bhalf_t) {
+#if defined(KOKKOS_ENABLE_OPENMP)
+  if (!std::is_same<TEST_EXECSPACE, Kokkos::OpenMP>::value)
+#else
+  if (true)
+#endif  // ENABLE_OPENMP
+  {
+    using ThisTestType = Kokkos::Experimental::bhalf_t;
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(2);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(50);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(51);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(52);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(53);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(54);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(55);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(56);
+    // TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(57);
+    // This could be 57 on device but there seems to be a loss of precision when
+    // running on OpenMP with Cuda/11.0
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(5);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(10);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(15);
+#if (CUDA_VERSION < 11000)
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(20);
+    TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(21);
+#endif
+  } else {
+    GTEST_SKIP();
+  }
+}
+
 TEST(TEST_CATEGORY, reducers_int8_t) {
   using ThisTestType = int8_t;
 
diff --git a/packages/kokkos/core/unit_test/TestReducers_e.hpp b/packages/kokkos/core/unit_test/TestReducers_e.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8a01e350f7cf6ae8531bb2d3a2570c85222e969
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestReducers_e.hpp
@@ -0,0 +1,52 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestReducers.hpp>
+
+namespace Test {
+TEST(TEST_CATEGORY, reducers_bool) {
+  TestReducers<bool, TEST_EXECSPACE>::execute_bool();
+}
+
+}  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp b/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp
index 6ffa11b11ca2d639bd9fd930a733d41ae7950482..c1d6c5cec10596fd1b355f6588e4987cd1a96ca7 100644
--- a/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp
+++ b/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp
@@ -54,6 +54,11 @@ void test_reduce_device_view(int64_t N, PolicyType policy,
   Kokkos::deep_copy(reducer_result, result);
   Kokkos::deep_copy(result, 0);
   ASSERT_EQ(N, reducer_result);
+
+  // We need a warm-up to get reasonable results
+  Kokkos::parallel_reduce("Test::ReduceDeviceView::TestView", policy, functor,
+                          result);
+  Kokkos::fence();
   timer.reset();
 
   // Test View
diff --git a/packages/kokkos/core/unit_test/TestSharedAlloc.hpp b/packages/kokkos/core/unit_test/TestSharedAlloc.hpp
index b5eb77dc2a964fe1066048b2edfac61d531b4fab..46534eeb13db2f4816d352d2ef302298b97d8486 100644
--- a/packages/kokkos/core/unit_test/TestSharedAlloc.hpp
+++ b/packages/kokkos/core/unit_test/TestSharedAlloc.hpp
@@ -65,7 +65,6 @@ struct SharedAllocDestroy {
 
 template <class MemorySpace, class ExecutionSpace>
 void test_shared_alloc() {
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
   using Header     = const Kokkos::Impl::SharedAllocationHeader;
   using Tracker    = Kokkos::Impl::SharedAllocationTracker;
   using RecordBase = Kokkos::Impl::SharedAllocationRecord<void, void>;
@@ -91,16 +90,16 @@ void test_shared_alloc() {
 
   {
     // Since always executed on host space, leave [=]
-    Kokkos::parallel_for(range, [=](size_t i) {
+    Kokkos::parallel_for(range, [=](int i) {
       char name[64];
-      sprintf(name, "test_%.2d", int(i));
+      sprintf(name, "test_%.2d", i);
 
       r[i] = RecordMemS::allocate(s, name, size * (i + 1));
       h[i] = Header::get_header(r[i]->data());
 
       ASSERT_EQ(r[i]->use_count(), 0);
 
-      for (size_t j = 0; j < (i / 10) + 1; ++j) RecordBase::increment(r[i]);
+      for (int j = 0; j < (i / 10) + 1; ++j) RecordBase::increment(r[i]);
 
       ASSERT_EQ(r[i]->use_count(), (i / 10) + 1);
       ASSERT_EQ(r[i], RecordMemS::get_record(r[i]->data()));
@@ -115,14 +114,18 @@ void test_shared_alloc() {
     // RecordMemS::print_records( std::cout, s, true );
 #endif
 
-    Kokkos::parallel_for(range, [=](size_t i) {
+    // This must be a plain for-loop since deallocation (which can be triggered
+    // by RecordBase::decrement) fences all execution space instances. If this
+    // is a parallel_for, the test can hang with the parallel_for blocking
+    // waiting for itself to complete.
+    for (size_t i = range.begin(); i < range.end(); ++i) {
       while (nullptr !=
              (r[i] = static_cast<RecordMemS*>(RecordBase::decrement(r[i])))) {
 #ifdef KOKKOS_ENABLE_DEBUG
         if (r[i]->use_count() == 1) RecordBase::is_sane(r[i]);
 #endif
       }
-    });
+    }
 
     Kokkos::fence();
   }
@@ -146,7 +149,7 @@ void test_shared_alloc() {
 
       for (size_t j = 0; j < (i / 10) + 1; ++j) RecordBase::increment(r[i]);
 
-      ASSERT_EQ(r[i]->use_count(), (i / 10) + 1);
+      ASSERT_EQ(r[i]->use_count(), int((i / 10) + 1));
       ASSERT_EQ(r[i], RecordMemS::get_record(r[i]->data()));
     });
 
@@ -156,14 +159,18 @@ void test_shared_alloc() {
     RecordBase::is_sane(r[0]);
 #endif
 
-    Kokkos::parallel_for(range, [=](size_t i) {
+    // This must be a plain for-loop since deallocation (which can be triggered
+    // by RecordBase::decrement) fences all execution space instances. If this
+    // is a parallel_for, the test can hang with the parallel_for blocking
+    // waiting for itself to complete.
+    for (size_t i = range.begin(); i < range.end(); ++i) {
       while (nullptr !=
              (r[i] = static_cast<RecordMemS*>(RecordBase::decrement(r[i])))) {
 #ifdef KOKKOS_ENABLE_DEBUG
         if (r[i]->use_count() == 1) RecordBase::is_sane(r[i]);
 #endif
       }
-    });
+    }
 
     Kokkos::fence();
 
@@ -223,8 +230,6 @@ void test_shared_alloc() {
 
     ASSERT_EQ(destroy_count, 1);
   }
-
-#endif /* #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
 }
 
 TEST(TEST_CATEGORY, impl_shared_alloc) {
diff --git a/packages/kokkos/core/unit_test/TestTeam.hpp b/packages/kokkos/core/unit_test/TestTeam.hpp
index a5e3de85bbc49508a2fe3c456860da9aa0b8af57..cade6b0243ff4554fd7b023b67f5d7f3b9086890 100644
--- a/packages/kokkos/core/unit_test/TestTeam.hpp
+++ b/packages/kokkos/core/unit_test/TestTeam.hpp
@@ -465,8 +465,9 @@ class TestScanTeam {
     functor_type functor;
 
     policy_type team_exec(nteam, 1);
-    team_exec = policy_type(
-        nteam, team_exec.team_size_max(functor, Kokkos::ParallelReduceTag()));
+    const auto team_size =
+        team_exec.team_size_max(functor, Kokkos::ParallelReduceTag());
+    team_exec = policy_type(nteam, team_size);
 
     for (unsigned i = 0; i < Repeat; ++i) {
       int64_t accum = 0;
@@ -807,6 +808,13 @@ struct TestScratchTeam {
         Functor(), result_type(&error_count));
     Kokkos::fence();
     ASSERT_EQ(error_count, 0);
+
+    Kokkos::parallel_reduce(
+        team_exec.set_scratch_size(1, Kokkos::PerTeam(team_scratch_size),
+                                   Kokkos::PerThread(thread_scratch_size)),
+        Functor(), Kokkos::Sum<typename Functor::value_type>(error_count));
+    Kokkos::fence();
+    ASSERT_EQ(error_count, 0);
   }
 };
 
@@ -1530,7 +1538,7 @@ struct TestScratchAlignment {
             .set_scratch_size(0, Kokkos::PerTeam(shmem_size)),
         KOKKOS_LAMBDA(
             const typename Kokkos::TeamPolicy<ExecSpace>::member_type &team) {
-          if (allocate_small) ScratchViewInt p(team.team_scratch(0), 1);
+          if (allocate_small) ScratchViewInt(team.team_scratch(0), 1);
           ScratchView a(team.team_scratch(0), 11);
           if (ptrdiff_t(a.data()) % sizeof(TestScalar) != 0)
             Kokkos::abort("Error: invalid scratch view alignment\n");
diff --git a/packages/kokkos/core/unit_test/TestTeamBasic.hpp b/packages/kokkos/core/unit_test/TestTeamBasic.hpp
index 17899f63b1f7816cff75a34ccdce0b42d0ee1b3e..1582783a76251508c91015cee1dfee82af8abb0d 100644
--- a/packages/kokkos/core/unit_test/TestTeamBasic.hpp
+++ b/packages/kokkos/core/unit_test/TestTeamBasic.hpp
@@ -83,6 +83,37 @@ TEST(TEST_CATEGORY, team_reduce) {
 }
 #endif
 
+template <typename ExecutionSpace>
+struct TestTeamReduceLarge {
+  using team_policy_t = Kokkos::TeamPolicy<ExecutionSpace>;
+  using member_t      = typename team_policy_t::member_type;
+
+  int m_range;
+
+  TestTeamReduceLarge(const int range) : m_range(range) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const member_t& t, int& update) const {
+    Kokkos::single(Kokkos::PerTeam(t), [&]() { update++; });
+  }
+
+  void run() {
+    int result = 0;
+    Kokkos::parallel_reduce(team_policy_t(m_range, Kokkos::AUTO), *this,
+                            result);
+    EXPECT_EQ(m_range, result);
+  }
+};
+
+TEST(TEST_CATEGORY, team_reduce_large) {
+  std::vector<int> ranges{(2LU << 23) - 1, 2LU << 23, (2LU << 24),
+                          (2LU << 24) + 1, 1LU << 29};
+  for (const auto range : ranges) {
+    TestTeamReduceLarge<TEST_EXECSPACE> test(range);
+    test.run();
+  }
+}
+
 TEST(TEST_CATEGORY, team_broadcast_long) {
   TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>,
                     long>::test_teambroadcast(0, 1);
diff --git a/packages/kokkos/core/unit_test/TestTeamScan.hpp b/packages/kokkos/core/unit_test/TestTeamScan.hpp
index 4693bae8c1e2f4f39603a46d36d1109c6effde22..9edba57a06c3689d4b9ece32ed79048b1ce5089b 100644
--- a/packages/kokkos/core/unit_test/TestTeamScan.hpp
+++ b/packages/kokkos/core/unit_test/TestTeamScan.hpp
@@ -49,28 +49,6 @@
 #include <sstream>
 #include <type_traits>
 
-#if defined(__clang__)
-#define is_clang true
-#else
-#define is_clang false
-#endif
-
-#if !defined(KOKKOS_ENABLE_OPENMPTARGET)
-// for avoid pre-processor block
-namespace Kokkos {
-namespace Experimental {
-class OpenMPTarget;
-}
-}  // namespace Kokkos
-#endif
-
-#if !defined(KOKKOS_ENABLE_CUDA)
-// for avoid pre-processor block
-namespace Kokkos {
-class Cuda;
-}  // namespace Kokkos
-#endif
-
 namespace Test {
 
 template <class ExecutionSpace, class DataType>
@@ -105,9 +83,11 @@ struct TestTeamScan {
   }
 
   auto operator()(int32_t _M, int32_t _N) {
-    std::cout << "Launching " << Kokkos::Impl::demangle(typeid(*this).name())
-              << " with "
-              << "M=" << _M << " and N=" << _N << "..." << std::endl;
+    std::stringstream ss;
+    ss << Kokkos::Impl::demangle(typeid(*this).name());
+    ss << "(/*M=*/" << _M << ", /*N=*/" << _N << ")";
+    std::string const test_id = ss.str();
+
     M   = _M;
     N   = _N;
     a_d = view_type("a_d", M, N);
@@ -131,30 +111,32 @@ struct TestTeamScan {
     Kokkos::deep_copy(a_o, a_r);
 
     for (int32_t i = 0; i < M; ++i) {
-      value_type _scan_real = 0;
-      value_type _scan_calc = 0;
-      value_type _epsilon   = std::numeric_limits<value_type>::epsilon();
+      value_type scan_ref = 0;
+      value_type scan_calc;
+      value_type abs_err = 0;
       // each fp addition is subject to small loses in precision and these
       // compound as loop so we set the base error to be the machine epsilon and
       // then add in another epsilon each iteration. For example, with CUDA
       // backend + 32-bit float + large N values (e.g. 1,000) + high
       // thread-counts (e.g. 1024), this test will fail w/o epsilon
       // accommodation
+      constexpr value_type epsilon = std::numeric_limits<value_type>::epsilon();
       for (int32_t j = 0; j < N; ++j) {
-        _scan_real += a_i(i, j);
-        _scan_calc     = a_o(i, j);
-        auto _get_mesg = [=]() {
-          std::stringstream ss, idx;
-          idx << "(" << i << ", " << j << ") = ";
-          ss << "a_d" << idx.str() << a_i(i, j);
-          ss << ", a_r" << idx.str() << a_o(i, j);
-          return ss.str();
-        };
+        scan_ref += a_i(i, j);
+        scan_calc = a_o(i, j);
         if (std::is_integral<value_type>::value) {
-          ASSERT_EQ(_scan_real, _scan_calc) << _get_mesg();
+          ASSERT_EQ(scan_ref, scan_calc)
+              << test_id
+              << " calculated scan output value differs from reference at "
+                 "indices i="
+              << i << " and j=" << j;
         } else {
-          _epsilon += std::numeric_limits<value_type>::epsilon();
-          ASSERT_NEAR(_scan_real, _scan_calc, _epsilon) << _get_mesg();
+          abs_err += epsilon;
+          ASSERT_NEAR(scan_ref, scan_calc, abs_err)
+              << test_id
+              << " calculated scan output value differs from reference at "
+                 "indices i="
+              << i << " and j=" << j;
         }
       }
     }
diff --git a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp
index f64c5b8809a214d4e2376e43df29d7900eccd1de..c0e698d92eed7fbfcae1df33381fd5f298200d56 100644
--- a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp
+++ b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp
@@ -182,27 +182,22 @@ TEST(TEST_CATEGORY, team_policy_max_recommended) {
 }
 
 template <typename TeamHandleType, typename ReducerValueType>
-struct PrintFunctor1 {
-  KOKKOS_INLINE_FUNCTION void operator()(const TeamHandleType& team,
-                                         ReducerValueType&) const {
-    KOKKOS_IMPL_DO_NOT_USE_PRINTF("Test %i %i\n", int(team.league_rank()),
-                                  int(team.team_rank()));
+struct MinMaxTeamLeagueRank {
+  KOKKOS_FUNCTION void operator()(const TeamHandleType& team,
+                                  ReducerValueType& update) const {
+    int const x = team.league_rank();
+    if (x < update.min_val) {
+      update.min_val = x;
+    }
+    if (x > update.max_val) {
+      update.max_val = x;
+    }
   }
 };
 
-template <typename TeamHandleType, typename ReducerValueType>
-struct PrintFunctor2 {
-  KOKKOS_INLINE_FUNCTION void operator()(const TeamHandleType& team,
-                                         ReducerValueType& teamVal) const {
-    KOKKOS_IMPL_DO_NOT_USE_PRINTF("Test %i %i\n", int(team.league_rank()),
-                                  int(team.team_rank()));
-    teamVal += 1;
-  }
-};
-
-TEST(TEST_CATEGORY, team_policy_max_scalar_without_plus_equal_k) {
+TEST(TEST_CATEGORY, team_policy_minmax_scalar_without_plus_equal_k) {
   using ExecSpace           = TEST_EXECSPACE;
-  using ReducerType         = Kokkos::MinMax<double, Kokkos::HostSpace>;
+  using ReducerType         = Kokkos::MinMax<int, Kokkos::HostSpace>;
   using ReducerValueType    = typename ReducerType::value_type;
   using DynamicScheduleType = Kokkos::Schedule<Kokkos::Dynamic>;
   using TeamPolicyType = Kokkos::TeamPolicy<ExecSpace, DynamicScheduleType>;
@@ -213,21 +208,11 @@ TEST(TEST_CATEGORY, team_policy_max_scalar_without_plus_equal_k) {
   ReducerType reducer(val);
 
   TeamPolicyType p(num_teams, Kokkos::AUTO);
-  PrintFunctor1<TeamHandleType, ReducerValueType> f1;
-  const int max_team_size =
-      p.team_size_max(f1, reducer, Kokkos::ParallelReduceTag());
-
-  const int recommended_team_size =
-      p.team_size_recommended(f1, reducer, Kokkos::ParallelReduceTag());
-
-  printf("Max TeamSize: %i Recommended TeamSize: %i\n", max_team_size,
-         recommended_team_size);
+  MinMaxTeamLeagueRank<TeamHandleType, ReducerValueType> f1;
 
   Kokkos::parallel_reduce(p, f1, reducer);
-  double sum;
-  Kokkos::parallel_reduce(TeamPolicyType(num_teams, Kokkos::AUTO),
-                          PrintFunctor2<TeamHandleType, double>{}, sum);
-  printf("Sum: %lf\n", sum);
+  ASSERT_EQ(val.min_val, 0);
+  ASSERT_EQ(val.max_val, num_teams - 1);
 }
 
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestUtilities.hpp b/packages/kokkos/core/unit_test/TestUtilities.hpp
index 1d3e19da105161e0b71c733ad2bb1232add1d8aa..fc8e615dab5780bb1b06bc1092499a39dad5c299 100644
--- a/packages/kokkos/core/unit_test/TestUtilities.hpp
+++ b/packages/kokkos/core/unit_test/TestUtilities.hpp
@@ -84,8 +84,8 @@ std::size_t do_comma_emulation_test(std::integer_sequence<std::size_t, Idxs...>,
 }
 
 TEST(utilities, comma_operator_emulation) {
-  ASSERT_EQ(
-      0, do_comma_emulation_test(std::make_index_sequence<5>{}, 0, 1, 2, 3, 4));
+  ASSERT_EQ(0u, do_comma_emulation_test(std::make_index_sequence<5>{}, 0, 1, 2,
+                                        3, 4));
 }
 
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestViewAPI.hpp b/packages/kokkos/core/unit_test/TestViewAPI.hpp
index 73531e6196f0ca145789ef98f680328ece747df9..21602be086b40d582608042f06d0260a5d63cfac 100644
--- a/packages/kokkos/core/unit_test/TestViewAPI.hpp
+++ b/packages/kokkos/core/unit_test/TestViewAPI.hpp
@@ -892,8 +892,8 @@ struct TestViewMirror {
     for (int i = 0; i < 10; i++) {
       a_h(i) = (double)i;
     }
-    auto a_d = Kokkos::create_mirror_view(DeviceType(), a_h,
-                                          Kokkos::WithoutInitializing);
+    auto a_d = Kokkos::create_mirror_view(Kokkos::WithoutInitializing,
+                                          DeviceType(), a_h);
 
     int equal_ptr_h_d = (a_h.data() == a_d.data()) ? 1 : 0;
     constexpr int is_same_memspace =
@@ -1082,10 +1082,10 @@ class TestViewAPI {
     dx = dView4("dx", N0);
     dy = dView4("dy", N0);
 
-    ASSERT_EQ(dx.use_count(), size_t(1));
+    ASSERT_EQ(dx.use_count(), 1);
 
     dView4_unmanaged unmanaged_dx = dx;
-    ASSERT_EQ(dx.use_count(), size_t(1));
+    ASSERT_EQ(dx.use_count(), 1);
 
     dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(
         dx.data(), dx.extent(0), dx.extent(1), dx.extent(2), dx.extent(3));
@@ -1097,24 +1097,24 @@ class TestViewAPI {
     }
 
     const_dView4 const_dx = dx;
-    ASSERT_EQ(dx.use_count(), size_t(2));
+    ASSERT_EQ(dx.use_count(), 2);
 
     {
       const_dView4 const_dx2;
       const_dx2 = const_dx;
-      ASSERT_EQ(dx.use_count(), size_t(3));
+      ASSERT_EQ(dx.use_count(), 3);
 
       const_dx2 = dy;
-      ASSERT_EQ(dx.use_count(), size_t(2));
+      ASSERT_EQ(dx.use_count(), 2);
 
       const_dView4 const_dx3(dx);
-      ASSERT_EQ(dx.use_count(), size_t(3));
+      ASSERT_EQ(dx.use_count(), 3);
 
       dView4_unmanaged dx4_unmanaged(dx);
-      ASSERT_EQ(dx.use_count(), size_t(3));
+      ASSERT_EQ(dx.use_count(), 3);
     }
 
-    ASSERT_EQ(dx.use_count(), size_t(2));
+    ASSERT_EQ(dx.use_count(), 2);
 
     ASSERT_NE(dx.data(), nullptr);
     ASSERT_NE(const_dx.data(), nullptr);
@@ -1478,6 +1478,13 @@ class TestViewAPI {
     if (std::is_same<typename dView1::memory_space,
                      Kokkos::Experimental::OpenMPTargetSpace>::value)
       return;
+#endif
+// FIXME_MSVC_WITH_CUDA
+// This test doesn't behave as expected on Windows with CUDA
+#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)
+    if (std::is_same<typename dView1::memory_space,
+                     Kokkos::CudaUVMSpace>::value)
+      return;
 #endif
     auto alloc_size = std::numeric_limits<size_t>::max() - 42;
     try {
diff --git a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp
index 9ce3a34236956572b5a63c38765c05564a536140..d592fcaf20c407da8d2c639be98b9cb54b29711e 100644
--- a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp
+++ b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp
@@ -617,6 +617,7 @@ TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) {
   }
 }
 
+#ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC
 TEST(TEST_CATEGORY_DEATH, view_layoutstride_right_to_layoutleft_assignment) {
   using exec_space = TEST_EXECSPACE;
 
@@ -926,6 +927,7 @@ TEST(TEST_CATEGORY_DEATH, view_layoutstride_left_to_layoutright_assignment) {
                  "View assignment must have compatible layouts");
   }
 }
+#endif
 
 }  // namespace Test
 
diff --git a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp
index 974d7c98cafb56c91df55b425913b14c0dfd3ca1..edeb1abdaa5f8d1120ac141cb67283ec3d776ce0 100644
--- a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp
+++ b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp
@@ -44,6 +44,7 @@
 
 #include <gtest/gtest.h>
 
+#include <cstddef>
 #include <stdexcept>
 #include <sstream>
 #include <iostream>
@@ -168,25 +169,25 @@ void test_view_mapping() {
   dim_s0_s0_s0 d3(2, 3, 4, 5, 6, 7, 8, 9);
   dim_s0_s0_s0_s0 d4(2, 3, 4, 5, 6, 7, 8, 9);
 
-  ASSERT_EQ(d1.N0, 2);
-  ASSERT_EQ(d2.N0, 2);
-  ASSERT_EQ(d3.N0, 2);
-  ASSERT_EQ(d4.N0, 2);
+  ASSERT_EQ(d1.N0, 2u);
+  ASSERT_EQ(d2.N0, 2u);
+  ASSERT_EQ(d3.N0, 2u);
+  ASSERT_EQ(d4.N0, 2u);
 
-  ASSERT_EQ(d1.N1, 1);
-  ASSERT_EQ(d2.N1, 3);
-  ASSERT_EQ(d3.N1, 3);
-  ASSERT_EQ(d4.N1, 3);
+  ASSERT_EQ(d1.N1, 1u);
+  ASSERT_EQ(d2.N1, 3u);
+  ASSERT_EQ(d3.N1, 3u);
+  ASSERT_EQ(d4.N1, 3u);
 
-  ASSERT_EQ(d1.N2, 1);
-  ASSERT_EQ(d2.N2, 1);
-  ASSERT_EQ(d3.N2, 4);
-  ASSERT_EQ(d4.N2, 4);
+  ASSERT_EQ(d1.N2, 1u);
+  ASSERT_EQ(d2.N2, 1u);
+  ASSERT_EQ(d3.N2, 4u);
+  ASSERT_EQ(d4.N2, 4u);
 
-  ASSERT_EQ(d1.N3, 1);
-  ASSERT_EQ(d2.N3, 1);
-  ASSERT_EQ(d3.N3, 1);
-  ASSERT_EQ(d4.N3, 5);
+  ASSERT_EQ(d1.N3, 1u);
+  ASSERT_EQ(d2.N3, 1u);
+  ASSERT_EQ(d3.N3, 1u);
+  ASSERT_EQ(d4.N3, 5u);
 
   //----------------------------------------
 
@@ -205,17 +206,17 @@ void test_view_mapping() {
 
     stride_s0_s0_s0 stride3(off3);
 
-    ASSERT_EQ(off3.stride_0(), 1);
-    ASSERT_EQ(off3.stride_1(), 2);
-    ASSERT_EQ(off3.stride_2(), 6);
-    ASSERT_EQ(off3.span(), 24);
+    ASSERT_EQ(off3.stride_0(), 1u);
+    ASSERT_EQ(off3.stride_1(), 2u);
+    ASSERT_EQ(off3.stride_2(), 6u);
+    ASSERT_EQ(off3.span(), 24u);
 
     ASSERT_EQ(off3.stride_0(), stride3.stride_0());
     ASSERT_EQ(off3.stride_1(), stride3.stride_1());
     ASSERT_EQ(off3.stride_2(), stride3.stride_2());
     ASSERT_EQ(off3.span(), stride3.span());
 
-    int offset = 0;
+    unsigned offset = 0;
 
     for (int k = 0; k < 4; ++k)
       for (int j = 0; j < 3; ++j)
@@ -236,32 +237,32 @@ void test_view_mapping() {
 
     stride_s0_s0_s0 stride3(dyn_off3);
 
-    ASSERT_EQ(dyn_off3.m_dim.rank, 3);
-    ASSERT_EQ(dyn_off3.m_dim.N0, 2);
-    ASSERT_EQ(dyn_off3.m_dim.N1, 3);
-    ASSERT_EQ(dyn_off3.m_dim.N2, 4);
-    ASSERT_EQ(dyn_off3.m_dim.N3, 1);
-    ASSERT_EQ(dyn_off3.size(), 2 * 3 * 4);
+    ASSERT_EQ(dyn_off3.m_dim.rank, 3u);
+    ASSERT_EQ(dyn_off3.m_dim.N0, 2u);
+    ASSERT_EQ(dyn_off3.m_dim.N1, 3u);
+    ASSERT_EQ(dyn_off3.m_dim.N2, 4u);
+    ASSERT_EQ(dyn_off3.m_dim.N3, 1u);
+    ASSERT_EQ(dyn_off3.size(), (size_t)2 * 3 * 4);
 
     const Kokkos::LayoutLeft layout = dyn_off3.layout();
 
-    ASSERT_EQ(layout.dimension[0], 2);
-    ASSERT_EQ(layout.dimension[1], 3);
-    ASSERT_EQ(layout.dimension[2], 4);
-    ASSERT_EQ(layout.dimension[3], 1);
-    ASSERT_EQ(layout.dimension[4], 1);
-    ASSERT_EQ(layout.dimension[5], 1);
-    ASSERT_EQ(layout.dimension[6], 1);
-    ASSERT_EQ(layout.dimension[7], 1);
+    ASSERT_EQ(layout.dimension[0], 2u);
+    ASSERT_EQ(layout.dimension[1], 3u);
+    ASSERT_EQ(layout.dimension[2], 4u);
+    ASSERT_EQ(layout.dimension[3], 1u);
+    ASSERT_EQ(layout.dimension[4], 1u);
+    ASSERT_EQ(layout.dimension[5], 1u);
+    ASSERT_EQ(layout.dimension[6], 1u);
+    ASSERT_EQ(layout.dimension[7], 1u);
 
-    ASSERT_EQ(stride3.m_dim.rank, 3);
-    ASSERT_EQ(stride3.m_dim.N0, 2);
-    ASSERT_EQ(stride3.m_dim.N1, 3);
-    ASSERT_EQ(stride3.m_dim.N2, 4);
-    ASSERT_EQ(stride3.m_dim.N3, 1);
-    ASSERT_EQ(stride3.size(), 2 * 3 * 4);
+    ASSERT_EQ(stride3.m_dim.rank, 3u);
+    ASSERT_EQ(stride3.m_dim.N0, 2u);
+    ASSERT_EQ(stride3.m_dim.N1, 3u);
+    ASSERT_EQ(stride3.m_dim.N2, 4u);
+    ASSERT_EQ(stride3.m_dim.N3, 1u);
+    ASSERT_EQ(stride3.size(), (size_t)2 * 3 * 4);
 
-    int offset = 0;
+    size_t offset = 0;
 
     for (int k = 0; k < 4; ++k)
       for (int j = 0; j < 3; ++j)
@@ -277,8 +278,8 @@ void test_view_mapping() {
   //----------------------------------------
   // Large dimension is likely padded.
   {
-    constexpr int N0 = 2000;
-    constexpr int N1 = 300;
+    constexpr size_t N0 = 2000;
+    constexpr size_t N1 = 300;
 
     using left_s0_s0_s4 =
         Kokkos::Impl::ViewOffset<dim_s0_s0_s4, Kokkos::LayoutLeft>;
@@ -288,26 +289,26 @@ void test_view_mapping() {
 
     stride_s0_s0_s0 stride3(dyn_off3);
 
-    ASSERT_EQ(dyn_off3.m_dim.rank, 3);
+    ASSERT_EQ(dyn_off3.m_dim.rank, 3u);
     ASSERT_EQ(dyn_off3.m_dim.N0, N0);
     ASSERT_EQ(dyn_off3.m_dim.N1, N1);
-    ASSERT_EQ(dyn_off3.m_dim.N2, 4);
-    ASSERT_EQ(dyn_off3.m_dim.N3, 1);
+    ASSERT_EQ(dyn_off3.m_dim.N2, 4u);
+    ASSERT_EQ(dyn_off3.m_dim.N3, 1u);
     ASSERT_EQ(dyn_off3.size(), N0 * N1 * 4);
 
-    ASSERT_EQ(stride3.m_dim.rank, 3);
+    ASSERT_EQ(stride3.m_dim.rank, 3u);
     ASSERT_EQ(stride3.m_dim.N0, N0);
     ASSERT_EQ(stride3.m_dim.N1, N1);
-    ASSERT_EQ(stride3.m_dim.N2, 4);
-    ASSERT_EQ(stride3.m_dim.N3, 1);
+    ASSERT_EQ(stride3.m_dim.N2, 4u);
+    ASSERT_EQ(stride3.m_dim.N3, 1u);
     ASSERT_EQ(stride3.size(), N0 * N1 * 4);
     ASSERT_EQ(stride3.span(), dyn_off3.span());
 
-    int offset = 0;
+    size_t offset = 0;
 
-    for (int k = 0; k < 4; ++k)
-      for (int j = 0; j < N1; ++j)
-        for (int i = 0; i < N0; ++i) {
+    for (size_t k = 0; k < 4; ++k)
+      for (size_t j = 0; j < N1; ++j)
+        for (size_t i = 0; i < N0; ++i) {
           ASSERT_LE(offset, dyn_off3(i, j, k));
           ASSERT_EQ(stride3(i, j, k), dyn_off3(i, j, k));
           offset = dyn_off3(i, j, k) + 1;
@@ -328,9 +329,9 @@ void test_view_mapping() {
 
     stride_s0_s0_s0 stride3(off3);
 
-    ASSERT_EQ(off3.stride_0(), 12);
-    ASSERT_EQ(off3.stride_1(), 4);
-    ASSERT_EQ(off3.stride_2(), 1);
+    ASSERT_EQ(off3.stride_0(), 12u);
+    ASSERT_EQ(off3.stride_1(), 4u);
+    ASSERT_EQ(off3.stride_2(), 1u);
 
     ASSERT_EQ(off3.dimension_0(), stride3.dimension_0());
     ASSERT_EQ(off3.dimension_1(), stride3.dimension_1());
@@ -340,7 +341,7 @@ void test_view_mapping() {
     ASSERT_EQ(off3.stride_2(), stride3.stride_2());
     ASSERT_EQ(off3.span(), stride3.span());
 
-    int offset = 0;
+    size_t offset = 0;
 
     for (int i = 0; i < 2; ++i)
       for (int j = 0; j < 3; ++j)
@@ -363,12 +364,12 @@ void test_view_mapping() {
 
     stride_s0_s0_s0 stride3(dyn_off3);
 
-    ASSERT_EQ(dyn_off3.m_dim.rank, 3);
-    ASSERT_EQ(dyn_off3.m_dim.N0, 2);
-    ASSERT_EQ(dyn_off3.m_dim.N1, 3);
-    ASSERT_EQ(dyn_off3.m_dim.N2, 4);
-    ASSERT_EQ(dyn_off3.m_dim.N3, 1);
-    ASSERT_EQ(dyn_off3.size(), 2 * 3 * 4);
+    ASSERT_EQ(dyn_off3.m_dim.rank, 3u);
+    ASSERT_EQ(dyn_off3.m_dim.N0, 2u);
+    ASSERT_EQ(dyn_off3.m_dim.N1, 3u);
+    ASSERT_EQ(dyn_off3.m_dim.N2, 4u);
+    ASSERT_EQ(dyn_off3.m_dim.N3, 1u);
+    ASSERT_EQ(dyn_off3.size(), (size_t)2 * 3 * 4);
 
     ASSERT_EQ(dyn_off3.dimension_0(), stride3.dimension_0());
     ASSERT_EQ(dyn_off3.dimension_1(), stride3.dimension_1());
@@ -378,7 +379,7 @@ void test_view_mapping() {
     ASSERT_EQ(dyn_off3.stride_2(), stride3.stride_2());
     ASSERT_EQ(dyn_off3.span(), stride3.span());
 
-    int offset = 0;
+    size_t offset = 0;
 
     for (int i = 0; i < 2; ++i)
       for (int j = 0; j < 3; ++j)
@@ -393,8 +394,8 @@ void test_view_mapping() {
   //----------------------------------------
   // Large dimension is likely padded.
   {
-    constexpr int N0 = 2000;
-    constexpr int N1 = 300;
+    constexpr size_t N0 = 2000;
+    constexpr size_t N1 = 300;
 
     using right_s0_s0_s4 =
         Kokkos::Impl::ViewOffset<dim_s0_s0_s4, Kokkos::LayoutRight>;
@@ -404,11 +405,11 @@ void test_view_mapping() {
 
     stride_s0_s0_s0 stride3(dyn_off3);
 
-    ASSERT_EQ(dyn_off3.m_dim.rank, 3);
+    ASSERT_EQ(dyn_off3.m_dim.rank, 3u);
     ASSERT_EQ(dyn_off3.m_dim.N0, N0);
     ASSERT_EQ(dyn_off3.m_dim.N1, N1);
-    ASSERT_EQ(dyn_off3.m_dim.N2, 4);
-    ASSERT_EQ(dyn_off3.m_dim.N3, 1);
+    ASSERT_EQ(dyn_off3.m_dim.N2, 4u);
+    ASSERT_EQ(dyn_off3.m_dim.N3, 1u);
     ASSERT_EQ(dyn_off3.size(), N0 * N1 * 4);
 
     ASSERT_EQ(dyn_off3.dimension_0(), stride3.dimension_0());
@@ -419,11 +420,11 @@ void test_view_mapping() {
     ASSERT_EQ(dyn_off3.stride_2(), stride3.stride_2());
     ASSERT_EQ(dyn_off3.span(), stride3.span());
 
-    int offset = 0;
+    size_t offset = 0;
 
-    for (int i = 0; i < N0; ++i)
-      for (int j = 0; j < N1; ++j)
-        for (int k = 0; k < 4; ++k) {
+    for (size_t i = 0; i < N0; ++i)
+      for (size_t j = 0; j < N1; ++j)
+        for (size_t k = 0; k < 4; ++k) {
           ASSERT_LE(offset, dyn_off3(i, j, k));
           ASSERT_EQ(dyn_off3(i, j, k), stride3(i, j, k));
           offset = dyn_off3(i, j, k) + 1;
@@ -438,10 +439,10 @@ void test_view_mapping() {
     // Mapping rank 4 to rank 3
     using SubviewExtents = Kokkos::Impl::SubviewExtents<4, 3>;
 
-    constexpr int N0 = 1000;
-    constexpr int N1 = 2000;
-    constexpr int N2 = 3000;
-    constexpr int N3 = 4000;
+    constexpr size_t N0 = 1000;
+    constexpr size_t N1 = 2000;
+    constexpr size_t N2 = 3000;
+    constexpr size_t N3 = 4000;
 
     Kokkos::Impl::ViewDimension<N0, N1, N2, N3> dim;
 
@@ -450,26 +451,26 @@ void test_view_mapping() {
                        Kokkos::pair<int, int>(N3 / 4, 20 + N3 / 4));
 
     ASSERT_EQ(tmp.domain_offset(0), N0 / 2);
-    ASSERT_EQ(tmp.domain_offset(1), 0);
+    ASSERT_EQ(tmp.domain_offset(1), 0u);
     ASSERT_EQ(tmp.domain_offset(2), N2 / 4);
     ASSERT_EQ(tmp.domain_offset(3), N3 / 4);
 
-    ASSERT_EQ(tmp.range_index(0), 1);
-    ASSERT_EQ(tmp.range_index(1), 2);
-    ASSERT_EQ(tmp.range_index(2), 3);
+    ASSERT_EQ(tmp.range_index(0), 1u);
+    ASSERT_EQ(tmp.range_index(1), 2u);
+    ASSERT_EQ(tmp.range_index(2), 3u);
 
     ASSERT_EQ(tmp.range_extent(0), N1);
-    ASSERT_EQ(tmp.range_extent(1), 10);
-    ASSERT_EQ(tmp.range_extent(2), 20);
+    ASSERT_EQ(tmp.range_extent(1), 10u);
+    ASSERT_EQ(tmp.range_extent(2), 20u);
   }
 
   {
-    constexpr int N0 = 2000;
-    constexpr int N1 = 300;
+    constexpr size_t N0 = 2000;
+    constexpr size_t N1 = 300;
 
-    constexpr int sub_N0 = 1000;
-    constexpr int sub_N1 = 200;
-    constexpr int sub_N2 = 4;
+    constexpr size_t sub_N0 = 1000;
+    constexpr size_t sub_N1 = 200;
+    constexpr size_t sub_N2 = 4;
 
     using left_s0_s0_s4 =
         Kokkos::Impl::ViewOffset<dim_s0_s0_s4, Kokkos::LayoutLeft>;
@@ -493,20 +494,20 @@ void test_view_mapping() {
     ASSERT_EQ(dyn_off3.stride_2(), stride3.stride_2());
     ASSERT_GE(dyn_off3.span(), stride3.span());
 
-    for (int k = 0; k < sub_N2; ++k)
-      for (int j = 0; j < sub_N1; ++j)
-        for (int i = 0; i < sub_N0; ++i) {
+    for (size_t k = 0; k < sub_N2; ++k)
+      for (size_t j = 0; j < sub_N1; ++j)
+        for (size_t i = 0; i < sub_N0; ++i) {
           ASSERT_EQ(stride3(i, j, k), dyn_off3(i, j, k));
         }
   }
 
   {
-    constexpr int N0 = 2000;
-    constexpr int N1 = 300;
+    constexpr size_t N0 = 2000;
+    constexpr size_t N1 = 300;
 
-    constexpr int sub_N0 = 1000;
-    constexpr int sub_N1 = 200;
-    constexpr int sub_N2 = 4;
+    constexpr size_t sub_N0 = 1000;
+    constexpr size_t sub_N1 = 200;
+    constexpr size_t sub_N2 = 4;
 
     using right_s0_s0_s4 =
         Kokkos::Impl::ViewOffset<dim_s0_s0_s4, Kokkos::LayoutRight>;
@@ -530,9 +531,9 @@ void test_view_mapping() {
     ASSERT_EQ(dyn_off3.stride_2(), stride3.stride_2());
     ASSERT_GE(dyn_off3.span(), stride3.span());
 
-    for (int i = 0; i < sub_N0; ++i)
-      for (int j = 0; j < sub_N1; ++j)
-        for (int k = 0; k < sub_N2; ++k) {
+    for (size_t i = 0; i < sub_N0; ++i)
+      for (size_t j = 0; j < sub_N1; ++j)
+        for (size_t k = 0; k < sub_N2; ++k) {
           ASSERT_EQ(stride3(i, j, k), dyn_off3(i, j, k));
         }
   }
@@ -720,8 +721,8 @@ void test_view_mapping() {
     // Generate static_assert error:
     // T tmp( cr1 );
 
-    ASSERT_EQ(vr1.span(), N);
-    ASSERT_EQ(cr1.span(), N);
+    ASSERT_EQ(vr1.span(), size_t(N));
+    ASSERT_EQ(cr1.span(), size_t(N));
     ASSERT_EQ(vr1.data(), &data[0]);
     ASSERT_EQ(cr1.data(), &data[0]);
 
@@ -766,7 +767,7 @@ void test_view_mapping() {
 
     ASSERT_EQ(C::Rank, 1);
 
-    ASSERT_EQ(vr1.extent(0), N);
+    ASSERT_EQ(vr1.extent(0), size_t(N));
 
     if (Kokkos::SpaceAccessibility<Kokkos::HostSpace,
                                    typename Space::memory_space>::accessible) {
@@ -813,7 +814,7 @@ void test_view_mapping() {
     ASSERT_TRUE((std::is_same<typename T::reference_type, int&>::value));
     ASSERT_EQ(T::Rank, 1);
 
-    ASSERT_EQ(vr1.extent(0), N);
+    ASSERT_EQ(vr1.extent(0), size_t(N));
 
     if (Kokkos::SpaceAccessibility<Kokkos::HostSpace,
                                    typename Space::memory_space>::accessible) {
@@ -841,8 +842,8 @@ void test_view_mapping() {
     T vr1("vr1", N);
     C cr1(vr1);
 
-    ASSERT_EQ(vr1.extent(0), 0);
-    ASSERT_EQ(cr1.extent(0), 0);
+    ASSERT_EQ(vr1.extent(0), 0u);
+    ASSERT_EQ(cr1.extent(0), 0u);
   }
 
   // Testing using space instance for allocation.
@@ -890,15 +891,15 @@ void test_view_mapping() {
 
     const offset_t offset(std::integral_constant<unsigned, 0>(), stride);
 
-    ASSERT_EQ(offset.dimension_0(), 3);
-    ASSERT_EQ(offset.dimension_1(), 4);
-    ASSERT_EQ(offset.dimension_2(), 5);
+    ASSERT_EQ(offset.dimension_0(), 3u);
+    ASSERT_EQ(offset.dimension_1(), 4u);
+    ASSERT_EQ(offset.dimension_2(), 5u);
 
-    ASSERT_EQ(offset.stride_0(), 4);
-    ASSERT_EQ(offset.stride_1(), 1);
-    ASSERT_EQ(offset.stride_2(), 12);
+    ASSERT_EQ(offset.stride_0(), 4u);
+    ASSERT_EQ(offset.stride_1(), 1u);
+    ASSERT_EQ(offset.stride_2(), 12u);
 
-    ASSERT_EQ(offset.span(), 60);
+    ASSERT_EQ(offset.span(), 60u);
     ASSERT_TRUE(offset.span_is_contiguous());
 
     Kokkos::Impl::ViewMapping<traits_t, void> v(
@@ -910,24 +911,24 @@ void test_view_mapping() {
     using M           = typename V::HostMirror;
     using layout_type = typename Kokkos::View<int**, Space>::array_layout;
 
-    constexpr int N0 = 10;
-    constexpr int N1 = 11;
+    constexpr size_t N0 = 10;
+    constexpr size_t N1 = 11;
 
     V a("a", N0, N1);
     M b = Kokkos::create_mirror(a);
     M c = Kokkos::create_mirror_view(a);
     M d;
 
-    for (int i0 = 0; i0 < N0; ++i0)
-      for (int i1 = 0; i1 < N1; ++i1) {
+    for (size_t i0 = 0; i0 < N0; ++i0)
+      for (size_t i1 = 0; i1 < N1; ++i1) {
         b(i0, i1) = 1 + i0 + i1 * N0;
       }
 
     Kokkos::deep_copy(a, b);
     Kokkos::deep_copy(c, a);
 
-    for (int i0 = 0; i0 < N0; ++i0)
-      for (int i1 = 0; i1 < N1; ++i1) {
+    for (size_t i0 = 0; i0 < N0; ++i0)
+      for (size_t i1 = 0; i1 < N1; ++i1) {
         ASSERT_EQ(b(i0, i1), c(i0, i1));
       }
 
@@ -943,12 +944,12 @@ void test_view_mapping() {
     Kokkos::realloc(c, 5, 6);
     Kokkos::realloc(d, 5, 6);
 
-    ASSERT_EQ(b.extent(0), 5);
-    ASSERT_EQ(b.extent(1), 6);
-    ASSERT_EQ(c.extent(0), 5);
-    ASSERT_EQ(c.extent(1), 6);
-    ASSERT_EQ(d.extent(0), 5);
-    ASSERT_EQ(d.extent(1), 6);
+    ASSERT_EQ(b.extent(0), 5u);
+    ASSERT_EQ(b.extent(1), 6u);
+    ASSERT_EQ(c.extent(0), 5u);
+    ASSERT_EQ(c.extent(1), 6u);
+    ASSERT_EQ(d.extent(0), 5u);
+    ASSERT_EQ(d.extent(1), 6u);
 
     layout_type layout(7, 8);
     Kokkos::resize(b, layout);
@@ -971,12 +972,12 @@ void test_view_mapping() {
     Kokkos::realloc(c, layout);
     Kokkos::realloc(d, layout);
 
-    ASSERT_EQ(b.extent(0), 7);
-    ASSERT_EQ(b.extent(1), 8);
-    ASSERT_EQ(c.extent(0), 7);
-    ASSERT_EQ(c.extent(1), 8);
-    ASSERT_EQ(d.extent(0), 7);
-    ASSERT_EQ(d.extent(1), 8);
+    ASSERT_EQ(b.extent(0), 7u);
+    ASSERT_EQ(b.extent(1), 8u);
+    ASSERT_EQ(c.extent(0), 7u);
+    ASSERT_EQ(c.extent(1), 8u);
+    ASSERT_EQ(d.extent(0), 7u);
+    ASSERT_EQ(d.extent(1), 8u);
   }
 
   {
@@ -985,8 +986,8 @@ void test_view_mapping() {
     using layout_type =
         typename Kokkos::View<int**, Kokkos::LayoutStride, Space>::array_layout;
 
-    constexpr int N0 = 10;
-    constexpr int N1 = 11;
+    constexpr size_t N0 = 10;
+    constexpr size_t N1 = 11;
 
     const int dimensions[] = {N0, N1};
     const int order[]      = {1, 0};
@@ -996,16 +997,16 @@ void test_view_mapping() {
     M c = Kokkos::create_mirror_view(a);
     M d;
 
-    for (int i0 = 0; i0 < N0; ++i0)
-      for (int i1 = 0; i1 < N1; ++i1) {
+    for (size_t i0 = 0; i0 < N0; ++i0)
+      for (size_t i1 = 0; i1 < N1; ++i1) {
         b(i0, i1) = 1 + i0 + i1 * N0;
       }
 
     Kokkos::deep_copy(a, b);
     Kokkos::deep_copy(c, a);
 
-    for (int i0 = 0; i0 < N0; ++i0)
-      for (int i1 = 0; i1 < N1; ++i1) {
+    for (size_t i0 = 0; i0 < N0; ++i0)
+      for (size_t i1 = 0; i1 < N1; ++i1) {
         ASSERT_EQ(b(i0, i1), c(i0, i1));
       }
 
@@ -1024,12 +1025,12 @@ void test_view_mapping() {
     Kokkos::realloc(c, layout);
     Kokkos::realloc(d, layout);
 
-    ASSERT_EQ(b.extent(0), 7);
-    ASSERT_EQ(b.extent(1), 8);
-    ASSERT_EQ(c.extent(0), 7);
-    ASSERT_EQ(c.extent(1), 8);
-    ASSERT_EQ(d.extent(0), 7);
-    ASSERT_EQ(d.extent(1), 8);
+    ASSERT_EQ(b.extent(0), 7u);
+    ASSERT_EQ(b.extent(1), 8u);
+    ASSERT_EQ(c.extent(0), 7u);
+    ASSERT_EQ(c.extent(1), 8u);
+    ASSERT_EQ(d.extent(0), 7u);
+    ASSERT_EQ(d.extent(1), 8u);
   }
 
   {
@@ -1203,22 +1204,30 @@ struct TestViewMapOperator {
   }
 
   void run() {
-    ASSERT_EQ(v.extent(0),
-              (0 < ViewType::rank ? TestViewMapOperator<ViewType>::N0 : 1));
-    ASSERT_EQ(v.extent(1),
-              (1 < ViewType::rank ? TestViewMapOperator<ViewType>::N1 : 1));
-    ASSERT_EQ(v.extent(2),
-              (2 < ViewType::rank ? TestViewMapOperator<ViewType>::N2 : 1));
-    ASSERT_EQ(v.extent(3),
-              (3 < ViewType::rank ? TestViewMapOperator<ViewType>::N3 : 1));
-    ASSERT_EQ(v.extent(4),
-              (4 < ViewType::rank ? TestViewMapOperator<ViewType>::N4 : 1));
-    ASSERT_EQ(v.extent(5),
-              (5 < ViewType::rank ? TestViewMapOperator<ViewType>::N5 : 1));
-    ASSERT_EQ(v.extent(6),
-              (6 < ViewType::rank ? TestViewMapOperator<ViewType>::N6 : 1));
-    ASSERT_EQ(v.extent(7),
-              (7 < ViewType::rank ? TestViewMapOperator<ViewType>::N7 : 1));
+    ASSERT_EQ(
+        v.extent(0),
+        (size_t)(0 < ViewType::rank ? TestViewMapOperator<ViewType>::N0 : 1));
+    ASSERT_EQ(
+        v.extent(1),
+        (size_t)(1 < ViewType::rank ? TestViewMapOperator<ViewType>::N1 : 1));
+    ASSERT_EQ(
+        v.extent(2),
+        (size_t)(2 < ViewType::rank ? TestViewMapOperator<ViewType>::N2 : 1));
+    ASSERT_EQ(
+        v.extent(3),
+        (size_t)(3 < ViewType::rank ? TestViewMapOperator<ViewType>::N3 : 1));
+    ASSERT_EQ(
+        v.extent(4),
+        (size_t)(4 < ViewType::rank ? TestViewMapOperator<ViewType>::N4 : 1));
+    ASSERT_EQ(
+        v.extent(5),
+        (size_t)(5 < ViewType::rank ? TestViewMapOperator<ViewType>::N5 : 1));
+    ASSERT_EQ(
+        v.extent(6),
+        (size_t)(6 < ViewType::rank ? TestViewMapOperator<ViewType>::N6 : 1));
+    ASSERT_EQ(
+        v.extent(7),
+        (size_t)(7 < ViewType::rank ? TestViewMapOperator<ViewType>::N7 : 1));
 
     ASSERT_LE(v.extent(0) * v.extent(1) * v.extent(2) * v.extent(3) *
                   v.extent(4) * v.extent(5) * v.extent(6) * v.extent(7),
@@ -1321,8 +1330,8 @@ TEST(TEST_CATEGORY, view_mapping_operator) {
 
 TEST(TEST_CATEGORY, static_extent) {
   using T = Kokkos::View<double * [2][3]>;
-  ASSERT_EQ(T::static_extent(1), 2);
-  ASSERT_EQ(T::static_extent(2), 3);
+  ASSERT_EQ(T::static_extent(1), 2u);
+  ASSERT_EQ(T::static_extent(2), 3u);
 }
 
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestViewMapping_b.hpp b/packages/kokkos/core/unit_test/TestViewMapping_b.hpp
index 23035a303ad450de6f1d6f34abb8c3a33ca94d7b..3e6d91c0b5d245cb00274c4af95ef6539a52aeb7 100644
--- a/packages/kokkos/core/unit_test/TestViewMapping_b.hpp
+++ b/packages/kokkos/core/unit_test/TestViewMapping_b.hpp
@@ -141,25 +141,17 @@ struct MappingClassValueType {
   KOKKOS_INLINE_FUNCTION
   MappingClassValueType() {
 #if 0
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
-      printf( "TestViewMappingClassValue construct on Cuda\n" );
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-      printf( "TestViewMappingClassValue construct on Host\n" );
-#else
-      printf( "TestViewMappingClassValue construct unknown\n" );
-#endif
+    KOKKOS_IF_ON_DEVICE(
+        (printf("TestViewMappingClassValue construct on Device\n");))
+    KOKKOS_IF_ON_HOST((printf("TestViewMappingClassValue construct on Host\n");))
 #endif
   }
   KOKKOS_INLINE_FUNCTION
   ~MappingClassValueType() {
 #if 0
-#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
-      printf( "TestViewMappingClassValue destruct on Cuda\n" );
-#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-      printf( "TestViewMappingClassValue destruct on Host\n" );
-#else
-      printf( "TestViewMappingClassValue destruct unknown\n" );
-#endif
+    KOKKOS_IF_ON_DEVICE(
+        (printf("TestViewMappingClassValue destruct on Device\n");))
+    KOKKOS_IF_ON_HOST((printf("TestViewMappingClassValue destruct on Host\n");))
 #endif
   }
 };
diff --git a/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp b/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp
index 2a15a84380e7c6d979059a8342c64b9ee68d2eb9..e52362a054577d5c1b0e1f261b9cf04010cf7137 100644
--- a/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp
+++ b/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp
@@ -164,23 +164,23 @@ struct TestViewMappingSubview {
     TestViewMappingSubview<ExecSpace> self;
 
     ASSERT_EQ(Aa.extent(0), AN);
-    ASSERT_EQ(Ab.extent(0), AN - 2);
-    ASSERT_EQ(Ac.extent(0), AN - 2);
+    ASSERT_EQ(Ab.extent(0), (size_t)AN - 2);
+    ASSERT_EQ(Ac.extent(0), (size_t)AN - 2);
     ASSERT_EQ(Ba.extent(0), BN0);
     ASSERT_EQ(Ba.extent(1), BN1);
     ASSERT_EQ(Ba.extent(2), BN2);
-    ASSERT_EQ(Bb.extent(0), BN0 - 2);
-    ASSERT_EQ(Bb.extent(1), BN1 - 2);
-    ASSERT_EQ(Bb.extent(2), BN2 - 2);
+    ASSERT_EQ(Bb.extent(0), (size_t)BN0 - 2);
+    ASSERT_EQ(Bb.extent(1), (size_t)BN1 - 2);
+    ASSERT_EQ(Bb.extent(2), (size_t)BN2 - 2);
 
     ASSERT_EQ(Ca.extent(0), CN0);
     ASSERT_EQ(Ca.extent(1), CN1);
     ASSERT_EQ(Ca.extent(2), CN2);
-    ASSERT_EQ(Ca.extent(3), 13);
-    ASSERT_EQ(Ca.extent(4), 14);
-    ASSERT_EQ(Cb.extent(0), CN0 - 2);
-    ASSERT_EQ(Cb.extent(1), CN1 - 2);
-    ASSERT_EQ(Cb.extent(2), CN2 - 2);
+    ASSERT_EQ(Ca.extent(3), (size_t)13);
+    ASSERT_EQ(Ca.extent(4), (size_t)14);
+    ASSERT_EQ(Cb.extent(0), (size_t)CN0 - 2);
+    ASSERT_EQ(Cb.extent(1), (size_t)CN1 - 2);
+    ASSERT_EQ(Cb.extent(2), (size_t)CN2 - 2);
 
     ASSERT_EQ(Da.extent(0), DN0);
     ASSERT_EQ(Da.extent(1), DN1);
@@ -188,9 +188,9 @@ struct TestViewMappingSubview {
     ASSERT_EQ(Da.extent(3), DN3);
     ASSERT_EQ(Da.extent(4), DN4);
 
-    ASSERT_EQ(Db.extent(0), DN1 - 2);
-    ASSERT_EQ(Db.extent(1), DN2 - 2);
-    ASSERT_EQ(Db.extent(2), DN3 - 2);
+    ASSERT_EQ(Db.extent(0), (size_t)DN1 - 2);
+    ASSERT_EQ(Db.extent(1), (size_t)DN2 - 2);
+    ASSERT_EQ(Db.extent(2), (size_t)DN3 - 2);
 
     ASSERT_EQ(Da.stride_1(), Db.stride_0());
     ASSERT_EQ(Da.stride_2(), Db.stride_1());
diff --git a/packages/kokkos/core/unit_test/TestViewResize.hpp b/packages/kokkos/core/unit_test/TestViewResize.hpp
index 9a378e521163dcfe49d498dd51d157ddb389f33b..36214c2c6580f2aacab2f694c03b8acf07316f50 100644
--- a/packages/kokkos/core/unit_test/TestViewResize.hpp
+++ b/packages/kokkos/core/unit_test/TestViewResize.hpp
@@ -46,6 +46,7 @@
 
 #include <gtest/gtest.h>
 #include "TestResize.hpp"
+#include "TestRealloc.hpp"
 
 namespace Test {
 
@@ -54,5 +55,10 @@ TEST(TEST_CATEGORY, view_resize) {
   TestViewResize::testResize<ExecSpace>();
 }
 
+TEST(TEST_CATEGORY, view_realloc) {
+  using ExecSpace = TEST_EXECSPACE;
+  TestViewRealloc::testRealloc<ExecSpace>();
+}
+
 }  // namespace Test
 #endif  // TESTVIEWRESIZE_HPP_
diff --git a/packages/kokkos/core/unit_test/TestViewSubview.hpp b/packages/kokkos/core/unit_test/TestViewSubview.hpp
index 93eb5476b57be796f5e1fbcb8e9b3db140bb1615..43bbb4320ab117bcd1e16129bec9d1e952fa88a5 100644
--- a/packages/kokkos/core/unit_test/TestViewSubview.hpp
+++ b/packages/kokkos/core/unit_test/TestViewSubview.hpp
@@ -336,7 +336,7 @@ void test_left_0(bool constr) {
     make_subview(constr, x0, x_static_8, 0, 0, 0, 0, 0, 0, 0, 0);
 
     ASSERT_TRUE(x0.span_is_contiguous());
-    ASSERT_EQ(x0.span(), 1);
+    ASSERT_EQ(x0.span(), 1u);
     ASSERT_EQ(&x0(), &x_static_8(0, 0, 0, 0, 0, 0, 0, 0));
 
     Kokkos::View<int*, Kokkos::LayoutLeft, Space> x1;
@@ -344,7 +344,7 @@ void test_left_0(bool constr) {
                  0, 1, 2, 3);
 
     ASSERT_TRUE(x1.span_is_contiguous());
-    ASSERT_EQ(x1.span(), 2);
+    ASSERT_EQ(x1.span(), 2u);
     ASSERT_EQ(&x1(0), &x_static_8(0, 1, 2, 3, 0, 1, 2, 3));
     ASSERT_EQ(&x1(1), &x_static_8(1, 1, 2, 3, 0, 1, 2, 3));
 
@@ -353,7 +353,7 @@ void test_left_0(bool constr) {
                  3, 0, 1, 2, 3);
 
     ASSERT_TRUE(x_deg1.span_is_contiguous());
-    ASSERT_EQ(x_deg1.span(), 0);
+    ASSERT_EQ(x_deg1.span(), 0u);
     ASSERT_EQ(x_deg1.data(), &x_static_8(0, 1, 2, 3, 0, 1, 2, 3));
 
     Kokkos::View<int*, Kokkos::LayoutLeft, Space> x_deg2;
@@ -361,7 +361,7 @@ void test_left_0(bool constr) {
                  4, 1, 2, 3, 4);
 
     ASSERT_TRUE(x_deg2.span_is_contiguous());
-    ASSERT_EQ(x_deg2.span(), 0);
+    ASSERT_EQ(x_deg2.span(), 0u);
     ASSERT_EQ(x_deg2.data(), x_static_8.data() + x_static_8.span());
 
     Kokkos::View<int**, Kokkos::LayoutLeft, Space> x2;
@@ -444,14 +444,14 @@ void test_left_1(bool use_constr) {
                  0, 1, 2, 3);
 
     ASSERT_TRUE(x1_deg1.span_is_contiguous());
-    ASSERT_EQ(0, x1_deg1.span());
+    ASSERT_EQ(0u, x1_deg1.span());
     ASSERT_EQ(x1_deg1.data(), &x8(0, 1, 2, 3, 0, 1, 2, 3));
 
     Kokkos::View<int*, Kokkos::LayoutLeft, Space> x1_deg2;
     make_subview(use_constr, x1_deg2, x8, Kokkos::pair<int, int>(2, 2), 2, 3, 4,
                  1, 2, 3, 4);
 
-    ASSERT_EQ(0, x1_deg2.span());
+    ASSERT_EQ(0u, x1_deg2.span());
     ASSERT_TRUE(x1_deg2.span_is_contiguous());
     ASSERT_EQ(x1_deg2.data(), x8.data() + x8.span());
 
@@ -468,7 +468,7 @@ void test_left_1(bool use_constr) {
     Kokkos::View<int**, Kokkos::LayoutLeft, Space> x2_deg2;
     make_subview(use_constr, x2_deg2, x8, Kokkos::pair<int, int>(2, 2), 2, 3, 4,
                  1, 2, Kokkos::pair<int, int>(2, 3), 4);
-    ASSERT_EQ(0, x2_deg2.span());
+    ASSERT_EQ(0u, x2_deg2.span());
 
     // Kokkos::View< int**, Kokkos::LayoutLeft, Space > error_2 =
     Kokkos::View<int**, Kokkos::LayoutStride, Space> sx2;
@@ -484,7 +484,7 @@ void test_left_1(bool use_constr) {
     Kokkos::View<int**, Kokkos::LayoutStride, Space> sx2_deg;
     make_subview(use_constr, sx2, x8, 1, Kokkos::pair<int, int>(0, 0), 2, 3,
                  Kokkos::pair<int, int>(0, 2), 1, 2, 3);
-    ASSERT_EQ(0, sx2_deg.span());
+    ASSERT_EQ(0u, sx2_deg.span());
 
     Kokkos::View<int****, Kokkos::LayoutStride, Space> sx4;
     make_subview(use_constr, sx4, x8, 0,
@@ -625,14 +625,14 @@ void test_left_3() {
     Kokkos::View<int**, Kokkos::LayoutLeft, Space> x2_n1 =
         Kokkos::subview(xm, std::pair<int, int>(1, 1), Kokkos::ALL);
 
-    ASSERT_EQ(x2_n1.extent(0), 0);
+    ASSERT_EQ(x2_n1.extent(0), 0u);
     ASSERT_EQ(x2_n1.extent(1), xm.extent(1));
 
     Kokkos::View<int**, Kokkos::LayoutLeft, Space> x2_n2 =
         Kokkos::subview(xm, Kokkos::ALL, std::pair<int, int>(1, 1));
 
     ASSERT_EQ(x2_n2.extent(0), xm.extent(0));
-    ASSERT_EQ(x2_n2.extent(1), 0);
+    ASSERT_EQ(x2_n2.extent(1), 0u);
   }
 }
 
@@ -656,7 +656,7 @@ void test_right_0(bool use_constr) {
     make_subview(use_constr, x1, x_static_8, 0, 1, 2, 3, 0, 1, 2,
                  Kokkos::pair<int, int>(1, 3));
 
-    ASSERT_EQ(x1.extent(0), 2);
+    ASSERT_EQ(x1.extent(0), 2u);
     ASSERT_EQ(&x1(0), &x_static_8(0, 1, 2, 3, 0, 1, 2, 1));
     ASSERT_EQ(&x1(1), &x_static_8(0, 1, 2, 3, 0, 1, 2, 2));
 
@@ -665,8 +665,8 @@ void test_right_0(bool use_constr) {
                  Kokkos::pair<int, int>(1, 3), 0, 1, 2,
                  Kokkos::pair<int, int>(1, 3));
 
-    ASSERT_EQ(x2.extent(0), 2);
-    ASSERT_EQ(x2.extent(1), 2);
+    ASSERT_EQ(x2.extent(0), 2u);
+    ASSERT_EQ(x2.extent(1), 2u);
     ASSERT_EQ(&x2(0, 0), &x_static_8(0, 1, 2, 1, 0, 1, 2, 1));
     ASSERT_EQ(&x2(1, 0), &x_static_8(0, 1, 2, 2, 0, 1, 2, 1));
     ASSERT_EQ(&x2(0, 1), &x_static_8(0, 1, 2, 1, 0, 1, 2, 2));
@@ -677,8 +677,8 @@ void test_right_0(bool use_constr) {
     make_subview(use_constr, sx2, x_static_8, 1, Kokkos::pair<int, int>(0, 2),
                  2, 3, Kokkos::pair<int, int>(0, 2), 1, 2, 3);
 
-    ASSERT_EQ(sx2.extent(0), 2);
-    ASSERT_EQ(sx2.extent(1), 2);
+    ASSERT_EQ(sx2.extent(0), 2u);
+    ASSERT_EQ(sx2.extent(1), 2u);
     ASSERT_EQ(&sx2(0, 0), &x_static_8(1, 0, 2, 3, 0, 1, 2, 3));
     ASSERT_EQ(&sx2(1, 0), &x_static_8(1, 1, 2, 3, 0, 1, 2, 3));
     ASSERT_EQ(&sx2(0, 1), &x_static_8(1, 0, 2, 3, 1, 1, 2, 3));
@@ -695,10 +695,10 @@ void test_right_0(bool use_constr) {
                  2, Kokkos::pair<int, int>(2, 4) /* of [5] */
     );
 
-    ASSERT_EQ(sx4.extent(0), 2);
-    ASSERT_EQ(sx4.extent(1), 2);
-    ASSERT_EQ(sx4.extent(2), 2);
-    ASSERT_EQ(sx4.extent(3), 2);
+    ASSERT_EQ(sx4.extent(0), 2u);
+    ASSERT_EQ(sx4.extent(1), 2u);
+    ASSERT_EQ(sx4.extent(2), 2u);
+    ASSERT_EQ(sx4.extent(3), 2u);
     for (int i0 = 0; i0 < (int)sx4.extent(0); ++i0)
       for (int i1 = 0; i1 < (int)sx4.extent(1); ++i1)
         for (int i2 = 0; i2 < (int)sx4.extent(2); ++i2)
@@ -739,7 +739,7 @@ void test_right_1(bool use_constr) {
     Kokkos::View<int*, Kokkos::LayoutRight, Space> x1_deg1;
     make_subview(use_constr, x1_deg1, x8, 0, 1, 2, 3, 0, 1, 2,
                  Kokkos::pair<int, int>(3, 3));
-    ASSERT_EQ(0, x1_deg1.span());
+    ASSERT_EQ(0u, x1_deg1.span());
 
     Kokkos::View<int**, Kokkos::LayoutRight, Space> x2;
     make_subview(use_constr, x2, x8, 0, 1, 2, Kokkos::pair<int, int>(1, 3), 0,
@@ -753,7 +753,7 @@ void test_right_1(bool use_constr) {
     Kokkos::View<int**, Kokkos::LayoutRight, Space> x2_deg2;
     make_subview(use_constr, x2_deg2, x8, 0, 1, 2, Kokkos::pair<int, int>(1, 3),
                  0, 1, 2, Kokkos::pair<int, int>(3, 3));
-    ASSERT_EQ(0, x2_deg2.span());
+    ASSERT_EQ(0u, x2_deg2.span());
 
     // Kokkos::View< int**, Kokkos::LayoutRight, Space > error_2 =
     Kokkos::View<int**, Kokkos::LayoutStride, Space> sx2;
@@ -768,7 +768,7 @@ void test_right_1(bool use_constr) {
     Kokkos::View<int**, Kokkos::LayoutStride, Space> sx2_deg;
     make_subview(use_constr, sx2_deg, x8, 1, Kokkos::pair<int, int>(0, 2), 2, 3,
                  1, 1, 2, Kokkos::pair<int, int>(3, 3));
-    ASSERT_EQ(0, sx2_deg.span());
+    ASSERT_EQ(0u, sx2_deg.span());
 
     Kokkos::View<int****, Kokkos::LayoutStride, Space> sx4;
     make_subview(use_constr, sx4, x8, 0,
@@ -842,14 +842,14 @@ void test_right_3() {
     Kokkos::View<int**, Kokkos::LayoutRight, Space> x2_n1 =
         Kokkos::subview(xm, std::pair<int, int>(1, 1), Kokkos::ALL);
 
-    ASSERT_EQ(x2_n1.extent(0), 0);
+    ASSERT_EQ(x2_n1.extent(0), 0u);
     ASSERT_EQ(x2_n1.extent(1), xm.extent(1));
 
     Kokkos::View<int**, Kokkos::LayoutRight, Space> x2_n2 =
         Kokkos::subview(xm, Kokkos::ALL, std::pair<int, int>(1, 1));
 
     ASSERT_EQ(x2_n2.extent(0), xm.extent(0));
-    ASSERT_EQ(x2_n2.extent(1), 0);
+    ASSERT_EQ(x2_n2.extent(1), 0u);
   }
 }
 
diff --git a/packages/kokkos/core/unit_test/TestWithoutInitializing.hpp b/packages/kokkos/core/unit_test/TestWithoutInitializing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8a58888c7c96ba726fd828fe75b3532fe4cb36b7
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestWithoutInitializing.hpp
@@ -0,0 +1,98 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+
+#include "tools/include/ToolTestingUtilities.hpp"
+
+TEST(TEST_CATEGORY, resize_realloc_no_init) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  Kokkos::View<int*** * [1][2][3][4], TEST_EXECSPACE> bla("bla", 5, 6, 7, 8);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::resize(Kokkos::WithoutInitializing, bla, 5, 6, 7, 9);
+        Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8);
+      },
+      [&](BeginParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found begin event"}};
+        return MatchDiagnostic{false};
+      },
+      [&](EndParallelForEvent event) {
+        if (event.descriptor().find("initialization") != std::string::npos)
+          return MatchDiagnostic{true, {"Found end event"}};
+        return MatchDiagnostic{false};
+      });
+  ASSERT_TRUE(success);
+  listen_tool_events(Config::DisableAll());
+}
+
+TEST(TEST_CATEGORY, resize_realloc_no_alloc) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels(),
+                     Config::EnableAllocs());
+  Kokkos::View<int*** * [1][2][3][4], TEST_EXECSPACE> bla("bla", 8, 7, 6, 5);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::resize(bla, 8, 7, 6, 5);
+        Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 7, 6, 5);
+      },
+      [&](BeginParallelForEvent) {
+        return MatchDiagnostic{true, {"Found begin event"}};
+      },
+      [&](EndParallelForEvent) {
+        return MatchDiagnostic{true, {"Found end event"}};
+      },
+      [&](AllocateDataEvent) {
+        return MatchDiagnostic{true, {"Found alloc event"}};
+      },
+      [&](DeallocateDataEvent) {
+        return MatchDiagnostic{true, {"Found dealloc event"}};
+      });
+  ASSERT_TRUE(success);
+  listen_tool_events(Config::DisableAll());
+}
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3d7498b11ca528684d9347ae17310694270384c2
--- /dev/null
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp
@@ -0,0 +1,195 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <TestCuda_Category.hpp>
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+using ValueType = double;
+using MemSpace  = Kokkos::CudaSpace;
+using Matrix2D  = Kokkos::View<ValueType**, MemSpace>;
+using Matrix3D  = Kokkos::View<ValueType***, MemSpace>;
+using Vector    = Kokkos::View<ValueType*, MemSpace>;
+
+namespace Impl {
+
+struct ArrayReduceFunctor {
+  using value_type = ValueType[];
+
+  int value_count;
+  Matrix2D m;
+
+  ArrayReduceFunctor(const Matrix2D& m_) : value_count(m_.extent(1)), m(m_) {}
+
+  KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type sum) const {
+    const int numVecs = value_count;
+    for (int j = 0; j < numVecs; ++j) {
+      sum[j] += m(i, j);
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION void init(value_type update) const {
+    const int numVecs = value_count;
+    for (int j = 0; j < numVecs; ++j) {
+      update[j] = 0.0;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION void join(volatile value_type update,
+                                   const volatile value_type source) const {
+    const int numVecs = value_count;
+    for (int j = 0; j < numVecs; ++j) {
+      update[j] += source[j];
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION void join(value_type update,
+                                   const value_type source) const {
+    const int numVecs = value_count;
+    for (int j = 0; j < numVecs; ++j) {
+      update[j] += source[j];
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION void final(value_type) const {}
+};
+
+struct MDArrayReduceFunctor {
+  using value_type = ValueType[];
+
+  int value_count;
+  Matrix3D m;
+
+  MDArrayReduceFunctor(const Matrix3D& m_) : value_count(m_.extent(2)), m(m_) {}
+
+  KOKKOS_INLINE_FUNCTION void operator()(const int i, const int j,
+                                         value_type sum) const {
+    const int numVecs = value_count;
+    for (int k = 0; k < numVecs; ++k) {
+      sum[k] += m(i, j, k);
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION void init(value_type update) const {
+    const int numVecs = value_count;
+    for (int j = 0; j < numVecs; ++j) {
+      update[j] = 0.0;
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION void final(value_type) const {}
+};
+
+struct ReduceViewSizeLimitTester {
+  const ValueType initValue           = 3;
+  const size_t nGlobalEntries         = 100;
+  const int testViewSize              = 200;
+  const size_t expectedInitShmemLimit = 373584;
+  const unsigned initBlockSize        = Kokkos::Impl::CudaTraits::WarpSize * 8;
+
+  void run_test_range() {
+    Matrix2D matrix;
+    Vector sum;
+
+    for (int i = 0; i < testViewSize; ++i) {
+      size_t sumInitShmemSize = (initBlockSize + 2) * sizeof(ValueType) * i;
+
+      Kokkos::resize(Kokkos::WithoutInitializing, sum, i);
+      Kokkos::resize(Kokkos::WithoutInitializing, matrix, nGlobalEntries, i);
+      Kokkos::deep_copy(matrix, initValue);
+
+      auto policy  = Kokkos::RangePolicy<TEST_EXECSPACE>(0, nGlobalEntries);
+      auto functor = ArrayReduceFunctor(matrix);
+
+      if (sumInitShmemSize < expectedInitShmemLimit) {
+        EXPECT_NO_THROW(Kokkos::parallel_reduce(policy, functor, sum));
+      } else {
+        EXPECT_THROW(Kokkos::parallel_reduce(policy, functor, sum),
+                     std::runtime_error);
+      }
+    }
+  }
+
+  void run_test_md_range_2D() {
+    Matrix3D matrix;
+    Vector sum;
+
+    for (int i = 0; i < testViewSize; ++i) {
+      size_t sumInitShmemSize = (initBlockSize + 2) * sizeof(ValueType) * i;
+
+      Kokkos::resize(Kokkos::WithoutInitializing, sum, i);
+      Kokkos::resize(Kokkos::WithoutInitializing, matrix, nGlobalEntries,
+                     nGlobalEntries, i);
+      Kokkos::deep_copy(matrix, initValue);
+
+      auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>(
+          {0, 0}, {nGlobalEntries, nGlobalEntries});
+      auto functor = MDArrayReduceFunctor(matrix);
+
+      if (sumInitShmemSize < expectedInitShmemLimit) {
+        EXPECT_NO_THROW(Kokkos::parallel_reduce(policy, functor, sum));
+      } else {
+        EXPECT_THROW(Kokkos::parallel_reduce(policy, functor, sum),
+                     std::runtime_error);
+      }
+    }
+  }
+};
+
+}  // namespace Impl
+
+TEST(cuda, reduceRangePolicyViewSizeLimit) {
+  Impl::ReduceViewSizeLimitTester reduceViewSizeLimitTester;
+
+  reduceViewSizeLimitTester.run_test_range();
+}
+
+TEST(cuda, reduceMDRangePolicyViewSizeLimit) {
+  Impl::ReduceViewSizeLimitTester reduceViewSizeLimitTester;
+
+  reduceViewSizeLimitTester.run_test_md_range_2D();
+}
+
+}  // namespace Test
diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..46556a20141739130505e96dbb48d248d4f9289f
--- /dev/null
+++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp
@@ -0,0 +1,152 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+
+#include <Kokkos_Core.hpp>
+
+#include <TestDefaultDeviceType_Category.hpp>
+
+template <size_t... Ds>
+using _sizes = std::integer_sequence<size_t, Ds...>;
+
+template <class>
+struct TestViewAPI;
+template <class DataType, class Layout, size_t... DynamicSizes,
+          size_t... AllSizes>
+struct TestViewAPI<
+    std::tuple<DataType, Layout, std::integer_sequence<size_t, DynamicSizes...>,
+               std::integer_sequence<size_t, AllSizes...>>>
+    : public ::testing::Test {
+  using data_type   = DataType;
+  using layout_type = Layout;
+  using space_type  = Kokkos::DefaultExecutionSpace;
+  using traits_type =
+      Kokkos::MemoryTraits<0>;  // maybe we want to add that later to the matrix
+  using view_type =
+      Kokkos::View<data_type, layout_type, space_type, traits_type>;
+  using alloc_layout_type = typename std::conditional<
+      std::is_same<layout_type, Kokkos::LayoutStride>::value,
+      Kokkos::LayoutLeft, layout_type>::type;
+  using d_alloc_type = Kokkos::View<data_type, alloc_layout_type, space_type>;
+  using h_alloc_type = typename Kokkos::View<data_type, alloc_layout_type,
+                                             space_type>::HostMirror;
+
+  // add a +1 to avoid zero length static array
+  size_t dyn_sizes[sizeof...(DynamicSizes) + 1] = {DynamicSizes..., 1};
+  size_t all_sizes[sizeof...(AllSizes) + 1]     = {AllSizes..., 1};
+
+  constexpr static size_t expected_rank = sizeof...(AllSizes);
+
+  inline view_type create_view() const {
+    return d_alloc_type("TestViewAPI", DynamicSizes...);
+  }
+};
+
+using Kokkos::LayoutLeft;
+using Kokkos::LayoutRight;
+using Kokkos::LayoutStride;
+
+using compatible_extents_test_types = ::testing::Types<
+    // LayoutLeft
+    std::tuple<int, LayoutLeft, _sizes<>, _sizes<>>,
+    std::tuple<int[5], LayoutLeft, _sizes<>, _sizes<5>>,
+    std::tuple<int*, LayoutLeft, _sizes<5>, _sizes<5>>,
+    std::tuple<int[5][10], LayoutLeft, _sizes<>, _sizes<5, 10>>,
+    std::tuple<int * [10], LayoutLeft, _sizes<5>, _sizes<5, 10>>,
+    std::tuple<int**, LayoutLeft, _sizes<5, 10>, _sizes<5, 10>>,
+    std::tuple<int[5][10][15], LayoutLeft, _sizes<>, _sizes<5, 10, 15>>,
+    std::tuple<int * [10][15], LayoutLeft, _sizes<5>, _sizes<5, 10, 15>>,
+    std::tuple<int* * [15], LayoutLeft, _sizes<5, 10>, _sizes<5, 10, 15>>,
+    std::tuple<int***, LayoutLeft, _sizes<5, 10, 15>, _sizes<5, 10, 15>>,
+    // LayoutRight
+    std::tuple<int, LayoutRight, _sizes<>, _sizes<>>,
+    std::tuple<int[5], LayoutRight, _sizes<>, _sizes<5>>,
+    std::tuple<int*, LayoutRight, _sizes<5>, _sizes<5>>,
+    std::tuple<int[5][10], LayoutRight, _sizes<>, _sizes<5, 10>>,
+    std::tuple<int * [10], LayoutRight, _sizes<5>, _sizes<5, 10>>,
+    std::tuple<int**, LayoutRight, _sizes<5, 10>, _sizes<5, 10>>,
+    std::tuple<int[5][10][15], LayoutRight, _sizes<>, _sizes<5, 10, 15>>,
+    std::tuple<int * [10][15], LayoutRight, _sizes<5>, _sizes<5, 10, 15>>,
+    std::tuple<int* * [15], LayoutRight, _sizes<5, 10>, _sizes<5, 10, 15>>,
+    std::tuple<int***, LayoutRight, _sizes<5, 10, 15>, _sizes<5, 10, 15>>,
+    // LayoutStride
+    std::tuple<int, LayoutStride, _sizes<>, _sizes<>>,
+    std::tuple<int[5], LayoutStride, _sizes<>, _sizes<5>>,
+    std::tuple<int*, LayoutStride, _sizes<5>, _sizes<5>>,
+    std::tuple<int[5][10], LayoutStride, _sizes<>, _sizes<5, 10>>,
+    std::tuple<int * [10], LayoutStride, _sizes<5>, _sizes<5, 10>>,
+    std::tuple<int**, LayoutStride, _sizes<5, 10>, _sizes<5, 10>>,
+    std::tuple<int[5][10][15], LayoutStride, _sizes<>, _sizes<5, 10, 15>>,
+    std::tuple<int * [10][15], LayoutStride, _sizes<5>, _sizes<5, 10, 15>>,
+    std::tuple<int* * [15], LayoutStride, _sizes<5, 10>, _sizes<5, 10, 15>>,
+    std::tuple<int***, LayoutStride, _sizes<5, 10, 15>, _sizes<5, 10, 15>>,
+    // Degenerated Sizes
+    std::tuple<int*, LayoutLeft, _sizes<0>, _sizes<0>>,
+    std::tuple<int * [10], LayoutLeft, _sizes<0>, _sizes<0, 10>>,
+    std::tuple<int* * [15], LayoutLeft, _sizes<0, 0>, _sizes<0, 0, 15>>,
+    std::tuple<int*, LayoutRight, _sizes<0>, _sizes<0>>,
+    std::tuple<int * [10], LayoutRight, _sizes<0>, _sizes<0, 10>>,
+    std::tuple<int* * [15], LayoutRight, _sizes<0, 0>, _sizes<0, 0, 15>>,
+    std::tuple<int*, LayoutStride, _sizes<0>, _sizes<0>>,
+    std::tuple<int * [10], LayoutStride, _sizes<0>, _sizes<0, 10>>,
+    std::tuple<int* * [15], LayoutStride, _sizes<0, 0>, _sizes<0, 0, 15>>>;
+
+TYPED_TEST_SUITE(TestViewAPI, compatible_extents_test_types, );
+
+TYPED_TEST(TestViewAPI, sizes) {
+  using view_t = typename TestFixture::view_type;
+  auto a       = this->create_view();
+  static_assert(view_t::rank == TestFixture::expected_rank,
+                "TestViewAPI: Error: rank mismatch");
+  size_t expected_span = 1;
+  for (int r = 0; r < view_t::rank; r++) expected_span *= this->all_sizes[r];
+
+  EXPECT_EQ(expected_span, a.span());
+  for (int r = 0; r < view_t::rank; r++) {
+    EXPECT_EQ(this->all_sizes[r], a.extent(r));
+    EXPECT_EQ(this->all_sizes[r], size_t(a.extent_int(r)));
+  }
+}
diff --git a/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt b/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt
index 20b295650a610a601d73e88b2b116e5dda34c324..5a0c589ac7556d6b6a9048fc6499a60f72634f9f 100644
--- a/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt
+++ b/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt
@@ -13,6 +13,7 @@ file(GLOB KOKKOS_ALGORITHMS_HEADERS RELATIVE  ${BASE_DIR}/algorithms/src
 foreach (_header ${KOKKOS_CORE_HEADERS} ${KOKKOS_CONTAINERS_HEADERS} ${KOKKOS_ALGORITHMS_HEADERS})
   string(REGEX REPLACE "[\./]" "_" header_test_name ${_header})
   set(header_test_name Kokkos_HeaderSelfContained_${header_test_name})
+  set_source_files_properties(tstHeader.cpp PROPERTIES LANGUAGE ${KOKKOS_COMPILE_LANGUAGE})
   add_executable(${header_test_name} tstHeader.cpp)
   target_link_libraries(${header_test_name} PRIVATE Kokkos::kokkos)
   target_compile_definitions(${header_test_name} PRIVATE KOKKOS_HEADER_TEST_NAME=${_header})
diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp
index 8e89d6d6a5da981b33eea9349ae3ace63ec3f684..722614464b2e05ea86d0fbaccd8c18a4acdf8645 100644
--- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp
+++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp
@@ -46,7 +46,7 @@
 #include <TestHPX_Category.hpp>
 
 #include <hpx/config.hpp>
-#include <hpx/include/lcos.hpp>
+#include <hpx/local/future.hpp>
 
 #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH
 #ifndef HPX_COMPUTE_DEVICE_CODE
@@ -153,7 +153,7 @@ TEST(hpx, independent_instances) {
     // future<void>>> (return type of when_all) into a future<void> which is
     // ready whenever the un-collapsed future would've been ready. HPX does not
     // currently have the functionality to collapse this automatically.
-    Kokkos::Experimental::HPX hpx4(hpx::util::get<0>(hpx::split_future(
+    Kokkos::Experimental::HPX hpx4(hpx::get<0>(hpx::split_future(
         hpx::when_all(hpx2.impl_get_future(), hpx3.impl_get_future()))));
     Kokkos::parallel_for(
         "Test::hpx::independent_instances::pointwise_sum",
diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp
index 0cedc068e594e70d750c9b515c4e08cbe527a1f4..ae0d8b5ab2e6c5c9034fdef228e56fa6e39e1fa1 100644
--- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp
+++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp
@@ -45,7 +45,7 @@
 #include <Kokkos_Core.hpp>
 #include <TestHPX_Category.hpp>
 
-#include <hpx/include/lcos.hpp>
+#include <hpx/local/future.hpp>
 
 #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH
 
diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp
index de4cb01a7835d8b4e3d29920ed572edeeb9ef3fb..300cb111111ee242989733241d00425f9da1d0a4 100644
--- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp
+++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp
@@ -45,6 +45,8 @@
 #include <Kokkos_Core.hpp>
 #include <TestHPX_Category.hpp>
 
+#include <hpx/local/future.hpp>
+
 #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH
 
 namespace Test {
@@ -54,35 +56,49 @@ TEST(hpx, instance_ids) {
   Kokkos::initialize(arguments);
 
   {
-    Kokkos::Experimental::HPX hpx_global1;
-    Kokkos::Experimental::HPX hpx_global2 = hpx_global1;
-    Kokkos::Experimental::HPX hpx_global3{hpx_global1};
-    Kokkos::Experimental::HPX hpx_global4(
-        Kokkos::Experimental::HPX::instance_mode::global);
+    Kokkos::Experimental::HPX hpx_default1;
+    Kokkos::Experimental::HPX hpx_default2 = hpx_default1;
+    Kokkos::Experimental::HPX hpx_default3{hpx_default1};
+    Kokkos::Experimental::HPX hpx_default4(
+        Kokkos::Experimental::HPX::instance_mode::default_);
+    Kokkos::Experimental::HPX hpx_default5;
+    hpx_default5 = hpx_default1;
 
-    ASSERT_EQ(0, hpx_global1.impl_instance_id());
-    ASSERT_EQ(0, hpx_global2.impl_instance_id());
-    ASSERT_EQ(0, hpx_global3.impl_instance_id());
-    ASSERT_EQ(0, hpx_global4.impl_instance_id());
+    ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(),
+              hpx_default1.impl_instance_id());
+    ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(),
+              hpx_default2.impl_instance_id());
+    ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(),
+              hpx_default3.impl_instance_id());
+    ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(),
+              hpx_default4.impl_instance_id());
+    ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(),
+              hpx_default5.impl_instance_id());
 
     Kokkos::Experimental::HPX hpx_independent1(
         Kokkos::Experimental::HPX::instance_mode::independent);
     Kokkos::Experimental::HPX hpx_independent2 = hpx_independent1;
     Kokkos::Experimental::HPX hpx_independent3{hpx_independent1};
+    Kokkos::Experimental::HPX hpx_independent4;
+    hpx_independent4 = hpx_independent1;
 
-    ASSERT_NE(hpx_global1.impl_instance_id(),
+    ASSERT_NE(hpx_default1.impl_instance_id(),
               hpx_independent1.impl_instance_id());
     ASSERT_EQ(hpx_independent1.impl_instance_id(),
               hpx_independent2.impl_instance_id());
     ASSERT_EQ(hpx_independent1.impl_instance_id(),
               hpx_independent3.impl_instance_id());
+    ASSERT_EQ(hpx_independent1.impl_instance_id(),
+              hpx_independent4.impl_instance_id());
 
     hpx::shared_future<void> f = hpx::make_ready_future<void>();
     Kokkos::Experimental::HPX hpx_independent_future1(f);
     Kokkos::Experimental::HPX hpx_independent_future2 = hpx_independent_future1;
     Kokkos::Experimental::HPX hpx_independent_future3{hpx_independent_future1};
+    Kokkos::Experimental::HPX hpx_independent_future4;
+    hpx_independent_future4 = hpx_independent_future1;
 
-    ASSERT_NE(hpx_global1.impl_instance_id(),
+    ASSERT_NE(hpx_default1.impl_instance_id(),
               hpx_independent1.impl_instance_id());
     ASSERT_NE(hpx_independent1.impl_instance_id(),
               hpx_independent_future1.impl_instance_id());
@@ -90,6 +106,8 @@ TEST(hpx, instance_ids) {
               hpx_independent_future2.impl_instance_id());
     ASSERT_EQ(hpx_independent_future1.impl_instance_id(),
               hpx_independent_future3.impl_instance_id());
+    ASSERT_EQ(hpx_independent_future1.impl_instance_id(),
+              hpx_independent_future4.impl_instance_id());
   }
 
   Kokkos::finalize();
diff --git a/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp b/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp
index 4d5ca46ba6ee6a41c8a9461bc5c26b36e5996a55..f157af4f2ca14212b5e3f8a88ff7b676cd0d1c62 100644
--- a/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp
+++ b/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp
@@ -104,7 +104,7 @@ TEST(TEST_CATEGORY, IncrTest_01_execspace_typedef) {
 }
 
 TEST(TEST_CATEGORY, IncrTest_01_execspace) {
-  ASSERT_TRUE(Kokkos::is_execution_space<TEST_EXECSPACE>::value);
+  ASSERT_FALSE(!Kokkos::is_execution_space<TEST_EXECSPACE>::value);
   ASSERT_FALSE(Kokkos::is_execution_space<
                TestIncrExecSpaceTypedef<TEST_EXECSPACE>>::value);
   TestIncrExecSpace<TEST_EXECSPACE> test;
diff --git a/packages/kokkos/core/unit_test/incremental/Test04_ParallelFor_RangePolicy.hpp b/packages/kokkos/core/unit_test/incremental/Test04_ParallelFor_RangePolicy.hpp
index 85eef21df3db0ba4eeaf59db4f8db187574592e7..ee9cdc3174d6b2f804d937b41f4956d8421b7975 100644
--- a/packages/kokkos/core/unit_test/incremental/Test04_ParallelFor_RangePolicy.hpp
+++ b/packages/kokkos/core/unit_test/incremental/Test04_ParallelFor_RangePolicy.hpp
@@ -110,6 +110,7 @@ struct TestParallel_For {
     // Copy the data back to Host memory space
     Kokkos::Impl::DeepCopy<h_memspace_type, d_memspace_type>(
         hostData, deviceData, num_elements * sizeof(value_type));
+    Kokkos::fence("Fence after copying data to host memory space");
 
     // Check if all data has been update correctly
     correctness_check(hostData);
diff --git a/packages/kokkos/core/unit_test/incremental/Test06_ParallelFor_MDRangePolicy.hpp b/packages/kokkos/core/unit_test/incremental/Test06_ParallelFor_MDRangePolicy.hpp
index 4192d4abe865f10a43e9a87ed6ee4aa877974dc0..2fed01dd66a2d42f1ade6f86b0e98dc34d43f682 100644
--- a/packages/kokkos/core/unit_test/incremental/Test06_ParallelFor_MDRangePolicy.hpp
+++ b/packages/kokkos/core/unit_test/incremental/Test06_ParallelFor_MDRangePolicy.hpp
@@ -170,6 +170,7 @@ struct TestMDRangePolicy {
     // Copy the data back to Host memory space
     Kokkos::Impl::DeepCopy<h_memspace_type, d_memspace_type>(
         hostData, deviceData, num_elements * sizeof(value_type));
+    Kokkos::fence("Fence after copying data to host");
 
     // Check if all data has been update correctly
     compare_equal_2D();
@@ -201,6 +202,7 @@ struct TestMDRangePolicy {
     // Copy the data back to Host memory space
     Kokkos::Impl::DeepCopy<h_memspace_type, d_memspace_type>(
         hostData, deviceData, num_elements * sizeof(value_type));
+    Kokkos::fence("Fence after copying data to host");
 
     // Check if all data has been update correctly
     compare_equal_3D();
@@ -232,6 +234,7 @@ struct TestMDRangePolicy {
     // Copy the data back to Host memory space
     Kokkos::Impl::DeepCopy<h_memspace_type, d_memspace_type>(
         hostData, deviceData, num_elements * sizeof(value_type));
+    Kokkos::fence("Fence after copying data to host");
 
     // Check if all data has been update correctly
     compare_equal_4D();
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp
index 902150da5806d27768603ac71207ce2aaef5551f..2978c98b50d4b0950364da8c1481e2450ef401ab 100644
--- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp
@@ -50,6 +50,7 @@
 
 namespace Test {
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
 TEST(openmp, partition_master) {
   using Mutex = Kokkos::Experimental::MasterLock<Kokkos::OpenMP>;
 
@@ -128,5 +129,6 @@ TEST(openmp, partition_master) {
   Kokkos::OpenMP::partition_master(master, 8, 8);
   ASSERT_EQ(errors, 0);
 }
+#endif
 
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp
index f81b7073392cc192318187e2ac31aa632f428489..40a88a6ca407387bfa0a3b3f23296a3eb2cad5f5 100644
--- a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp
+++ b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp
@@ -49,7 +49,10 @@ namespace Test {
 // Test Interoperability with SYCL Streams
 TEST(sycl, raw_sycl_queues) {
   sycl::default_selector device_selector;
-  sycl::queue queue(device_selector);
+  // FIXME_SYCL using an in-order queue here should not be necessary since we
+  // are using submit_barrier for managing kernel dependencies but this seems to
+  // be required as a hot fix for now.
+  sycl::queue queue(device_selector, sycl::property::queue::in_order());
   Kokkos::InitArguments arguments{-1, -1, -1, false};
   Kokkos::initialize(arguments);
   int* p            = sycl::malloc_device<int>(100, queue);
diff --git a/packages/kokkos/core/unit_test/tools/TestBuiltinTuners.cpp b/packages/kokkos/core/unit_test/tools/TestBuiltinTuners.cpp
index 870621c1e0d3e4530573fc70ee24208b3b5a7911..fbcc6541db745b1b29f29760be2ecec7006f7100 100644
--- a/packages/kokkos/core/unit_test/tools/TestBuiltinTuners.cpp
+++ b/packages/kokkos/core/unit_test/tools/TestBuiltinTuners.cpp
@@ -42,6 +42,7 @@
 //@HEADER
 */
 #include <Kokkos_Core.hpp>
+#include <impl/Kokkos_Tools_Generic.hpp>
 using ExecSpace  = Kokkos::DefaultHostExecutionSpace;
 using TeamMember = Kokkos::TeamPolicy<ExecSpace>::member_type;
 struct TestTeamFunctor {
@@ -57,11 +58,11 @@ int main(int argc, char* argv[]) {
     Kokkos::MDRangePolicy<Kokkos::Rank<2>> mdp({0, 0}, {1, 1});
     Kokkos::Tools::Experimental::TeamSizeTuner team_tune_this(
         "team_tuner", teamp, TestTeamFunctor{}, Kokkos::ParallelForTag{},
-        Kokkos::Tools::Impl::Impl::SimpleTeamSizeCalculator{});
+        Kokkos::Tools::Experimental::Impl::Impl::SimpleTeamSizeCalculator{});
 
     Kokkos::Tools::Experimental::MDRangeTuner<2> md_tune_this(
         "md_tuner", mdp, TestMDFunctor{}, Kokkos::ParallelForTag{},
-        Kokkos::Tools::Impl::Impl::SimpleTeamSizeCalculator{});
+        Kokkos::Tools::Experimental::Impl::Impl::SimpleTeamSizeCalculator{});
 
     std::vector<int> options{1, 2, 3, 4, 5};
 
diff --git a/packages/kokkos/core/unit_test/tools/TestCInterface.c b/packages/kokkos/core/unit_test/tools/TestCInterface.c
index 66e68154e99eb81d963988e038ca1bfa8d48ad1a..9bedcea3750728e52da7fa45b9a5f0574d464d5f 100644
--- a/packages/kokkos/core/unit_test/tools/TestCInterface.c
+++ b/packages/kokkos/core/unit_test/tools/TestCInterface.c
@@ -1,2 +1,2 @@
 #include <impl/Kokkos_Profiling_C_Interface.h>
-int main(){}
+int main() {}
diff --git a/packages/kokkos/core/unit_test/tools/TestEventCorrectness.cpp b/packages/kokkos/core/unit_test/tools/TestEventCorrectness.cpp
index ac0b4d26196351c6654c9b7996931784e4fa2653..2d73aa2e9c3a5bd3599c6c21cfc49dcd4010c959 100644
--- a/packages/kokkos/core/unit_test/tools/TestEventCorrectness.cpp
+++ b/packages/kokkos/core/unit_test/tools/TestEventCorrectness.cpp
@@ -46,4 +46,3 @@
 #include "Kokkos_Core.hpp"
 
 #include <tools/TestEventCorrectness.hpp>
-#include "../UnitTestMainInit.cpp"
diff --git a/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp b/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp
index 430677a335df32737a08520467cd26513f2e83e7..08863232ed679c5e70e87a69d02971825427ee07 100644
--- a/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp
+++ b/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp
@@ -48,6 +48,8 @@
 #include <impl/Kokkos_Stacktrace.hpp>
 #include <vector>
 #include <algorithm>
+#include "Kokkos_Core_fwd.hpp"
+#include "include/ToolTestingUtilities.hpp"
 namespace Kokkos {
 class Serial;
 class OpenMP;
@@ -118,9 +120,19 @@ struct increment {
   constexpr static const int size = 0;
 };
 int num_instances = 1;
+using index_type  = Kokkos::RangePolicy<>::index_type;
 struct TestFunctor {
-  KOKKOS_FUNCTION void operator()(const int) const {}
+  KOKKOS_FUNCTION void operator()(const index_type) const {}
 };
+struct TestReduceFunctor {
+  using value_type = int;
+  KOKKOS_FUNCTION void operator()(const index_type, value_type&) const {}
+};
+struct TestScanFunctor {
+  using value_type = int;
+  KOKKOS_FUNCTION void operator()(const index_type, value_type&, bool) const {}
+};
+
 template <typename Lambda>
 void test_wrapper(const Lambda& lambda) {
   if (!std::is_same<Kokkos::DefaultExecutionSpace, Kokkos::Serial>::value) {
@@ -131,7 +143,7 @@ void test_wrapper(const Lambda& lambda) {
  * Test that fencing an instance with a name yields a fence
  * event of that name, and the correct device ID
  */
-TEST(defaultdevicetype, test_named_instance_fence) {
+TEST(kokkosp, test_named_instance_fence) {
   test_wrapper([&]() {
     auto root = Kokkos::Tools::Experimental::device_id_root<
         Kokkos::DefaultExecutionSpace>();
@@ -150,7 +162,7 @@ TEST(defaultdevicetype, test_named_instance_fence) {
  * Test that fencing an instance without a name yields a fence
  * event of a correct name, and the correct device ID
  */
-TEST(defaultdevicetype, test_unnamed_instance_fence) {
+TEST(kokkosp, test_unnamed_instance_fence) {
   test_wrapper([&]() {
     auto root = Kokkos::Tools::Experimental::device_id_root<
         Kokkos::DefaultExecutionSpace>();
@@ -170,7 +182,7 @@ TEST(defaultdevicetype, test_unnamed_instance_fence) {
  * Test that invoking a global fence with a name yields a fence
  * event of a correct name, and fences the root of the default device
  */
-TEST(defaultdevicetype, test_named_global_fence) {
+TEST(kokkosp, test_named_global_fence) {
   test_wrapper([&]() {
     auto root = Kokkos::Tools::Experimental::device_id_root<
         Kokkos::DefaultExecutionSpace>();
@@ -187,7 +199,7 @@ TEST(defaultdevicetype, test_named_global_fence) {
  * Test that invoking a global fence with no name yields a fence
  * event of a correct name, and fences the root of the default device
  */
-TEST(defaultdevicetype, test_unnamed_global_fence) {
+TEST(kokkosp, test_unnamed_global_fence) {
   test_wrapper([&]() {
     auto root = Kokkos::Tools::Experimental::device_id_root<
         Kokkos::DefaultExecutionSpace>();
@@ -204,7 +216,7 @@ TEST(defaultdevicetype, test_unnamed_global_fence) {
  * Test that creating two default instances and fencing both yields
  * fence on the same device ID, as these should yield the same instance
  */
-TEST(defaultdevicetype, test_multiple_default_instances) {
+TEST(kokkosp, test_multiple_default_instances) {
   test_wrapper([&]() {
     std::vector<FencePayload> expected{};
     expect_fence_events(expected, [=]() {
@@ -217,10 +229,29 @@ TEST(defaultdevicetype, test_multiple_default_instances) {
   });
 }
 
+/**
+ * Test that device_id() and identifier_from_devid(id) are reciprocal
+ * operations
+ */
+TEST(kokkosp, test_id_gen) {
+  using namespace Kokkos::Tools::Experimental;
+  using Kokkos::Tools::Experimental::DeviceTypeTraits;
+  test_wrapper([&]() {
+    Kokkos::DefaultExecutionSpace ex;
+    auto id      = device_id(ex);
+    auto id_ref  = identifier_from_devid(id);
+    auto success = (id_ref.instance_id == ex.impl_instance_id()) &&
+                   (id_ref.device_id ==
+                    static_cast<uint32_t>(
+                        DeviceTypeTraits<Kokkos::DefaultExecutionSpace>::id));
+    ASSERT_TRUE(success);
+  });
+}
+
 /**
  * Test that fencing and kernels yield events on the correct device ID's
  */
-TEST(defaultdevicetype, test_kernel_sequence) {
+TEST(kokkosp, test_kernel_sequence) {
   test_wrapper([&]() {
     auto root = Kokkos::Tools::Experimental::device_id_root<
         Kokkos::DefaultExecutionSpace>();
@@ -248,7 +279,7 @@ TEST(defaultdevicetype, test_kernel_sequence) {
  * CUDA ONLY: test that creating instances from streams leads to events
  * on different device ID's
  */
-TEST(defaultdevicetype, test_streams) {
+TEST(kokkosp, test_streams) {
   test_wrapper([&]() {
     // auto root = Kokkos::Tools::Experimental::device_id_root<
     //    Kokkos::DefaultExecutionSpace>();
@@ -268,17 +299,366 @@ TEST(defaultdevicetype, test_streams) {
     found_payloads.erase(
         std::remove_if(found_payloads.begin(), found_payloads.end(),
                        [&](const auto& entry) {
-                         return (
-                             entry.name.find("Fence on space initialization") !=
-                             std::string::npos);
+                         return (entry.name.find("Unnamed Instance Fence") ==
+                                 std::string::npos);
                        }),
         found_payloads.end());
-    ASSERT_TRUE(found_payloads[0].dev_id != found_payloads[1].dev_id);
-    ASSERT_TRUE(found_payloads[2].dev_id != found_payloads[1].dev_id);
-    ASSERT_TRUE(found_payloads[2].dev_id != found_payloads[0].dev_id);
+    ASSERT_NE(found_payloads[0].dev_id, found_payloads[1].dev_id);
+    ASSERT_NE(found_payloads[2].dev_id, found_payloads[1].dev_id);
+    ASSERT_NE(found_payloads[2].dev_id, found_payloads[0].dev_id);
   });
 }
 
 #endif
+/** FIXME: OpenMPTarget currently has unexpected fences */
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+TEST(kokkosp, async_deep_copy) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableFences());
+  Kokkos::View<float*> left("left", 5), right("right", 5);
+
+  auto success = validate_absence(
+      [&]() {
+        Kokkos::deep_copy(Kokkos::DefaultExecutionSpace(), left, right);
+      },
+      [&](BeginFenceEvent begin) {
+        if (begin.deviceID !=
+            Kokkos::DefaultExecutionSpace().impl_instance_id()) {
+          std::stringstream error_message;
+          error_message
+              << "Fence encountered outside of the default instance, default: "
+              << Kokkos::DefaultExecutionSpace().impl_instance_id()
+              << ", encountered " << begin.deviceID << " , fence name "
+              << begin.name;
+          return MatchDiagnostic{true, {error_message.str()}};
+        }
+        return MatchDiagnostic{false};
+      });
+  ASSERT_TRUE(success);
+}
+#endif
+TEST(kokkosp, parallel_for) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  auto success = validate_event_set(
+      [=]() {
+        TestFunctor tf;
+        Kokkos::parallel_for("dogs", Kokkos::RangePolicy<>(0, 1), tf);
+      },
+      [=](BeginParallelForEvent begin_event, EndParallelForEvent end_event) {
+        if (begin_event.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on BeginParallelFor name"}};
+        }
+        if (end_event.kID != ((begin_event.kID))) {
+          return MatchDiagnostic{false, {"No match on kID's"}};
+        }
+        return MatchDiagnostic{true};
+      });
+  ASSERT_TRUE(success);
+}
+
+TEST(kokkosp, parallel_reduce) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  auto success = validate_event_set(
+      [=]() {
+        TestReduceFunctor tf;
+        int result;
+        Kokkos::parallel_reduce("dogs", Kokkos::RangePolicy<>(0, 1), tf,
+                                Kokkos::Sum<int>(result));
+      },
+      [=](BeginParallelReduceEvent begin_event,
+          EndParallelReduceEvent end_event) {
+        if (begin_event.name != "dogs") {
+          return MatchDiagnostic{false,
+                                 {"No match on BeginParallelReduce name"}};
+        }
+        if (end_event.kID != ((begin_event.kID))) {
+          return MatchDiagnostic{false, {"No match on kID's"}};
+        }
+        return MatchDiagnostic{true};
+      });
+  ASSERT_TRUE(success);
+}
+
+TEST(kokkosp, parallel_scan) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  auto success = validate_event_set(
+      [=]() {
+        TestScanFunctor tf;
+        Kokkos::parallel_scan("dogs", Kokkos::RangePolicy<>(0, 1), tf);
+      },
+      [=](BeginParallelScanEvent begin_event, EndParallelScanEvent end_event) {
+        if (begin_event.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on BeginParallelScan name"}};
+        }
+        if (end_event.kID != ((begin_event.kID))) {
+          return MatchDiagnostic{false, {"No match on kID's"}};
+        }
+        return MatchDiagnostic{true};
+      });
+// Currently, this test is known to fail with OpenMPTarget
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  ASSERT_TRUE(success);
+#else
+  (void)success;
+#endif
+}
+
+TEST(kokkosp, regions) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableRegions());
+  auto success = validate_event_set(
+      [=]() {
+        Kokkos::Tools::pushRegion("dogs");
+        Kokkos::Tools::popRegion();
+      },
+      [=](PushRegionEvent push_event, PopRegionEvent) {
+        if (push_event.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on PushRegion name"}};
+        }
 
+        return MatchDiagnostic{true};
+      });
+  ASSERT_TRUE(success);
+}
+
+TEST(kokkosp, fences) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableFences());
+  auto success = validate_event_set(
+      [=]() { Kokkos::DefaultExecutionSpace().fence("dogs"); },
+      [=](BeginFenceEvent begin_event, EndFenceEvent end_event) {
+        if (begin_event.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on BeginFence name"}};
+        }
+        if (end_event.kID != ((begin_event.kID))) {
+          return MatchDiagnostic{false, {"No match on kID's"}};
+        }
+        return MatchDiagnostic{true};
+      });
+  ASSERT_TRUE(success);
+}
+
+TEST(kokkosp, raw_allocation) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableAllocs());
+  auto success = validate_event_set(
+      [=]() {
+        void* foo =
+            Kokkos::kokkos_malloc<Kokkos::DefaultExecutionSpace::memory_space>(
+                "dogs", 1000);
+        Kokkos::kokkos_free(foo);
+      },
+      [=](AllocateDataEvent alloc, DeallocateDataEvent free) {
+        if (alloc.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on alloc name"}};
+        }
+        if (alloc.size != 1000) {
+          return MatchDiagnostic{false, {"No match on alloc size"}};
+        }
+        if (alloc.ptr != free.ptr) {
+          return MatchDiagnostic{false, {"No match on pointers"}};
+        }
+        if (free.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on free name"}};
+        }
+        if (free.size != 1000) {
+          return MatchDiagnostic{false, {"No match on free size"}};
+        }
+        return MatchDiagnostic{true};
+      });
+// Currently, this test is known to fail with OpenMPTarget
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  ASSERT_TRUE(success);
+#else
+  (void)success;
+#endif
+}
+
+TEST(kokkosp, view) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableAllocs());
+  auto success = validate_event_set(
+      [=]() { Kokkos::View<float*> dogs("dogs", 1000); },
+      [=](AllocateDataEvent alloc, DeallocateDataEvent free) {
+        if (alloc.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on alloc name"}};
+        }
+        if (alloc.size != 1000 * sizeof(float)) {
+          return MatchDiagnostic{false, {"No match on alloc size"}};
+        }
+        if (alloc.ptr != free.ptr) {
+          return MatchDiagnostic{false, {"No match on pointers"}};
+        }
+        if (free.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on free name"}};
+        }
+        if (free.size != 1000 * sizeof(float)) {
+          return MatchDiagnostic{false, {"No match on free size"}};
+        }
+        return MatchDiagnostic{true};
+      });
+// Currently, this test is known to fail with OpenMPTarget
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
+  ASSERT_TRUE(success);
+#else
+  (void)success;
+#endif
+}
+
+TEST(kokkosp, sections) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableSections());
+  auto success = validate_event_set(
+      [=]() {
+        uint32_t section_id;
+        Kokkos::Tools::createProfileSection("dogs", &section_id);
+        Kokkos::Tools::startSection(section_id);
+        Kokkos::Tools::stopSection(section_id);
+        Kokkos::Tools::destroyProfileSection(section_id);
+      },
+      [=](CreateProfileSectionEvent create, StartProfileSectionEvent start,
+          StopProfileSectionEvent stop, DestroyProfileSectionEvent destroy) {
+        if (create.name != "dogs") {
+          return MatchDiagnostic{false, {"No match on section name"}};
+        }
+        if ((create.id != start.id) || (stop.id != start.id) ||
+            (stop.id != destroy.id)) {
+          return MatchDiagnostic{false, {"No match on section IDs"}};
+        }
+        return MatchDiagnostic{true};
+      });
+  ASSERT_TRUE(success);
+}
+
+TEST(kokkosp, metadata) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableMetadata());
+  auto success = validate_event_set(
+      [=]() {
+        /** Attempts to decrease the value of dog_goodness will be rejected on
+         * review */
+        Kokkos::Tools::declareMetadata("dog_goodness", "infinity");
+      },
+      [=](DeclareMetadataEvent meta) {
+        if (meta.key != "dog_goodness") {
+          return MatchDiagnostic{false, {"No match on metadata key"}};
+        }
+        if (meta.value != "infinity") {
+          return MatchDiagnostic{false, {"No match on metadata value"}};
+        }
+        return MatchDiagnostic{true};
+      });
+  ASSERT_TRUE(success);
+}
+
+TEST(kokkosp, profile_events) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableProfileEvents());
+  auto success = validate_event_set(
+      [=]() { Kokkos::Tools::markEvent("dog_goodness>=infinity"); },
+      [=](ProfileEvent event) {
+        if (event.name != "dog_goodness>=infinity") {
+          return MatchDiagnostic{false, {"No match on profiled event name"}};
+        }
+        return MatchDiagnostic{true};
+      });
+  ASSERT_TRUE(success);
+}
+#if defined(KOKKOS_ENABLE_TUNING)
+TEST(kokkosp, tuning_sequence) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableTuning());
+  size_t input_id, output_id;
+  Kokkos::Tools::Experimental::VariableInfo input_info;
+  input_info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_int64;
+  input_info.category = Kokkos::Tools::Experimental::StatisticalCategory::
+      kokkos_value_categorical;
+  input_info.valueQuantity =
+      Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_unbounded;
+  Kokkos::Tools::Experimental::VariableInfo output_info = input_info;
+  output_info.valueQuantity =
+      Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_set;
+  std::vector<int64_t> values{1, 2, 3, 4, 5};
+  output_info.candidates = Kokkos::Tools::Experimental::make_candidate_set(
+      values.size(), values.data());
+
+  auto success = validate_event_set(
+      [&]() {
+        input_id = Kokkos::Tools::Experimental::declare_input_type("input.dogs",
+                                                                   input_info);
+        output_id = Kokkos::Tools::Experimental::declare_output_type(
+            "output.dogs", output_info);
+        auto next_context = Kokkos::Tools::Experimental::get_new_context_id();
+        Kokkos::Tools::Experimental::begin_context(next_context);
+        Kokkos::Tools::Experimental::VariableValue feature_value =
+            Kokkos::Tools::Experimental::make_variable_value(input_id,
+                                                             int64_t(0));
+        Kokkos::Tools::Experimental::VariableValue tuning_value =
+            Kokkos::Tools::Experimental::make_variable_value(output_id,
+                                                             int64_t(1));
+        Kokkos::Tools::Experimental::set_input_values(next_context, 1,
+                                                      &feature_value);
+        Kokkos::Tools::Experimental::request_output_values(next_context, 1,
+                                                           &tuning_value);
+        Kokkos::Tools::Experimental::end_context(next_context);
+      },
+      [&](DeclareInputTypeEvent input, DeclareOutputTypeEvent output) {
+        if (input.variable_id != input_id) {
+          return MatchDiagnostic{false, {"No match on input id"}};
+        }
+        if (output.variable_id != output_id) {
+          return MatchDiagnostic{false, {"No match on output id"}};
+        }
+        if (output.info.candidates.set.size != 5) {
+          return MatchDiagnostic{
+              false, {"Candidates not properly passed through tuning system"}};
+        }
+        return MatchDiagnostic{true};
+      },
+      [=](BeginContextEvent) { return MatchDiagnostic{true}; },
+      [&](RequestOutputValuesEvent value_request) {
+        if (value_request.inputs[0].metadata->type != input_info.type) {
+          return MatchDiagnostic{false, {"No match on input in request"}};
+        }
+        if (value_request.outputs[0].metadata->type != output_info.type) {
+          return MatchDiagnostic{false, {"No match on output in request"}};
+        }
+        return MatchDiagnostic{true};
+      },
+      [=](EndContextEvent) { return MatchDiagnostic{true}; });
+  ASSERT_TRUE(success);
+}
+#endif
+TEST(kokkosp, no_init_kernel) {
+  using namespace Kokkos::Test::Tools;
+
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  auto success = validate_absence(
+      [=]() {
+        Kokkos::View<float*> not_inited(
+            Kokkos::ViewAllocateWithoutInitializing("no_inits_here_dog"), 100);
+      },
+      [=](BeginParallelForEvent) {
+        return MatchDiagnostic{true, {"Found begin event"}};
+      },
+      [=](EndParallelForEvent) {
+        return MatchDiagnostic{true, {"Found end event"}};
+      });
+  ASSERT_TRUE(success);
+}
+
+TEST(kokkosp, get_events) {
+  using namespace Kokkos::Test::Tools;
+  auto event_vector = get_event_set([=]() {
+    Kokkos::Tools::pushRegion("dogs");
+    Kokkos::Tools::popRegion();
+  });
+  for (const auto& ptr : event_vector) {
+    auto ptr_as_begin = std::dynamic_pointer_cast<BeginParallelForEvent>(ptr);
+    ASSERT_TRUE(ptr_as_begin == nullptr);
+  }
+}
 }  // namespace Test
diff --git a/packages/kokkos/core/unit_test/tools/TestIndependence.cpp b/packages/kokkos/core/unit_test/tools/TestIndependence.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2579c4ef3afc8b57a41a2e075742754adebbd629
--- /dev/null
+++ b/packages/kokkos/core/unit_test/tools/TestIndependence.cpp
@@ -0,0 +1,58 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#include <impl/Kokkos_Tools.hpp>
+
+int main(int argc, char* argv[]) {
+  Kokkos::Tools::initialize(argc, argv);
+  Kokkos::Tools::pushRegion(
+      "The unanimous Declaration of the thirteen united States of America, "
+      "When in the Course of human events, it becomes necessary for one people "
+      "to dissolve the political bands which have connected them with another, "
+      "and to assume among the powers of the earth, the separate and equal "
+      "station to which the Laws of Nature and of Nature's God entitle them, a "
+      "decent respect to the opinions of mankind requires that they should "
+      "declare the causes which impel them to the separation.");
+  Kokkos::Tools::popRegion();
+  Kokkos::Tools::finalize();
+}
diff --git a/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp b/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp
index 29f6dd7a65e1f1e57769a3453175b484e5077a40..df250fe0d979ab282a394519d85e73a85215da73 100644
--- a/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp
+++ b/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp
@@ -173,6 +173,7 @@ void test_allowed_access() {
       "access_allowed",
       Kokkos::RangePolicy<typename Space::execution_space>(0, data_size),
       functor);
+  Kokkos::fence();
 }
 
 using semantically_independent_logical_space =
diff --git a/packages/kokkos/core/unit_test/tools/TestProfilingSection.cpp b/packages/kokkos/core/unit_test/tools/TestProfilingSection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b73307832b47d491de503c14467e9bf746f5cda0
--- /dev/null
+++ b/packages/kokkos/core/unit_test/tools/TestProfilingSection.cpp
@@ -0,0 +1,143 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Profiling_ProfileSection.hpp>
+
+#include <gtest/gtest.h>
+
+namespace {
+struct Section {
+  std::string name;
+  int start_call_cnt;
+  int stop_call_cnt;
+  int is_destroyed;
+  friend std::ostream& operator<<(std::ostream& os, Section const& s) {
+    os << "( " << s.name << ", " << s.start_call_cnt << ", " << s.stop_call_cnt
+       << ", " << s.is_destroyed << " )";
+    return os;
+  }
+  friend bool operator==(Section const& l, Section const& r) {
+    return (l.name == r.name) && (l.start_call_cnt == r.start_call_cnt) &&
+           (l.stop_call_cnt == r.stop_call_cnt) &&
+           (l.is_destroyed == r.is_destroyed);
+  }
+};
+
+std::vector<Section> kokkosp_test_section_vector;
+
+void kokkosp_test_create_section(char const* label, std::uint32_t* id) {
+  *id = kokkosp_test_section_vector.size();
+  kokkosp_test_section_vector.emplace_back(Section{label, 0, 0, 0});
+}
+
+void kokkosp_test_start_section(std::uint32_t id) {
+  ++kokkosp_test_section_vector[id].start_call_cnt;
+}
+
+void kokkosp_test_stop_section(std::uint32_t id) {
+  ++kokkosp_test_section_vector[id].stop_call_cnt;
+}
+
+void kokkosp_test_destroy_section(std::uint32_t id) {
+  ++kokkosp_test_section_vector[id].is_destroyed;
+}
+
+}  // namespace
+
+TEST(defaultdevicetype, profiling_section) {
+  Kokkos::Profiling::Experimental::set_create_profile_section_callback(
+      kokkosp_test_create_section);
+  Kokkos::Profiling::Experimental::set_destroy_profile_section_callback(
+      kokkosp_test_destroy_section);
+  Kokkos::Profiling::Experimental::set_start_profile_section_callback(
+      kokkosp_test_start_section);
+  Kokkos::Profiling::Experimental::set_stop_profile_section_callback(
+      kokkosp_test_stop_section);
+
+  ASSERT_TRUE(kokkosp_test_section_vector.empty());
+
+  {
+    Kokkos::Profiling::ProfilingSection profile_1("one");
+    ASSERT_EQ(kokkosp_test_section_vector.size(), 1u);
+    ASSERT_EQ(kokkosp_test_section_vector[0], (Section{"one", 0, 0, 0}));
+
+    // NOTE: ProfilingSection is a wrapper that manages the lifetime of the
+    // underlying section but does not care whether the start and stop call
+    // sequence makes any sense.
+    profile_1.stop();
+    profile_1.stop();
+    profile_1.start();
+    profile_1.start();
+    profile_1.start();
+    ASSERT_EQ(kokkosp_test_section_vector[0], (Section{"one", 3, 2, 0}));
+
+    {
+      Kokkos::Profiling::ProfilingSection profile_2("two");
+      profile_2.start();
+    }
+    ASSERT_EQ(kokkosp_test_section_vector.size(), 2u);
+    ASSERT_EQ(kokkosp_test_section_vector[1], (Section{"two", 1, 0, 1}));
+
+    profile_1.start();
+    profile_1.start();
+  }
+
+  ASSERT_EQ(kokkosp_test_section_vector.size(), 2u);
+  ASSERT_EQ(kokkosp_test_section_vector[0], (Section{"one", 5, 2, 1}));
+  ASSERT_EQ(kokkosp_test_section_vector[1], (Section{"two", 1, 0, 1}));
+
+  // Cleanup
+  kokkosp_test_section_vector.clear();
+  Kokkos::Tools::Experimental::set_create_profile_section_callback(nullptr);
+  Kokkos::Tools::Experimental::set_destroy_profile_section_callback(nullptr);
+  Kokkos::Tools::Experimental::set_start_profile_section_callback(nullptr);
+  Kokkos::Tools::Experimental::set_stop_profile_section_callback(nullptr);
+}
+
+using Kokkos::Profiling::ProfilingSection;
+static_assert(!std::is_default_constructible<ProfilingSection>::value, "");
+static_assert(!std::is_copy_constructible<ProfilingSection>::value, "");
+static_assert(!std::is_move_constructible<ProfilingSection>::value, "");
+static_assert(!std::is_copy_assignable<ProfilingSection>::value, "");
+static_assert(!std::is_move_assignable<ProfilingSection>::value, "");
diff --git a/packages/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp b/packages/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c0a695d72004abce4e4d95bd1d5e96e72bf85c15
--- /dev/null
+++ b/packages/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp
@@ -0,0 +1,77 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+
+#include "include/ToolTestingUtilities.hpp"
+
+TEST(kokkosp, create_mirror_no_init) {
+  using namespace Kokkos::Test::Tools;
+  listen_tool_events(Config::DisableAll(), Config::EnableKernels());
+  Kokkos::View<int*, Kokkos::DefaultExecutionSpace> device_view("device view",
+                                                                10);
+  Kokkos::View<int*, Kokkos::HostSpace> host_view("host view", 10);
+
+  auto success = validate_absence(
+      [&]() {
+        auto mirror_device =
+            Kokkos::create_mirror(Kokkos::WithoutInitializing, device_view);
+        auto mirror_host =
+            Kokkos::create_mirror(Kokkos::WithoutInitializing,
+                                  Kokkos::DefaultExecutionSpace{}, host_view);
+        auto mirror_device_view = Kokkos::create_mirror_view(
+            Kokkos::WithoutInitializing, device_view);
+        auto mirror_host_view = Kokkos::create_mirror_view(
+            Kokkos::WithoutInitializing, Kokkos::DefaultExecutionSpace{},
+            host_view);
+      },
+      [&](BeginParallelForEvent) {
+        return MatchDiagnostic{true, {"Found begin event"}};
+      },
+      [&](EndParallelForEvent) {
+        return MatchDiagnostic{true, {"Found end event"}};
+      });
+  ASSERT_TRUE(success);
+}
diff --git a/packages/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp b/packages/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e5a03f7fb6b93126b7b51ab6665cb1431adca845
--- /dev/null
+++ b/packages/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp
@@ -0,0 +1,1305 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+/**
+ * Before digging in to the code, it's worth taking a moment to review this
+ * design. Fundamentally, what we're looking to do is allow people to test that
+ * a piece of code produces some expected series of tool events. Maybe we want
+ * to check that deep_copy on an execution space instance only causes the
+ * expected types of fences, or that calls to resize(WithoutInitializing,...)
+ * don't call an initialization kernel.
+ *
+ * This design is realized with an interface in which you provide a code region,
+ * and a set of matchers that consume the events. These matchers are lambdas
+ * that accept some set of tool events, and analyze their content, and return
+ * success or failure.
+ *
+ * Digging into implementation, this works by having a class hierarchy of Tool
+ * Events, rooted at EventBase. Every Tool event inherits from this
+ * (BeginParallelForEvent, PushRegionEvent, etc). We subscribe a Kokkos Tool
+ * that pushes instances of these events into a vector as a code region runs. We
+ * then iterate over the list of events and the matchers, first making sure that
+ * every event is of the right type to be used in the matcher, and then passing
+ * it to the matcher.
+ *
+ * Current examples are in TestEventCorrectness.hpp
+ */
+
+#include <Kokkos_Core.hpp>
+#include <sstream>
+#include <iostream>
+#include <utility>
+#include <type_traits>
+namespace Kokkos {
+
+namespace Test {
+
+namespace Tools {
+
+/**
+ * @brief This is what a matcher should return
+ * It is a two-part struct, with a bool representing
+ * success (true if the match holds), and a vector of
+ * strings representing the diagnostics that should be
+ * printed in case of a failure
+ */
+struct MatchDiagnostic {
+  bool success                      = true;
+  std::vector<std::string> messages = {};
+};
+
+struct EventBase;  // forward declaration
+using EventBasePtr = std::shared_ptr<EventBase>;
+using event_vector = std::vector<EventBasePtr>;
+
+/**
+ * @brief Base case of a recursive reduction using templates
+ * Should be replaced with a fold in C++17
+ */
+
+inline bool are_valid() { return true; }
+
+/**
+ * @brief Recursive reduction to check whether any pointer in a set is null
+ *
+ * @tparam Head Type of the pointer to examine
+ * @tparam Tail Types of the rest of the pointers
+ * @param head The pointer to examine
+ * @param tail The rest of the pointers
+ * @return true if no pointer is null, false otherwise
+ *
+ */
+template <class Head, class... Tail>
+bool are_valid(const Head& head, const Tail&... tail) {
+  return (head != nullptr) && (are_valid(tail...));
+}
+
+/**
+ * @brief In order to call some arbitrary set of lambdas representing matchers,
+ * we need the ability to look at a lambda, and deduce its arguments.
+ *
+ * This is the base template, and will be specialized. All specializations
+ * should define
+ * - a return type R,
+ * - an args pack A,
+ * - a num_args, and
+ * - a function "invoke_as" that takes a functor and an arg-pack, and tries to
+ * call the functor with that arg-pack.
+ *
+ * The main original intent here is two-fold, one to allow us to look at how
+ * many args a functor takes, and two to look at the types of its args. The
+ * second of these is used to do a series of dynamic_casts, making sure that the
+ * EventBase instances captured in our event vectors are of the types being
+ * looked for by our matchers
+ *
+ * @tparam T a functor-like object
+ * @tparam typename used for specialization shenanigans
+ */
+template <typename T, typename = void>
+struct function_traits;
+
+/**
+ * @brief Specialization of function traits, representing a free function.
+ * See the base template for info on what this struct is doing.
+ *
+ * @tparam R return type of the function
+ * @tparam A arg pack
+ */
+template <typename R, typename... A>
+struct function_traits<R (*)(A...)> {
+  using return_type                  = R;
+  using class_type                   = void;
+  using args_type                    = std::tuple<A...>;
+  constexpr static int num_arguments = sizeof...(A);
+  template <class Call, class... Args>
+  static auto invoke_as(const Call& call, Args&&... args) {
+    if (!are_valid(std::dynamic_pointer_cast<A>(std::forward<Args>(args))...)) {
+      return MatchDiagnostic{false, {"Types didn't match on arguments"}};
+    }
+    return call(*std::dynamic_pointer_cast<A>(std::forward<Args>(args))...);
+  }
+};
+
+/**
+ * @brief Specialization of function traits, representing a class member
+ * function. See the base template for info on what this struct is doing
+ *
+ * @tparam R return type of the function
+ * @tparam C the class function being represented
+ * @tparam A arg pack
+ */
+
+template <typename R, typename C, typename... A>
+struct function_traits<R (C::*)(A...)> {
+  using return_type                  = R;
+  using class_type                   = void;
+  using args_type                    = std::tuple<A...>;
+  constexpr static int num_arguments = sizeof...(A);
+  template <class Call, class... Args>
+  static auto invoke_as(const Call& call, Args&&... args) {
+    if (!are_valid(std::dynamic_pointer_cast<A>(std::forward<Args>(args))...)) {
+      return MatchDiagnostic{false, {"Types didn't match on arguments"}};
+    }
+    return call(*std::dynamic_pointer_cast<A>(std::forward<Args>(args))...);
+  }
+};
+
+/**
+ * @brief Specialization of function traits, representing a *const* class member
+ * function. See the base template for info on what this struct is doing
+ *
+ * @tparam R return type of the function
+ * @tparam C the class function being represented
+ * @tparam A arg pack
+ */
+
+template <typename R, typename C, typename... A>
+struct function_traits<R (C::*)(A...) const>  // const
+{
+  using return_type                  = R;
+  using class_type                   = C;
+  using args_type                    = std::tuple<A...>;
+  constexpr static int num_arguments = sizeof...(A);
+  template <class Call, class... Args>
+  static auto invoke_as(const Call& call, Args&&... args) {
+    if (!are_valid(std::dynamic_pointer_cast<A>(std::forward<Args>(args))...)) {
+      return MatchDiagnostic{false, {"Types didn't match on arguments"}};
+    }
+    return call(*std::dynamic_pointer_cast<A>(std::forward<Args>(args))...);
+  }
+};
+
+/**
+ * @brief Specialization of function traits, representing a T that has a
+ * non-generic call operator, i.e. a functor/lambda whose operator() has no auto
+ * or template on it. See the base template for info on what this struct is
+ * doing.
+ *
+ * @tparam T The functor type
+ */
+template <typename T>
+struct function_traits<T, Kokkos::Impl::void_t<decltype(&T::operator())> >
+    : public function_traits<decltype(&T::operator())> {};
+
+/**
+ * @brief A struct to extract events from an event vector, and invoke a matcher
+ * with them.
+ *
+ * This one is a bit funky, you can't do std::get's or the like with a vector.
+ * So this takes in a number of arguments to pull from the vector, and a start
+ * index at which to begin taking from. It then makes an index sequence of that
+ * number of elements {0, 1, 2, ..., num}, and then uses the function_traits
+ * trick above to invoke the matcher with
+ * {events[index+0],events[index+1],...,events[index+num-1]}.
+ *
+ * @tparam num number of arguments to the functor
+ * @tparam Matcher the lambda we want to call with events from our event vector
+ */
+template <int num, class Matcher>
+struct invoke_helper {
+ private:
+  // private helper with an index_sequence, invokes the matcher
+  template <class Traits, size_t... Indices>
+  static auto call(int index, const event_vector& events,
+                   std::index_sequence<Indices...>, const Matcher& matcher) {
+    return Traits::invoke_as(matcher, events[index + Indices]...);
+  }
+
+ public:
+  // the entry point to the class, takes in a Traits class that knows how to
+  // invoke the matcher,
+  template <class Traits>
+  static auto call(int index, const event_vector& events,
+                   const Matcher& matcher) {
+    return call<Traits>(index, events, std::make_index_sequence<num>{},
+                        matcher);
+  }
+};
+
+/**
+ * @brief This is the base case of a recursive check of matchers, meaning no
+ * more matchers exist. The only check now should be that we made it all the way
+ * through the list of events captured by our lambda.
+ *
+ * @param events_scanned how many events we scanned
+ * @param events the vector containing our events
+ * @return MatchDiagnostic success if we scanned all events, failure otherwise
+ */
+inline MatchDiagnostic check_match(event_vector::size_type events_scanned,
+                                   const event_vector& events) {
+  auto result =
+      ((events_scanned == events.size())
+           ? MatchDiagnostic{true}
+           : MatchDiagnostic{false, {"Wrong number of events encountered"}});
+  return result;
+}
+
+/**
+ * @brief Checks that a set of matchers match the events produced by a code
+ * region
+ *
+ * @tparam Matcher a functor that accepts a set of events, and returns whether
+ * they meet an expected structure
+ * @tparam Matchers additional matchers to invoke, supposing the current one is
+ * fine
+ * @param index What position in our vector of events to begin pulling events
+ * from
+ * @param events A vector of events we want to match against our matchers
+ * @param matcher the instance of Matcher (see above)
+ * @param matchers the instances of Matchers (see above)
+ * @return MatchDiagnostic success if the matcher matches, failure otherwise
+ */
+template <class Matcher, class... Matchers>
+MatchDiagnostic check_match(event_vector::size_type index,
+                            const event_vector& events, const Matcher& matcher,
+                            const Matchers&... matchers) {
+  // struct that tells us what we want to know about our matcher, and helps us
+  // invoke it
+  using Traits = function_traits<Matcher>;
+  // how many args does that lambda have?
+  constexpr static event_vector::size_type num_args = Traits::num_arguments;
+  // make sure that we don't have insufficient events in our event vector
+  if (index + num_args > events.size()) {
+    return {false, {"Not enough events encountered to fill the matchers"}};
+  }
+  // Call the lambda, if it's callable with our args. Store the resulting
+  // MatchDiagnostic
+  auto result = invoke_helper<num_args, Matcher>::template call<Traits>(
+      index, events, matcher);
+  // If we fail, don't continue looking for more matches, just return
+  if (!result.success) {
+    return result;
+  }
+  // Otherwise, call with the next matcher
+  return check_match(index + num_args, events, matchers...);
+}
+
+/**
+ * @brief Small utility helper, an entry point into "check_match."
+ * The real "check_match" needs an index at which to start checking,
+ * this just tells it "hey, start at 0."
+ *
+ */
+template <class... Matchers>
+auto check_match(const event_vector& events, Matchers&&... matchers) {
+  return check_match(0, events, std::forward<Matchers>(matchers)...);
+}
+
+/**
+ * @brief Base class of representing everything you can do with an Event
+ * checked by this system. Not much is required, just the ability to
+ * represent yourself as a string for debugging purposes
+ */
+struct EventBase {
+  using PtrHandle                        = const void* const;
+  virtual ~EventBase()                   = default;
+  virtual std::string descriptor() const = 0;
+};
+
+/**
+ * @brief There are an unholy number of begin events in Kokkos, this is a base
+ * class for them (BeginParallel[For/Reduce/Scan], BeginFence).
+ *
+ * @tparam Derived CRTP, intended for use with dynamic_casts
+ */
+template <class Derived>
+struct BeginOperation : public EventBase {
+  const std::string name;
+  const uint32_t deviceID;
+  uint64_t kID;
+  BeginOperation(const std::string& n, const uint32_t devID, uint64_t k)
+      : name(n), deviceID(devID), kID(k) {}
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << Derived::begin_op_name() << " { \"" << name << "\", ";
+    s << deviceID;
+    s << ",";
+    s << kID;
+    s << "}";
+    return s.str();
+  }
+};
+/**
+ * @brief Analogous to BeginOperation, there are a lot of things in Kokkos
+ * of roughly this structure.
+ *
+ * @tparam Derived CRTP, used for comparing that EventBase instances are of the
+ * same type
+ */
+template <class Derived>
+struct EndOperation : public EventBase {
+  uint64_t kID;
+  EndOperation(uint64_t k) : kID(k) {}
+
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << Derived::end_op_name() << " { ";
+    s << kID;
+    s << "}";
+    return s.str();
+  }
+};
+
+/**
+ * Note, the following classes look identical, and they are. They exist because
+ * we're using dynamic_casts up above to check whether events are of the same
+ * type. So the different type names here are meaningful, even though the
+ * classes are empty
+ */
+struct BeginParallelForEvent : public BeginOperation<BeginParallelForEvent> {
+  static const std::string& begin_op_name() {
+    static std::string value = "BeginParallelFor";
+    return value;
+  }
+  BeginParallelForEvent(std::string n, const uint32_t devID, uint64_t k)
+      : BeginOperation<BeginParallelForEvent>(n, devID, k) {}
+};
+struct BeginParallelReduceEvent
+    : public BeginOperation<BeginParallelReduceEvent> {
+  static const std::string& begin_op_name() {
+    static std::string value = "BeginParallelReduce";
+    return value;
+  }
+
+  BeginParallelReduceEvent(std::string n, const uint32_t devID, uint64_t k)
+      : BeginOperation<BeginParallelReduceEvent>(n, devID, k) {}
+};
+struct BeginParallelScanEvent : public BeginOperation<BeginParallelScanEvent> {
+  static const std::string& begin_op_name() {
+    static std::string value = "BeginParallelScan";
+    return value;
+  }
+
+  BeginParallelScanEvent(std::string n, const uint32_t devID, uint64_t k)
+      : BeginOperation<BeginParallelScanEvent>(n, devID, k) {}
+};
+struct BeginFenceEvent : public BeginOperation<BeginFenceEvent> {
+  static const std::string& begin_op_name() {
+    static std::string value = "BeginFence";
+    return value;
+  }
+
+  BeginFenceEvent(std::string n, const uint32_t devID, uint64_t k)
+      : BeginOperation<BeginFenceEvent>(n, devID, k) {}
+};
+
+struct EndParallelForEvent : public EndOperation<EndParallelForEvent> {
+  static const std::string& end_op_name() {
+    static std::string value = "EndParallelFor";
+    return value;
+  }
+
+  EndParallelForEvent(uint64_t k) : EndOperation<EndParallelForEvent>(k) {}
+};
+struct EndParallelReduceEvent : public EndOperation<EndParallelReduceEvent> {
+  static const std::string& end_op_name() {
+    static std::string value = "EndParallelReduce";
+    return value;
+  }
+
+  EndParallelReduceEvent(uint64_t k)
+      : EndOperation<EndParallelReduceEvent>(k) {}
+};
+struct EndParallelScanEvent : public EndOperation<EndParallelScanEvent> {
+  static const std::string& end_op_name() {
+    static std::string value = "EndParallelScan";
+    return value;
+  }
+
+  EndParallelScanEvent(uint64_t k) : EndOperation<EndParallelScanEvent>(k) {}
+};
+struct EndFenceEvent : public EndOperation<EndFenceEvent> {
+  static const std::string& end_op_name() {
+    static std::string value = "EndFence";
+    return value;
+  }
+
+  EndFenceEvent(uint64_t k) : EndOperation<EndFenceEvent>(k) {}
+};
+
+struct InitEvent : public EventBase {
+  int load_sequence;
+  uint64_t version_number;
+  uint32_t num_device_infos;
+  Kokkos::Profiling::KokkosPDeviceInfo* device_infos;
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << "InitEvent { load_sequence: " << load_sequence << ", version_number "
+      << version_number << ", num_device_infos " << num_device_infos << "}";
+    return s.str();
+  }
+  InitEvent(int l, uint64_t v_n, uint32_t n_d_i,
+            Kokkos::Profiling::KokkosPDeviceInfo* d_i)
+      : load_sequence(l),
+        version_number(v_n),
+        num_device_infos(n_d_i),
+        device_infos(d_i) {}
+};
+struct FinalizeEvent : public EventBase {
+  std::string descriptor() const override { return "FinalizeEvent{}"; }
+};
+
+struct ParseArgsEvent : public EventBase {
+  int num_args;
+  char** args;
+
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << "ParseArgsEvent { num_args : " << num_args << std::endl;
+    for (int x = 0; x < num_args; ++x) {
+      s << "  \"" << args[x] << "\"" << std::endl;
+    }
+    s << "}";
+    return s.str();
+  }
+  ParseArgsEvent(int n_a, char** a) : num_args(n_a), args(a) {}
+};
+struct PrintHelpEvent : public EventBase {
+  char* prog_name;
+  std::string descriptor() const override {
+    return "PrintHelpEvent { Program Name: \"" + std::string(prog_name) + "\"}";
+  }
+  PrintHelpEvent(char* p_n) : prog_name(p_n) {}
+};
+struct PushRegionEvent : public EventBase {
+  std::string name;
+  std::string descriptor() const override {
+    return "PushRegionEvent { Region Name: \"" + name + "\" }";
+  }
+  PushRegionEvent(std::string n) : name(n) {}
+};
+struct PopRegionEvent : public EventBase {
+  std::string descriptor() const override { return "PopRegionEvent{}"; }
+};
+
+template <class Derived>
+struct DataEvent : public EventBase {
+  using SpaceHandleType = Kokkos::Profiling::SpaceHandle;
+  SpaceHandleType handle;
+  std::string name;
+  EventBase::PtrHandle ptr;
+  uint64_t size;
+
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << Derived::event_name() << "{ In space \"" << handle.name
+      << "\", name: \"" << name << "\", ptr: " << ptr << ", size: " << size
+      << "}";
+    return s.str();
+  }
+  DataEvent(SpaceHandleType h, std::string n, EventBase::PtrHandle p,
+            uint64_t s)
+      : handle(h), name(n), ptr(p), size(s) {}
+};
+
+struct AllocateDataEvent : public DataEvent<AllocateDataEvent> {
+  static std::string event_name() { return "AllocateDataEvent"; }
+  AllocateDataEvent(DataEvent::SpaceHandleType h, std::string n,
+                    EventBase::PtrHandle p, uint64_t s)
+      : DataEvent<AllocateDataEvent>(h, n, p, s) {}
+};
+struct DeallocateDataEvent : public DataEvent<DeallocateDataEvent> {
+  static std::string event_name() { return "DeallocateDataEvent"; }
+  DeallocateDataEvent(DataEvent::SpaceHandleType h, std::string n,
+                      EventBase::PtrHandle p, uint64_t s)
+      : DataEvent<DeallocateDataEvent>(h, n, p, s) {}
+};
+
+struct CreateProfileSectionEvent : public EventBase {
+  std::string name;
+  uint32_t id;
+  std::string descriptor() const override {
+    return "CreateProfileSectionEvent {\"" + name + "\", " +
+           std::to_string(id) + "}";
+  }
+  CreateProfileSectionEvent(std::string n, uint32_t s_i) : name(n), id(s_i) {}
+};
+
+template <class Derived>
+struct ProfileSectionManipulationEvent : public EventBase {
+  uint32_t id;
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << Derived::event_name() << "{ " << id << "}";
+    return s.str();
+  }
+  ProfileSectionManipulationEvent(uint32_t d_i) : id(d_i){};
+};
+
+struct StartProfileSectionEvent
+    : public ProfileSectionManipulationEvent<StartProfileSectionEvent> {
+  static std::string event_name() { return "StartProfileSectionEvent"; }
+  StartProfileSectionEvent(uint32_t d_i)
+      : ProfileSectionManipulationEvent<StartProfileSectionEvent>(d_i){};
+};
+struct StopProfileSectionEvent
+    : public ProfileSectionManipulationEvent<StopProfileSectionEvent> {
+  static std::string event_name() { return "StopProfileSectionEvent"; }
+  StopProfileSectionEvent(uint32_t d_i)
+      : ProfileSectionManipulationEvent<StopProfileSectionEvent>(d_i){};
+};
+struct DestroyProfileSectionEvent
+    : public ProfileSectionManipulationEvent<DestroyProfileSectionEvent> {
+  static std::string event_name() { return "DestroyProfileSectionEvent"; }
+  DestroyProfileSectionEvent(uint32_t d_i)
+      : ProfileSectionManipulationEvent<DestroyProfileSectionEvent>(d_i){};
+};
+
+struct ProfileEvent : public EventBase {
+  std::string name;
+  std::string descriptor() const override {
+    return "ProfileEvent {\"" + name + "\"}";
+  }
+  ProfileEvent(std::string n) : name(n) {}
+};
+
+struct BeginDeepCopyEvent : public EventBase {
+  using SpaceHandleType = Kokkos::Profiling::SpaceHandle;
+  SpaceHandleType src_handle;
+  std::string src_name;
+  EventBase::PtrHandle src_ptr;
+  SpaceHandleType dst_handle;
+  std::string dst_name;
+  EventBase::PtrHandle dst_ptr;
+  uint64_t size;
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << "BeginDeepCopyEvent { size: " << size << std::endl;
+    s << "  dst: { \"" << dst_handle.name << "\", \"" << dst_name << "\", "
+      << dst_ptr << "}\n";
+    s << "  src: { \"" << src_handle.name << "\", \"" << src_name << "\", "
+      << src_ptr << "}\n";
+    s << "}";
+    return s.str();
+  }
+  BeginDeepCopyEvent(SpaceHandleType s_h, std::string s_n,
+                     EventBase::PtrHandle s_p, SpaceHandleType d_h,
+                     std::string d_n, EventBase::PtrHandle d_p, uint64_t s)
+      : src_handle(s_h),
+        src_name(s_n),
+        src_ptr(s_p),
+        dst_handle(d_h),
+        dst_name(d_n),
+        dst_ptr(d_p),
+        size(s) {}
+};
+struct EndDeepCopyEvent : public EventBase {
+  std::string descriptor() const override { return "EndDeepCopyEvent{}"; }
+};
+
+template <class Derived>
+struct DualViewEvent : public EventBase {
+  std::string name;
+  EventBase::PtrHandle ptr;
+  bool is_device;
+  DualViewEvent(std::string n, EventBase::PtrHandle p, bool i_d)
+      : name(n), ptr(p), is_device(i_d) {}
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << Derived::event_name() << " { \"" << name << "\", " << std::hex << ptr
+      << ", " << std::boolalpha << is_device << "}";
+    return s.str();
+  }
+};
+struct DualViewModifyEvent : public DualViewEvent<DualViewModifyEvent> {
+  static std::string event_name() { return "DualViewModifyEvent"; }
+  DualViewModifyEvent(std::string n, EventBase::PtrHandle p, bool i_d)
+      : DualViewEvent(n, p, i_d) {}
+};
+struct DualViewSyncEvent : public DualViewEvent<DualViewSyncEvent> {
+  static std::string event_name() { return "DualViewSyncEvent"; }
+  DualViewSyncEvent(std::string n, EventBase::PtrHandle p, bool i_d)
+      : DualViewEvent(n, p, i_d) {}
+};
+
+struct DeclareMetadataEvent : public EventBase {
+  std::string key;
+  std::string value;
+  std::string descriptor() const override {
+    return "DeclareMetadataEvent {\"" + key + "\", \"" + value + "\"}";
+  }
+  DeclareMetadataEvent(std::string k, std::string v) : key(k), value(v) {}
+};
+
+struct ProvideToolProgrammingInterfaceEvent : public EventBase {
+  using Interface = Kokkos::Tools::Experimental::ToolProgrammingInterface;
+
+  uint32_t num_functions;
+  Interface interface;
+  ProvideToolProgrammingInterfaceEvent(uint32_t n_f, Interface i)
+      : num_functions(n_f), interface(i) {}
+  std::string descriptor() const override {
+    return "ProvideToolProgrammingInterfaceEvent {" +
+           std::to_string(num_functions) + "}";
+  }
+};
+struct RequestToolSettingsEvent : public EventBase {
+  using Settings = Kokkos::Tools::Experimental::ToolSettings;
+
+  uint32_t num_settings;
+  Settings settings;
+  RequestToolSettingsEvent(uint32_t n_s, Settings s)
+      : num_settings(n_s), settings(s) {}
+  std::string descriptor() const override {
+    return "RequestToolSettingsEvent {" + std::to_string(num_settings) + "}";
+  }
+};
+
+template <class Derived>
+struct TypeDeclarationEvent : public EventBase {
+  std::string name;
+  size_t variable_id;
+  Kokkos::Tools::Experimental::VariableInfo info;
+  std::string descriptor() const override {
+    return Derived::event_name() + "{ \"" + name + "\"," +
+           std::to_string(variable_id) + "}";
+  }
+  TypeDeclarationEvent(std::string n, size_t v_i,
+                       Kokkos::Tools::Experimental::VariableInfo i)
+      : name(n), variable_id(v_i), info(i) {}
+};
+struct DeclareOutputTypeEvent
+    : public TypeDeclarationEvent<DeclareOutputTypeEvent> {
+  static std::string event_name() { return "DeclarateOutputTypeEvent"; }
+  DeclareOutputTypeEvent(std::string n, size_t v_i,
+                         Kokkos::Tools::Experimental::VariableInfo i)
+      : TypeDeclarationEvent(n, v_i, i) {}
+};
+struct DeclareInputTypeEvent
+    : public TypeDeclarationEvent<DeclareInputTypeEvent> {
+  static std::string event_name() { return "DeclareInputTypeEvent"; }
+  DeclareInputTypeEvent(std::string n, size_t v_i,
+                        Kokkos::Tools::Experimental::VariableInfo i)
+      : TypeDeclarationEvent(n, v_i, i) {}
+};
+
+struct RequestOutputValuesEvent : public EventBase {
+  size_t context;
+  size_t num_inputs;
+  std::vector<Kokkos::Tools::Experimental::VariableValue> inputs;
+  size_t num_outputs;
+  std::vector<Kokkos::Tools::Experimental::VariableValue> outputs;
+  std::string descriptor() const override {
+    std::stringstream s;
+    s << "RequestOutputValuesEvent { ";
+    s << num_inputs << " inputs,";
+    s << num_outputs << " outputs}";
+    return s.str();
+  }
+  RequestOutputValuesEvent(
+      size_t c, size_t n_i,
+      std::vector<Kokkos::Tools::Experimental::VariableValue> i, size_t n_o,
+      std::vector<Kokkos::Tools::Experimental::VariableValue> o)
+      : context(c), num_inputs(n_i), inputs(i), num_outputs(n_o), outputs(o) {}
+};
+
+struct BeginContextEvent : public EventBase {
+  size_t context;
+  std::string descriptor() const override {
+    return "ContextBeginEvent{ " + std::to_string(context) + "}";
+  }
+  BeginContextEvent(size_t c) : context(c) {}
+};
+struct EndContextEvent : public EventBase {
+  size_t context;
+  Kokkos::Tools::Experimental::VariableValue value;
+  std::string descriptor() const override {
+    return "ContextEndEvent {" + std::to_string(context) + "}";
+  }
+  EndContextEvent(size_t c, Kokkos::Tools::Experimental::VariableValue v)
+      : context(c), value(v) {}
+};
+
+struct OptimizationGoalDeclarationEvent : public EventBase {
+  size_t context;
+  Kokkos::Tools::Experimental::OptimizationGoal goal;
+  std::string descriptor() const override {
+    return "OptimizationGoalDeclarationEvent{" + std::to_string(context) + "}";
+  }
+  OptimizationGoalDeclarationEvent(
+      size_t c, Kokkos::Tools::Experimental::OptimizationGoal g)
+      : context(c), goal(g) {}
+};
+
+/**
+ * @brief Takes a vector of events, a set of matchers, and checks whether
+ *        that event vector matches what those matchers expect
+ *
+ * @tparam Matchers types of our matchers
+ * @param events A vector containing events
+ * @param matchers A set of functors that match those Events
+ * @return true on successful match, false otherwise
+ */
+template <class... Matchers>
+bool compare_event_vectors(const event_vector& events, Matchers&&... matchers) {
+  // leans on check_match to do the bulk of the work
+  auto diagnostic = check_match(events, std::forward<Matchers>(matchers)...);
+  // On failure, print out the error messages
+  if (!diagnostic.success) {
+    for (const auto& message : diagnostic.messages) {
+      std::cerr << "Error matching event vectors: " << message << std::endl;
+    }
+  }
+  return diagnostic.success;
+}
+
+/**
+ * This section is odd, and needs explanation. Imagine that
+ * you're writing a test. Maybe you want to listen to all
+ * events. Maybe you want to listen to all profiling events.
+ * Maybe you want to listen to all profiling events, no
+ * infrastructure events, and only type declaration events
+ * in tuning.
+ *
+ * You can model this as a tree of preferences, a kind of
+ * hierarchical bool. By default,
+ * we listen to everything. But you can disable everything,
+ * or any subcomponent (profiling/tuning/infrastructure),
+ * or even a sub-subcomponent (profiling->kernels)
+ *
+ */
+
+/**
+ * @brief This tells the testing tool which events to listen to.
+ * My strong recommendation is to make this "all events" in most cases,
+ * but if there is an event that is hard to match in some cases, a stray
+ * deep_copy or the like, this will let you ignore that event. Users will
+ * not directly instantiate these.
+ */
+
+struct ToolValidatorConfiguration {
+  struct Profiling {
+    bool kernels        = true;
+    bool regions        = true;
+    bool fences         = true;
+    bool allocs         = true;
+    bool copies         = true;
+    bool dual_view_ops  = true;
+    bool sections       = true;
+    bool profile_events = true;
+    bool metadata       = true;
+  };
+  struct Tuning {
+    bool contexts          = true;
+    bool type_declarations = true;
+    bool request_values    = true;
+  };
+  struct Infrastructure {
+    bool init                  = true;
+    bool finalize              = true;
+    bool programming_interface = true;
+    bool request_settings      = true;
+  };
+  Profiling profiling           = Profiling();
+  Tuning tuning                 = Tuning();
+  Infrastructure infrastructure = Infrastructure();
+};
+
+namespace Config {
+/**
+ * @brief A config struct has a few properties:
+ *
+ * 1) What settings it toggles
+ * 2) Whether it toggles that setting on or off
+ * 3) What depth the setting is in the tree
+ *
+ * The first two hopefully make intuitive sense. The
+ * third is weird. In order to make this hierarchical
+ * bool concept work, you need to be able to first
+ * disable all events, then enable profiling.
+ *
+ * This is done by modeling the depth of the request.
+ * DisableAlls happen before EnableProfiling happen before
+ * DisableKernels. The implementation of that is in listen_tool_events,
+ * but needs machinery here.
+ *
+ */
+
+/**
+ * @brief Macro to make defining a configuration struct easier.
+ * Given a name, what value to override in the ToolConfiguration,
+ * and the depth of that configuration option, produces an
+ * EnableName struct to enable that option, and a DisableName
+ * struct to disable that option
+ *
+ * @param name : the name of the struct
+ * @param value: the value in ToolConfiguration to override
+ * @param depth: how deep in the configuration tree an option is
+ *               (0 is root, Profiling/Tuning/Infrastructure 1, 2 for
+ *                sub-options)
+ */
+#define KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(name, value, depth)    \
+  template <bool target_value>                                      \
+  struct Toggle##name : public std::integral_constant<int, depth> { \
+    void operator()(ToolValidatorConfiguration& config) {           \
+      config.value = target_value;                                  \
+    }                                                               \
+  };                                                                \
+  using Enable##name  = Toggle##name<true>;                         \
+  using Disable##name = Toggle##name<false>
+
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Kernels, profiling.kernels, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Regions, profiling.regions, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Fences, profiling.fences, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Allocs, profiling.allocs, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Copies, profiling.copies, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(DualViewOps, profiling.dual_view_ops, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Sections, profiling.sections, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(ProfileEvents, profiling.profile_events,
+                                     2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Metadata, profiling.metadata, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Contexts, tuning.contexts, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(TypeDeclarations, tuning.type_declarations,
+                                     2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(RequestValues, tuning.request_values, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Init, infrastructure.init, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(Finalize, infrastructure.finalize, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(ProgrammingInterface,
+                                     infrastructure.programming_interface, 2);
+KOKKOS_IMPL_TOOLS_TEST_CONFIG_OPTION(RequestSettings,
+                                     infrastructure.request_settings, 2);
+
+template <bool target_value>
+struct ToggleInfrastructure : public std::integral_constant<int, 1> {
+  void operator()(ToolValidatorConfiguration& config) {
+    ToggleInit<target_value>{}(config);
+    ToggleFinalize<target_value>{}(config);
+    ToggleProgrammingInterface<target_value>{}(config);
+    ToggleRequestSettings<target_value>{}(config);
+  }
+};
+
+using EnableInfrastructure  = ToggleInfrastructure<true>;
+using DisableInfrastructure = ToggleInfrastructure<false>;
+
+template <bool target_value>
+struct ToggleProfiling : public std::integral_constant<int, 1> {
+  void operator()(ToolValidatorConfiguration& config) {
+    ToggleKernels<target_value>{}(config);
+    ToggleRegions<target_value>{}(config);
+    ToggleFences<target_value>{}(config);
+    ToggleAllocs<target_value>{}(config);
+    ToggleCopies<target_value>{}(config);
+    ToggleDualViewOps<target_value>{}(config);
+    ToggleSections<target_value>{}(config);
+    ToggleProfileEvents<target_value>{}(config);
+    ToggleMetadata<target_value>{}(config);
+  }
+};
+
+using EnableProfiling  = ToggleProfiling<true>;
+using DisableProfiling = ToggleProfiling<false>;
+
+template <bool target_value>
+struct ToggleTuning : public std::integral_constant<int, 1> {
+  void operator()(ToolValidatorConfiguration& config) {
+    ToggleContexts<target_value>{}(config);
+    ToggleTypeDeclarations<target_value>{}(config);
+    ToggleRequestValues<target_value>{}(config);
+  }
+};
+
+using EnableTuning  = ToggleTuning<true>;
+using DisableTuning = ToggleTuning<false>;
+
+template <bool target_value>
+struct ToggleAll : public std::integral_constant<int, 0> {
+  void operator()(ToolValidatorConfiguration& config) {
+    ToggleProfiling<target_value>{}(config);
+    ToggleTuning<target_value>{}(config);
+    ToggleInfrastructure<target_value>{}(config);
+  }
+};
+
+using EnableAll  = ToggleAll<true>;
+using DisableAll = ToggleAll<false>;
+}  // namespace Config
+
+/**
+ * This is the vector tool callbacks will push events into.
+ * It needs to be outside of functions (to be global) because
+ * it needs to be used in the tools callbacks, which are function pointers,
+ * which can't capture variables. Thus we need something that doesn't require
+ * capturing. In short, a global variable. :(
+ */
+static std::vector<EventBasePtr> found_events;
+/**
+ * Needs to stand outside of functions, this is the kID of the last encountered
+ * begin event
+ */
+static uint64_t last_kernel_id;
+/**
+ * Needs to stand outside of functions, this is the section ID of the last
+ * encountered section id
+ */
+static uint32_t last_section_id;
+
+/** Subscribes to all of the requested callbacks */
+static void set_tool_events_impl(const ToolValidatorConfiguration& config) {
+  Kokkos::Tools::Experimental::pause_tools();  // remove all events
+  if (config.profiling.kernels) {
+    Kokkos::Tools::Experimental::set_begin_parallel_for_callback(
+        [](const char* n, const uint32_t d, uint64_t* k) {
+          *k = ++last_kernel_id;
+          found_events.push_back(
+              std::make_shared<BeginParallelForEvent>(std::string(n), d, *k));
+        });
+    Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(
+        [](const char* n, const uint32_t d, uint64_t* k) {
+          *k = ++last_kernel_id;
+          found_events.push_back(std::make_shared<BeginParallelReduceEvent>(
+              std::string(n), d, *k));
+        });
+    Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(
+        [](const char* n, const uint32_t d, uint64_t* k) {
+          *k = ++last_kernel_id;
+
+          found_events.push_back(
+              std::make_shared<BeginParallelScanEvent>(std::string(n), d, *k));
+        });
+    Kokkos::Tools::Experimental::set_end_parallel_for_callback(
+        [](const uint64_t k) {
+          found_events.push_back(std::make_shared<EndParallelForEvent>(k));
+        });
+    Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(
+        [](const uint64_t k) {
+          found_events.push_back(std::make_shared<EndParallelReduceEvent>(k));
+        });
+    Kokkos::Tools::Experimental::set_end_parallel_scan_callback(
+        [](const uint64_t k) {
+          found_events.push_back(std::make_shared<EndParallelScanEvent>(k));
+        });
+  }  // if profiling.kernels
+  if (config.profiling.regions) {
+    Kokkos::Tools::Experimental::set_push_region_callback([](const char* name) {
+      found_events.push_back(
+          std::make_shared<PushRegionEvent>(std::string(name)));
+    });
+    Kokkos::Tools::Experimental::set_pop_region_callback(
+        []() { found_events.push_back(std::make_shared<PopRegionEvent>()); });
+  }
+  if (config.profiling.fences) {
+    Kokkos::Tools::Experimental::set_begin_fence_callback(
+        [](const char* n, const uint32_t d, uint64_t* k) {
+          *k = ++last_kernel_id;
+          found_events.push_back(
+              std::make_shared<BeginFenceEvent>(std::string(n), d, *k));
+        });
+
+    Kokkos::Tools::Experimental::set_end_fence_callback([](const uint64_t k) {
+      found_events.push_back(std::make_shared<EndFenceEvent>(k));
+    });
+  }  // profiling.fences
+  if (config.profiling.allocs) {
+    Kokkos::Tools::Experimental::set_allocate_data_callback(
+        [](Kokkos::Tools::SpaceHandle handle, const char* name,
+           EventBase::PtrHandle const ptr, const uint64_t size) {
+          found_events.push_back(std::make_shared<AllocateDataEvent>(
+              handle, std::string(name), ptr, size));
+        });
+    Kokkos::Tools::Experimental::set_deallocate_data_callback(
+        [](Kokkos::Tools::SpaceHandle handle, const char* name,
+           EventBase::PtrHandle const ptr, const uint64_t size) {
+          found_events.push_back(std::make_shared<DeallocateDataEvent>(
+              handle, std::string(name), ptr, size));
+        });
+  }
+  if (config.profiling.copies) {
+    Kokkos::Tools::Experimental::set_begin_deep_copy_callback(
+        [](Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name,
+           EventBase::PtrHandle dst_ptr, Kokkos::Tools::SpaceHandle src_handle,
+           const char* src_name, EventBase::PtrHandle src_ptr, uint64_t size) {
+          found_events.push_back(std::make_shared<BeginDeepCopyEvent>(
+              dst_handle, std::string(dst_name), dst_ptr, src_handle,
+              std::string(src_name), src_ptr, size));
+        });
+    Kokkos::Tools::Experimental::set_end_deep_copy_callback(
+        []() { found_events.push_back(std::make_shared<EndDeepCopyEvent>()); });
+  }
+  if (config.profiling.dual_view_ops) {
+    Kokkos::Tools::Experimental::set_dual_view_sync_callback(
+        [](const char* name, EventBase::PtrHandle ptr, bool is_device) {
+          found_events.push_back(std::make_shared<DualViewSyncEvent>(
+              std::string(name), ptr, is_device));
+        });
+    Kokkos::Tools::Experimental::set_dual_view_modify_callback(
+        [](const char* name, EventBase::PtrHandle ptr, bool is_device) {
+          found_events.push_back(std::make_shared<DualViewModifyEvent>(
+              std::string(name), ptr, is_device));
+        });
+  }
+  if (config.profiling.sections) {
+    Kokkos::Tools::Experimental::set_create_profile_section_callback(
+        [](const char* name, uint32_t* id) {
+          *id = (++last_section_id);
+          found_events.push_back(std::make_shared<CreateProfileSectionEvent>(
+              std::string(name), *id));
+        });
+    Kokkos::Tools::Experimental::set_destroy_profile_section_callback(
+        [](uint32_t id) {
+          found_events.push_back(
+              std::make_shared<DestroyProfileSectionEvent>(id));
+        });
+    Kokkos::Tools::Experimental::set_start_profile_section_callback(
+        [](uint32_t id) {
+          found_events.push_back(
+              std::make_shared<StartProfileSectionEvent>(id));
+        });
+    Kokkos::Tools::Experimental::set_stop_profile_section_callback(
+        [](uint32_t id) {
+          found_events.push_back(std::make_shared<StopProfileSectionEvent>(id));
+        });
+  }
+  if (config.profiling.profile_events) {
+    Kokkos::Tools::Experimental::set_profile_event_callback(
+        [](const char* name) {
+          found_events.push_back(
+              std::make_shared<ProfileEvent>(std::string(name)));
+        });
+  }
+  if (config.profiling.metadata) {
+    Kokkos::Tools::Experimental::set_declare_metadata_callback(
+        [](const char* key, const char* value) {
+          found_events.push_back(std::make_shared<DeclareMetadataEvent>(
+              std::string(key), std::string(value)));
+        });
+  }
+  if (config.tuning.contexts) {
+    Kokkos::Tools::Experimental::set_begin_context_callback(
+        [](const size_t context) {
+          found_events.push_back(std::make_shared<BeginContextEvent>(context));
+        });
+    Kokkos::Tools::Experimental::set_end_context_callback(
+        [](const size_t context,
+           Kokkos::Tools::Experimental::VariableValue value) {
+          found_events.push_back(
+              std::make_shared<EndContextEvent>(context, value));
+        });
+  }
+  if (config.tuning.type_declarations) {
+    Kokkos::Tools::Experimental::set_declare_input_type_callback(
+        [](const char* name, const size_t id,
+           Kokkos::Tools::Experimental::VariableInfo* info) {
+          found_events.push_back(std::make_shared<DeclareInputTypeEvent>(
+              std::string(name), id, *info));
+        });
+    Kokkos::Tools::Experimental::set_declare_output_type_callback(
+        [](const char* name, const size_t id,
+           Kokkos::Tools::Experimental::VariableInfo* info) {
+          found_events.push_back(std::make_shared<DeclareOutputTypeEvent>(
+              std::string(name), id, *info));
+        });
+  }
+  if (config.tuning.request_values) {
+    Kokkos::Tools::Experimental::set_request_output_values_callback(
+        [](const size_t context, const size_t num_inputs,
+           const Kokkos::Tools::Experimental::VariableValue* inputs_in,
+           const size_t num_outputs,
+           Kokkos::Tools::Experimental::VariableValue* outputs_in) {
+          std::vector<Kokkos::Tools::Experimental::VariableValue> inputs,
+              outputs;
+          std::copy(inputs_in, inputs_in + num_inputs,
+                    std::back_inserter(inputs));
+          std::copy(outputs_in, outputs_in + num_inputs,
+                    std::back_inserter(outputs));
+
+          found_events.push_back(std::make_shared<RequestOutputValuesEvent>(
+              context, num_inputs, inputs, num_outputs, outputs));
+        });
+  }
+  if (config.infrastructure.init) {
+    Kokkos::Tools::Experimental::set_init_callback(
+        [](const int loadseq, const uint64_t version, const uint32_t num_infos,
+           Kokkos::Profiling::KokkosPDeviceInfo* infos) {
+          found_events.push_back(
+              std::make_shared<InitEvent>(loadseq, version, num_infos, infos));
+        });
+  }
+  if (config.infrastructure.finalize) {
+    Kokkos::Tools::Experimental::set_finalize_callback(
+        []() { found_events.push_back(std::make_shared<FinalizeEvent>()); });
+  }
+  if (config.infrastructure.programming_interface) {
+    Kokkos::Tools::Experimental::
+        set_provide_tool_programming_interface_callback(
+            [](const uint32_t num_functions,
+               Kokkos::Tools::Experimental::ToolProgrammingInterface
+                   interface) {
+              found_events.push_back(
+                  std::make_shared<ProvideToolProgrammingInterfaceEvent>(
+                      num_functions, interface));
+            });
+  }
+  if (config.infrastructure.request_settings) {
+    Kokkos::Tools::Experimental::set_request_tool_settings_callback(
+        [](const uint32_t num_settings,
+           Kokkos::Tools::Experimental::ToolSettings* settings) {
+          found_events.push_back(std::make_shared<RequestToolSettingsEvent>(
+              num_settings, *settings));
+        });
+  }
+}
+template <int priority>
+void listen_tool_events_impl(std::integral_constant<int, priority>,
+                             ToolValidatorConfiguration&) {}
+
+template <class Config>
+void invoke_config(ToolValidatorConfiguration& in, Config conf,
+                   std::true_type) {
+  conf(in);
+}
+template <class Config>
+void invoke_config(ToolValidatorConfiguration&, Config, std::false_type) {}
+
+template <int priority, class Config, class... Configs>
+void listen_tool_events_impl(std::integral_constant<int, priority> prio,
+                             ToolValidatorConfiguration& in, Config conf,
+                             Configs... configs) {
+  invoke_config(in, conf,
+                std::integral_constant<bool, priority == conf.value>{});
+  listen_tool_events_impl(prio, in, configs...);
+}
+template <class... Configs>
+void listen_tool_events(Configs... confs) {
+  ToolValidatorConfiguration conf;
+  listen_tool_events_impl(std::integral_constant<int, 0>{}, conf, confs...);
+  listen_tool_events_impl(std::integral_constant<int, 1>{}, conf, confs...);
+  listen_tool_events_impl(std::integral_constant<int, 2>{}, conf, confs...);
+  set_tool_events_impl(conf);
+}
+
+/**
+ * @brief This is the main entry point people will use to test their programs
+ *        Given a lambda representing a code region, and a set of matchers on
+ * tools events, verify that the given lambda produces events that match those
+ * expected by the matchers
+ *
+ * @tparam Lambda Type of lam
+ * @tparam Matchers Type of matchers
+ * @param lam The code region that will produce events
+ * @param matchers Matchers for those events, lambdas that expect events and
+ * compare them
+ * @return true if all events are consumed, all matchers are invoked, and all
+ * matchers success, false otherwise
+ */
+template <class Lambda, class... Matchers>
+bool validate_event_set(const Lambda& lam, Matchers&&... matchers) {
+  // First, erase events from previous invocations
+  found_events.clear();
+  // Invoke the lambda (this will populate found_events, via tooling)
+  lam();
+  // compare the found events against the expected ones
+  auto success =
+      compare_event_vectors(found_events, std::forward<Matchers>(matchers)...);
+  if (!success) {
+    // on failure, print out the events we found
+    for (const auto& event : found_events) {
+      std::cout << event->descriptor() << std::endl;
+    }
+  }
+  return success;
+}
+/**
+ * @brief Analogous to validate_event_set up above, except rather than
+ *        comparing to matchers, this just returns the found event vector
+ *
+ * @tparam Lambda as in validate_event_set
+ * @param lam as in validate_event_set
+ * @return auto
+ */
+template <class Lambda>
+auto get_event_set(const Lambda& lam) {
+  found_events.clear();
+  lam();
+  // return compare_event_vectors(expected, found_events);
+  std::vector<EventBasePtr> events;
+  std::copy(found_events.begin(), found_events.end(),
+            std::back_inserter(events));
+  return events;
+}
+
+inline MatchDiagnostic check_presence_of(const EventBasePtr&) {
+  return {false};
+}
+template <class Matcher, class... Matchers>
+MatchDiagnostic check_presence_of(const EventBasePtr& event, const Matcher& m,
+                                  Matchers&&... args) {
+  auto tail  = check_presence_of(event, args...);
+  auto match = function_traits<Matcher>::invoke_as(m, event);
+  if (tail.success) {
+    for (const auto& entry : tail.messages) {
+      match.messages.push_back(entry);
+    }
+  }
+  match.success |= tail.success;
+  return match;
+}
+
+template <class Lambda, class... Matchers>
+bool validate_absence(const Lambda& lam, const Matchers... matchers) {
+  // First, erase events from previous invocations
+  found_events.clear();
+  // Invoke the lambda (this will populate found_events, via tooling)
+  lam();
+  // compare the found events against the expected ones
+  for (const auto& event : found_events) {
+    MatchDiagnostic match = check_presence_of(event, matchers...);
+
+    if (match.success) {
+      std::cout << "Test failure: encountered unwanted events" << std::endl;
+      for (const auto& message : match.messages) {
+        std::cout << "  " << message << std::endl;
+      }
+      // on success, print out the events we found
+      for (const auto& p_event : found_events) {
+        std::cout << p_event->descriptor() << std::endl;
+      }
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace Tools
+}  // namespace Test
+}  // namespace Kokkos
diff --git a/packages/kokkos/doc/Doxyfile b/packages/kokkos/doc/Doxyfile
index bc5c7486b27fc55ede35359b969af0a8008f960b..62a56d79fe8f3747eb33122cd1c4d1da25413c09 100644
--- a/packages/kokkos/doc/Doxyfile
+++ b/packages/kokkos/doc/Doxyfile
@@ -99,7 +99,7 @@ EXPAND_AS_DEFINED      =
 TAGFILES               = ../../common/tag_files/teuchos.tag=../../../teuchos/doc/html ../../common/tag_files/epetra.tag=../../../epetra/doc/html \
                          ../../common/tag_files/belos.tag=../../../belos/doc/html ../../common/tag_files/anasazi.tag=../../../anasazi/doc/html \
                          ../../common/tag_files/kokkos.tag=../../../kokkos/doc/html 
-GENERATE_TAGFILE       = ../../common/tag_files/tpetra.tag
+GENERATE_TAGFILE       = ../../common/tag_files/kokkos.tag
 ALLEXTERNALS           = NO
 EXTERNAL_GROUPS        = NO
 #
diff --git a/packages/kokkos/doc/index.doc b/packages/kokkos/doc/index.doc
index 27a9e4f2e7b90e11bbcde7309e9bf1544e3b386f..221645d148026b990728de0c9dab82850efceed9 100644
--- a/packages/kokkos/doc/index.doc
+++ b/packages/kokkos/doc/index.doc
@@ -17,7 +17,7 @@ once, and just change a template parameter to get the optimal data
 layout for your hardware.  %Kokkos has back-ends for the following
 parallel programming models:
 
-- Kokkos::Threads: POSIX Threads (Pthreads)
+- Kokkos::Threads: C++11 Threads (std::thread)
 - Kokkos::OpenMP: OpenMP
 - Kokkos::Cuda: NVIDIA's CUDA programming model for graphics
   processing units (GPUs)
diff --git a/packages/kokkos/example/CMakeLists.txt b/packages/kokkos/example/CMakeLists.txt
index 3db566f83f923947af5993ccb0156a5836296548..7ecaec0f241da106de7571697b94dd3987783140 100644
--- a/packages/kokkos/example/CMakeLists.txt
+++ b/packages/kokkos/example/CMakeLists.txt
@@ -8,4 +8,3 @@ KOKKOS_ADD_EXAMPLE_DIRECTORIES(query_device)
 KOKKOS_ADD_EXAMPLE_DIRECTORIES(tutorial)
 
 KOKKOS_SUBPACKAGE_POSTPROCESS()
-
diff --git a/packages/kokkos/example/build_cmake_in_tree/CMakeLists.txt b/packages/kokkos/example/build_cmake_in_tree/CMakeLists.txt
index 7217807072c7fa3fe1dd3b9d0656e9733c34bed8..89f4b997d2a76242cd1714f9b35d382931330531 100644
--- a/packages/kokkos/example/build_cmake_in_tree/CMakeLists.txt
+++ b/packages/kokkos/example/build_cmake_in_tree/CMakeLists.txt
@@ -1,6 +1,6 @@
-# Kokkos minimally requires 3.10 right now,
+# Kokkos minimally requires 3.16 right now,
 # but your project can set it higher
-cmake_minimum_required(VERSION 3.10)
+cmake_minimum_required(VERSION 3.16)
 
 # Project can mix languages - must have C++ support
 # Kokkos flags are only applied to C++ files
diff --git a/packages/kokkos/example/build_cmake_in_tree/cmake_example.cpp b/packages/kokkos/example/build_cmake_in_tree/cmake_example.cpp
index b0fd9822a492aa55a797dd76277e775a8afcbd24..3d3afdc171615dea7a61d8bd526ba3782da2937a 100644
--- a/packages/kokkos/example/build_cmake_in_tree/cmake_example.cpp
+++ b/packages/kokkos/example/build_cmake_in_tree/cmake_example.cpp
@@ -47,7 +47,7 @@
 
 int main(int argc, char* argv[]) {
   Kokkos::initialize(argc, argv);
-  Kokkos::DefaultExecutionSpace::print_configuration(std::cout);
+  Kokkos::DefaultExecutionSpace{}.print_configuration(std::cout);
 
   if (argc < 2) {
     fprintf(stderr, "Usage: %s [<kokkos_options>] <size>\n", argv[0]);
diff --git a/packages/kokkos/example/build_cmake_installed/CMakeLists.txt b/packages/kokkos/example/build_cmake_installed/CMakeLists.txt
index 7998d2914d586a8725d39992a53cfa56ed436ad5..48d2cff512b2e076f3922c751391da58a51e7f61 100644
--- a/packages/kokkos/example/build_cmake_installed/CMakeLists.txt
+++ b/packages/kokkos/example/build_cmake_installed/CMakeLists.txt
@@ -1,6 +1,6 @@
-# Kokkos minimally requires 3.10 right now,
+# Kokkos minimally requires 3.16 right now,
 # but your project can set it higher
-cmake_minimum_required(VERSION 3.10)
+cmake_minimum_required(VERSION 3.16)
 
 # Projects can safely mix languages - must have C++ support
 # Kokkos flags will only apply to C++ files
diff --git a/packages/kokkos/example/build_cmake_installed_different_compiler/bar.cpp b/packages/kokkos/example/build_cmake_installed_different_compiler/bar.cpp
index e02c2b8c688650fe3c5e0beefb5ea1ce01de2fa8..09ea0e6c4ff702e0a95724bfe7e8c79a951987f3 100644
--- a/packages/kokkos/example/build_cmake_installed_different_compiler/bar.cpp
+++ b/packages/kokkos/example/build_cmake_installed_different_compiler/bar.cpp
@@ -1,3 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
 
 #include <cstdio>
 
diff --git a/packages/kokkos/example/build_cmake_installed_kk_as_language/CMakeLists.txt b/packages/kokkos/example/build_cmake_installed_kk_as_language/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..542916371fff5648e8eed1434ab9c4c6b0bef1e6
--- /dev/null
+++ b/packages/kokkos/example/build_cmake_installed_kk_as_language/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Kokkos minimally requires 3.16 right now,
+# but your project can set it higher
+cmake_minimum_required(VERSION 3.19)
+
+# Projects can safely mix languages - must have C++ support
+# Kokkos flags will only apply to C++ files
+project(Example CXX Fortran CUDA)
+
+# Look for an installed Kokkos
+find_package(Kokkos COMPONENTS separable_compilation)
+set_source_files_properties(cmake_example.cpp PROPERTIES LANGUAGE CUDA)
+add_executable(example cmake_example.cpp bar.cpp foo.f)
+
+# This is the only thing required to set up compiler/linker flags
+target_link_libraries(example Kokkos::kokkos)
+
+enable_testing()
+add_test(NAME KokkosInTree_Verify COMMAND example 10)
diff --git a/packages/kokkos/example/build_cmake_installed_kk_as_language/bar.cpp b/packages/kokkos/example/build_cmake_installed_kk_as_language/bar.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b0280c1f450377d8602a84a3fcbeec2dbf94e4e7
--- /dev/null
+++ b/packages/kokkos/example/build_cmake_installed_kk_as_language/bar.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cstdio>
+void print_cxx() { printf("Hello From C++\n"); }
diff --git a/packages/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp b/packages/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5a0f93e9db3b871c2c8bfeae55f2661dcd67213b
--- /dev/null
+++ b/packages/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp
@@ -0,0 +1,90 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Core.hpp>
+#include <cstdio>
+
+extern "C" void print_fortran_();
+void print_cxx();
+
+struct CountEvenIntegers {
+  KOKKOS_FUNCTION void operator()(const long i, long& lcount) const {
+    lcount += (i % 2) == 0;
+  }
+};
+
+int main(int argc, char* argv[]) {
+  Kokkos::ScopeGuard guard(argc, argv);
+  Kokkos::DefaultExecutionSpace::print_configuration(std::cout);
+
+  const long n = argc > 1 ? atoi(argv[1]) : 10;
+
+  printf("Number of even integers from 0 to %ld\n", n - 1);
+
+  Kokkos::Timer timer;
+  timer.reset();
+
+  // Compute the number of even integers from 0 to n-1, in parallel.
+  long count = 0;
+  CountEvenIntegers functor;
+  Kokkos::parallel_reduce(n, functor, count);
+
+  double count_time = timer.seconds();
+  printf("  Parallel: %ld    %10.6f\n", count, count_time);
+
+  timer.reset();
+
+  // Compare to a sequential loop.
+  long seq_count = 0;
+  for (long i = 0; i < n; ++i) {
+    seq_count += (i % 2) == 0;
+  }
+
+  count_time = timer.seconds();
+  printf("Sequential: %ld    %10.6f\n", seq_count, count_time);
+
+  print_fortran_();
+
+  return (count == seq_count) ? 0 : -1;
+}
diff --git a/packages/kokkos/example/build_cmake_installed_kk_as_language/foo.f b/packages/kokkos/example/build_cmake_installed_kk_as_language/foo.f
new file mode 100644
index 0000000000000000000000000000000000000000..e618455283b65602d98a5de00c8dc2abc6b0f8c2
--- /dev/null
+++ b/packages/kokkos/example/build_cmake_installed_kk_as_language/foo.f
@@ -0,0 +1,4 @@
+        FUNCTION print_fortran()
+          PRINT *, 'Hello World from Fortran'
+          RETURN
+        END
diff --git a/packages/kokkos/example/tutorial/CMakeLists.txt b/packages/kokkos/example/tutorial/CMakeLists.txt
index fd471fa6bea712ebf64952c1eedfc8c20c597efa..efbf8614a0df9a2c1816f2f830ab0a95b2bfccd4 100644
--- a/packages/kokkos/example/tutorial/CMakeLists.txt
+++ b/packages/kokkos/example/tutorial/CMakeLists.txt
@@ -12,5 +12,3 @@ KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_hello_world_lambda)
 KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce_lambda)
 KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_simple_view_lambda)
 
-
-
diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash
index 5e33f592183b9da2a7f079a09feeab6943bceebf..f86147bb9fc1dd8508dfc1ee8a3226e50aab16a7 100755
--- a/packages/kokkos/generate_makefile.bash
+++ b/packages/kokkos/generate_makefile.bash
@@ -148,7 +148,7 @@ display_help_text() {
       echo "--with-openmptarget:                  Enable OpenMPTarget backend."
       echo "--with-sycl:                          Enable Sycl backend."
       echo "--with-openmp:                        Enable OpenMP backend."
-      echo "--with-pthread:                       Enable Pthreads backend."
+      echo "--with-threads:                       Enable Threads backend."
       echo "--with-serial:                        Enable Serial backend."
       echo "--with-devices:                       Explicitly add a set of backends."
       echo ""
@@ -162,7 +162,7 @@ display_help_text() {
       echo "                 VEGA900         = AMD GPU MI25 GFX900"
       echo "                 VEGA906         = AMD GPU MI50/MI60 GFX906"
       echo "                 VEGA908         = AMD GPU MI100 GFX908"
-      echo "                 VEGA90A         = "
+      echo "                 VEGA90A         = AMD GPU MI200 GFX90A"
       echo "               [ARM]"
       echo "                 ARMV80          = ARMv8.0 Compatible CPU"
       echo "                 ARMV81          = ARMv8.1 Compatible CPU"
@@ -291,8 +291,12 @@ do
     --with-sycl)
       update_kokkos_devices Sycl
       ;;
+    --with-threads)
+      update_kokkos_devices Threads
+      ;;
     --with-pthread)
       update_kokkos_devices Pthread
+      echo "warning: The --with-pthread option is deprecated. Use --with-threads instead!"
       ;;
     --with-serial)
       update_kokkos_devices Serial
diff --git a/packages/kokkos/gnu_generate_makefile.bash b/packages/kokkos/gnu_generate_makefile.bash
index 8a463270c85548e4d14fb8c8cda9d5c478bdb96f..15a095854e9c8ad9712e9b368a911f81c1bb9163 100755
--- a/packages/kokkos/gnu_generate_makefile.bash
+++ b/packages/kokkos/gnu_generate_makefile.bash
@@ -47,6 +47,10 @@ do
       ;;
     --with-pthread)
       KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread"
+      echo "warning: The --with-pthread option is deprecated. Use --with-threads instead!"
+      ;;
+    --with-threads)
+      KOKKOS_DEVICES="${KOKKOS_DEVICES},Threads"
       ;;
     --with-serial)
       KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial"
@@ -128,7 +132,7 @@ do
       echo ""
       echo "--with-cuda[=/Path/To/Cuda]:          Enable Cuda and set path to Cuda Toolkit."
       echo "--with-openmp:                        Enable OpenMP backend."
-      echo "--with-pthread:                       Enable Pthreads backend."
+      echo "--with-threads:                       Enable Threads backend."
       echo "--with-serial:                        Enable Serial backend."
       echo "--with-devices:                       Explicitly add a set of backends."
       echo ""
diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt
index 69cd133b44b013145a3a4cfa4bb8c0124ef68d73..e174b47f67c1344c2aa355e0d96ab307f41edac0 100644
--- a/packages/kokkos/master_history.txt
+++ b/packages/kokkos/master_history.txt
@@ -25,4 +25,5 @@ tag:  3.3.00     date: 12:16:2020    master: 734f577a    release: 1535ba5c
 tag:  3.3.01     date: 01:06:2021    master: 6d65b5a3    release: 4d23839c
 tag:  3.4.00     date: 04:26:2021    master: 1fb0c284    release: 5d7738d6
 tag:  3.4.01     date: 05:20:2021    master: 4b97a22f    release: 410b15c8
-tag:  3.5.00     date: 10:28:2021    master: c28a8b03    release: ddad6256
+tag:  3.5.00     date: 11:19:2021    master: c28a8b03    release: 21b879e4
+tag:  3.6.00     date: 04:14:2022    master: 2834f94a    release: 6ea708ff
diff --git a/packages/kokkos/scripts/docker/Dockerfile.clang b/packages/kokkos/scripts/docker/Dockerfile.clang
index 92999a8a44a54c22a40e717a5858a9d0dc5b7199..9df93b57545ae5fe1f0e5cb51ac46aef7d4e8de3 100644
--- a/packages/kokkos/scripts/docker/Dockerfile.clang
+++ b/packages/kokkos/scripts/docker/Dockerfile.clang
@@ -26,7 +26,7 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO
     wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
     wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
     gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
-    grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
     mkdir -p ${CMAKE_DIR} && \
     sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
     rm cmake*
diff --git a/packages/kokkos/scripts/docker/Dockerfile.gcc b/packages/kokkos/scripts/docker/Dockerfile.gcc
index 51d50e64063b611a79a86c8bea159c6435bdc492..84c432cd2480fa9477c7889b083713186c1a9517 100644
--- a/packages/kokkos/scripts/docker/Dockerfile.gcc
+++ b/packages/kokkos/scripts/docker/Dockerfile.gcc
@@ -1,4 +1,22 @@
-FROM gcc:5.3.0
+FROM ubuntu:20.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get upgrade -y && apt-get install -y \
+    build-essential \
+    wget \
+    git \
+    bc \
+    ccache \
+    && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list && \
+    echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list && \
+    apt-get update && apt-get install -y g++-5 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 5 && \
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 5 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
 
 RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
     KEYDUMP_FILE=keydump && \
@@ -17,7 +35,7 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO
     wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
     wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
     gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
-    grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
     mkdir -p ${CMAKE_DIR} && \
     sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
     rm cmake*
diff --git a/packages/kokkos/scripts/docker/Dockerfile.hipcc b/packages/kokkos/scripts/docker/Dockerfile.hipcc
index 5bef7f2ef814ad7420b8c0d4bdefa0961f4dd211..3274a59c34ea0f421a717540ce38a01c17279e52 100644
--- a/packages/kokkos/scripts/docker/Dockerfile.hipcc
+++ b/packages/kokkos/scripts/docker/Dockerfile.hipcc
@@ -1,4 +1,4 @@
-ARG BASE=rocm/dev-ubuntu-20.04:4.2
+ARG BASE=rocm/dev-ubuntu-20.04:4.5
 FROM $BASE
 
 RUN apt-get update && apt-get install -y \
@@ -29,7 +29,7 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO
     wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
     wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
     gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
-    grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
     mkdir -p ${CMAKE_DIR} && \
     sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
     rm cmake*
diff --git a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject
index 3de9a7f5804f938d3c4056c723f9a25ceb189242..1df32051b40fc17f0dce457d0beb2815d8425e6d 100644
--- a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject
+++ b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject
@@ -28,7 +28,7 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO
     wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
     wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
     gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
-    grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
     mkdir -p ${CMAKE_DIR} && \
     sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
     rm cmake*
diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvcc b/packages/kokkos/scripts/docker/Dockerfile.nvcc
index 8a054066bde8e1983c9b9baf511836c88eabefa5..c3e7a875f3707b601bb3a2a8d379accea352ec5f 100644
--- a/packages/kokkos/scripts/docker/Dockerfile.nvcc
+++ b/packages/kokkos/scripts/docker/Dockerfile.nvcc
@@ -29,7 +29,7 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO
     wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
     wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
     gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
-    grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
     mkdir -p ${CMAKE_DIR} && \
     sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
     rm cmake*
diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvhpc b/packages/kokkos/scripts/docker/Dockerfile.nvhpc
new file mode 100644
index 0000000000000000000000000000000000000000..3e3a32e4dd017a5be6e95ce701cfa6082d5d6b0e
--- /dev/null
+++ b/packages/kokkos/scripts/docker/Dockerfile.nvhpc
@@ -0,0 +1,25 @@
+ARG BASE=nvcr.io/nvidia/nvhpc:21.9-devel-cuda11.4-ubuntu20.04
+FROM $BASE
+
+RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
+    KEYDUMP_FILE=keydump && \
+    wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
+    wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
+    gpg --import ${KEYDUMP_FILE} && \
+    gpg --verify ${KEYDUMP_FILE}.sig ${KEYDUMP_FILE} && \
+    rm ${KEYDUMP_FILE}*
+
+ARG CMAKE_VERSION=3.21.4
+ENV CMAKE_DIR=/opt/cmake
+RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \
+    CMAKE_SCRIPT=cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \
+    CMAKE_SHA256=cmake-${CMAKE_VERSION}-SHA-256.txt && \
+    wget --quiet ${CMAKE_URL}/${CMAKE_SHA256} && \
+    wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
+    wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
+    gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
+    mkdir -p ${CMAKE_DIR} && \
+    sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
+    rm cmake*
+ENV PATH=${CMAKE_DIR}/bin:$PATH
diff --git a/packages/kokkos/scripts/docker/Dockerfile.openmptarget b/packages/kokkos/scripts/docker/Dockerfile.openmptarget
index 5a676ca32a484dec5ee6e89ca3b4acf21acbbd81..0599c60857927a68710404a9f3ce597292a58c2f 100644
--- a/packages/kokkos/scripts/docker/Dockerfile.openmptarget
+++ b/packages/kokkos/scripts/docker/Dockerfile.openmptarget
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y \
         ccache \
         python3 \
         libelf-dev \
+        g++-multilib \
         && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
@@ -31,15 +32,15 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO
     wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
     wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
     gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
-    grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
     mkdir -p ${CMAKE_DIR} && \
     sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
     rm ${CMAKE_SCRIPT}
 ENV PATH=${CMAKE_DIR}/bin:$PATH
 
+ARG LLVM_VERSION=llvmorg-13.0.1-rc3
 ENV LLVM_DIR=/opt/llvm
-RUN LLVM_VERSION=887c7660bdf3f300bd1997dcfd7ace91787c0584 && \
-    LLVM_URL=https://github.com/llvm/llvm-project/archive &&\
+RUN LLVM_URL=https://github.com/llvm/llvm-project/archive &&\
     LLVM_ARCHIVE=${LLVM_VERSION}.tar.gz &&\
     SCRATCH_DIR=/scratch && mkdir -p ${SCRATCH_DIR} && cd ${SCRATCH_DIR} && \
     wget --quiet ${LLVM_URL}/${LLVM_ARCHIVE} && \
@@ -52,22 +53,13 @@ RUN LLVM_VERSION=887c7660bdf3f300bd1997dcfd7ace91787c0584 && \
       -DCMAKE_INSTALL_PREFIX=$LLVM_DIR \
       -DCMAKE_C_COMPILER=gcc \
       -DCMAKE_CXX_COMPILER=g++ \
-      -DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi;openmp" \
+      -DLLVM_ENABLE_PROJECTS="clang" \
+      -DLLVM_ENABLE_RUNTIMES="openmp" \
       -DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_70 \
       -DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=70 \
     ../llvm && \
     make -j${NPROC} && \
     make install && \
-    rm -rf ../build/* && \
-    cmake \
-      -DCMAKE_BUILD_TYPE=Release \
-      -DCMAKE_INSTALL_PREFIX=$LLVM_DIR \
-      -DCMAKE_C_COMPILER=$LLVM_DIR/bin/clang \
-      -DCMAKE_CXX_COMPILER=$LLVM_DIR/bin/clang++ \
-      -DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=70 \
-    ../openmp && \
-    make -j${NPROC} && \
-    make install && \
-    echo "${LLVM_DIR}/lib" > /etc/ld.so.conf.d/llvm.conf && ldconfig && \
     rm -rf ${SCRATCH_DIR}
 ENV PATH=${LLVM_DIR}/bin:$PATH
+ENV LD_LIBRARY_PATH=${LLVM_DIR}/lib:$LD_LIBRARY_PATH
diff --git a/packages/kokkos/scripts/docker/Dockerfile.sycl b/packages/kokkos/scripts/docker/Dockerfile.sycl
index 3393d0da8a7f257f71d79eb5dfd9f76b5bfd6a31..8f08fe2e528f93b09a8c1a1df4624b3a040f07d6 100644
--- a/packages/kokkos/scripts/docker/Dockerfile.sycl
+++ b/packages/kokkos/scripts/docker/Dockerfile.sycl
@@ -29,15 +29,15 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO
     wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \
     wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \
     gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \
-    grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \
+    grep -i ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sed -e s/linux/Linux/ | sha256sum --check && \
     mkdir -p ${CMAKE_DIR} && \
     sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \
     rm cmake*
 ENV PATH=${CMAKE_DIR}/bin:$PATH
 
 ENV SYCL_DIR=/opt/sycl
-RUN SYCL_VERSION=20210621 && \
-    SYCL_URL=https://github.com/intel/llvm/archive/sycl-nightly && \
+RUN SYCL_VERSION=2021-09 && \
+    SYCL_URL=https://github.com/intel/llvm/archive/ && \
     SYCL_ARCHIVE=${SYCL_VERSION}.tar.gz && \
     SCRATCH_DIR=/scratch && mkdir -p ${SCRATCH_DIR} && cd ${SCRATCH_DIR} && \
     wget --quiet ${SYCL_URL}/${SYCL_ARCHIVE} && \
diff --git a/packages/kokkos/scripts/testing_scripts/test_all_sandia b/packages/kokkos/scripts/testing_scripts/test_all_sandia
index 3e0295643e48b8af85a8aa39874545f7340b157b..b2c5afe23793c16c5e40c781d2422d358252eded 100755
--- a/packages/kokkos/scripts/testing_scripts/test_all_sandia
+++ b/packages/kokkos/scripts/testing_scripts/test_all_sandia
@@ -91,10 +91,6 @@ if [[ "$HOSTNAME" == *blake* ]]; then # Warning: very generic name
   module load git
 fi
 
-if [[ "$HOSTNAME" == apollo\.* ]]; then
-  MACHINE=apollo
-fi
-
 if [[ "$HOSTNAME" == kokkos-dev-2* ]]; then
   MACHINE=kokkos-dev-2
 fi
@@ -123,7 +119,6 @@ fi
 if [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
   if [[ "$MACHINE" = "" ]]; then
     MACHINE=sems
-    module load sems-git
   fi  
 fi
 
@@ -271,15 +266,18 @@ fi
 #
 
 if [ "$MACHINE" = "sems" ]; then
-  source /projects/sems/modulefiles/utils/sems-modules-init.sh
+  source /projects/sems/modulefiles/utils/sems-archive-modules-init.sh
 
   # On unnamed sems machines, assume more restricted rhel7 environment
   # On rhel7 sems machines gcc/7.3.0, clang/4.0.1, and intel/16.0.3 are missing
   # Remove kokkkos-env module use
 
-  module load sems-cmake/3.17.1
-  BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
-  CUDA9_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0"
+  module load sems-archive-cmake/3.17.1
+  BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  OLDINTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/6.4.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  INTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CLANG_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/9.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CUDA9_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
   SKIP_HWLOC=True
   # No sems hwloc module
 
@@ -291,7 +289,7 @@ if [ "$MACHINE" = "sems" ]; then
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
                "gcc/7.2.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.1 $INTEL_BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
                "cuda/9.2 $CUDA9_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
     )
   else
@@ -303,23 +301,26 @@ if [ "$MACHINE" = "sems" ]; then
                "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/8.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/9.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/9.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/10.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/19.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/5.0.1 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/7.0.1 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/9.0.0 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/10.0.0 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "intel/17.0.1 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/18.0.5 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/19.0.5 $INTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "cuda/9.2 $CUDA9_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
     )
   fi
 elif [ "$MACHINE" = "sogpu" ]; then
-  source /projects/sems/modulefiles/utils/sems-modules-init.sh
-
-  module load sems-cmake/3.17.1 sems-git
-  BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
-  CUDA_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0"
-  CUDA11_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/8.3.0"
+  source /projects/sems/modulefiles/utils/sems-archive-modules-init.sh
+
+  module load sems-archive-cmake/3.17.1 sems-archive-git
+  BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  OLDINTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/6.4.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  INTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CLANG_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/9.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CUDA_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CUDA11_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/8.3.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
   SKIP_HWLOC=True
   # No sems hwloc module
 
@@ -335,25 +336,28 @@ elif [ "$MACHINE" = "sogpu" ]; then
                "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/8.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/9.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/9.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/10.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/19.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/5.0.1 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/7.0.1 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/9.0.0 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/10.0.0 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "intel/17.0.1 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/18.0.5 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/19.0.5 $INTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "cuda/10.1 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
                "cuda/11.1 $CUDA11_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
               )
 elif [ "$MACHINE" = "kokkos-dev" ]; then
-  source /projects/sems/modulefiles/utils/sems-modules-init.sh
-
-  module load sems-cmake/3.17.1
-  BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
-  CUDA9_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/6.1.0"
-  CUDA10_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0"
-  CUDA11_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/9.2.0"
-  CLANG7_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-cuda/9.2"
+  source /projects/sems/modulefiles/utils/sems-archive-modules-init.sh
+
+  module load sems-archive-cmake/3.17.1
+  BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  OLDINTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/6.4.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  INTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CLANG_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/9.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CUDA9_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/6.1.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CUDA10_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CUDA11_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/9.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CLANG7_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-cuda/9.2,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
   SKIP_HWLOC=True
 
   if [ -z "$ARCH_FLAG" ]; then
@@ -364,9 +368,9 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
                "gcc/7.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
-               "intel/18.0.5 $BASE_MODULE_LIST "Serial" icpc $INTEL_WARNING_FLAGS"
-               "intel/19.0.5 $BASE_MODULE_LIST "Pthread_Serial" icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.1 $OLDINTEL_BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
+               "intel/18.0.5 $OLDINTEL_BASE_MODULE_LIST "Serial" icpc $INTEL_WARNING_FLAGS"
+               "intel/19.0.5 $INTEL_BASE_MODULE_LIST "Pthread_Serial" icpc $INTEL_WARNING_FLAGS"
                "clang/5.0.1 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
                "clang/7.0.1 $CLANG7_MODULE_LIST "Cuda_OpenMP" clang++ $CLANG_WARNING_FLAGS"
                "cuda/9.2 $CUDA9_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
@@ -378,13 +382,13 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then
                "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/8.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/9.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/19.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.1 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/18.0.5 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/19.0.5 $INTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "clang/7.0.1 $CLANG7_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/9.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/10.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/9.0.0 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/10.0.0 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "cuda/10.1 $CUDA10_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
                "cuda/11.1 $CUDA11_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
                "cuda/9.2 $CUDA9_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
@@ -508,8 +512,8 @@ elif [ "$MACHINE" = "caraway" ]; then
   HIPCLANG_WARNING_FLAGS="-Werror -Wno-unused-command-line-argument -DNDEBUG"
 
   # Format: (compiler module-list build-list exe-name warning-flag)
-  COMPILERS=("rocm/4.2.0 $BASE_MODULE_LIST $HIPCLANG_BUILD_LIST hipcc $HIPCLANG_WARNING_FLAGS"
-             "rocm/4.3.0 $BASE_MODULE_LIST $HIPCLANG_BUILD_LIST hipcc $HIPCLANG_WARNING_FLAGS"
+  COMPILERS=("rocm/4.3.0 $BASE_MODULE_LIST $HIPCLANG_BUILD_LIST hipcc $HIPCLANG_WARNING_FLAGS"
+             "rocm/4.5.0 $BASE_MODULE_LIST $HIPCLANG_BUILD_LIST hipcc $HIPCLANG_WARNING_FLAGS"
   )
 
   if [ -z "$ARCH_FLAG" ]; then
@@ -556,76 +560,29 @@ elif [ "$MACHINE" = "blake" ]; then
     ARCH_FLAG="--arch=SKX"
   fi
 
-elif [ "$MACHINE" = "apollo" ]; then
-  source /projects/sems/modulefiles/utils/sems-modules-init.sh
-  module use /home/projects/modulefiles/local/x86-64
-
-  module load sems-git
-  module load sems-tex
-  module load sems-cmake/3.17.1
-  module load sems-gdb
-  module load binutils
-
-  SKIP_HWLOC=True
-
-  BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
-  CLANG_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>"
-  CUDA10_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
-  CUDA10X_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0"
-
-  HPX3_MODULE_LIST="sems-env,sems-cmake/3.17.1,compilers/hpx/1.3.0,sems-gcc/6.1.0,binutils"
-
-  BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
-  BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
-  BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
-
-  if [ "$SPOT_CHECK" = "True" ]; then
-    # Format: (compiler module-list build-list exe-name warning-flag)
-    COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP,Pthread,Serial" g++ $GCC_WARNING_FLAGS"
-               "hpx/1.3.0 $HPX3_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS"
-    )
-  else
-    # Format: (compiler module-list build-list exe-name warning-flag)
-    COMPILERS=("cuda/10.0 $CUDA10_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
-               "cuda/10.1 $CUDA10X_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
-               "cuda/10.2 $CUDA10X_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
-               "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
-               "clang/7.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
-               "clang/8.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
-               "hpx/1.3.0 $HPX3_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS"
-    )
-  fi
-
-  if [ -z "$ARCH_FLAG" ]; then
-    ARCH_FLAG="--arch=SNB,Volta70"
-  fi
-
 elif [ "$MACHINE" = "kokkos-dev-2" ]; then
-  source /projects/sems/modulefiles/utils/sems-modules-init.sh
+  source /projects/sems/modulefiles/utils/sems-archive-modules-init.sh
   module use /home/projects/x86-64/modulefiles/local
   module purge
-  module load sems-env
+  module load sems-archive-env
 
-  module load sems-git
-  module load sems-tex
-  module load sems-cmake/3.17.1
-  module load sems-gdb
+  module load sems-archive-git
+  module load sems-archive-tex
+  module load sems-archive-cmake/3.17.1
+  module load sems-archive-gdb
 
   SKIP_HWLOC=True
 
-  BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
-  GCC91_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>"
-  NVCC9_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
-  NVCC_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0"
-  NVCC_SEMSMODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0"
-  NVCC11_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/9.2.0"
-
-  CLANG8_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/10.0"
-  PGI_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-gcc/7.3.0,<COMPILER_NAME>/<COMPILER_VERSION>"
+  BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  OLDINTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/6.4.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  INTEL_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CLANG_BASE_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/9.2.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CLANG8_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.2.0,cuda/10.0,<COMPILER_NAME>/<COMPILER_VERSION>"
+  GCC91_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>"
+  NVCC9_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/5.3.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  NVCC_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.3.0,<COMPILER_NAME>/<COMPILER_VERSION>"
+  NVCC_SEMSMODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/7.3.0,sems-archive-<COMPILER_NAME>/<COMPILER_VERSION>"
+  NVCC11_MODULE_LIST="sems-archive-env,sems-archive-cmake/3.17.1,sems-archive-gcc/9.2.0,<COMPILER_NAME>/<COMPILER_VERSION>"
 
   BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_Pthread"
   BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_OpenMP"
@@ -636,7 +593,7 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then
     COMPILERS=("gcc/7.3.0 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
                "gcc/8.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
                "gcc/9.1 $GCC91_MODULE_LIST "OpenMP,Serial" g++ $GCC_WARNING_FLAGS"
-               "intel/18.0.5 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
+               "intel/18.0.5 $OLDINTEL_BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
                "clang/8.0 $CLANG8_MODULE_LIST "Cuda_OpenMP,Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
                "cuda/10.1 $NVCC_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
     )
@@ -657,14 +614,13 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then
                "gcc/8.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/9.1 $GCC91_MODULE_LIST "$GCC_BUILD_LIST" g++ $GCC_WARNING_FLAGS"
                "gcc/9.2.0 $BASE_MODULE_LIST "$GCC_BUILD_LIST" g++ $GCC_WARNING_FLAGS"
-               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/19.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/9.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/10.0.0 $BASE_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
-               "pgi/19.4 $PGI_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS"
+               "intel/17.0.1 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/18.0.5 $OLDINTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/19.0.5 $INTEL_BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/5.0.1 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/7.0.1 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/9.0.0 $CLANG_BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/10.0.0 $CLANG_BASE_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
     )
   fi
 
diff --git a/packages/kokkos/scripts/testing_scripts/update_lib.sh b/packages/kokkos/scripts/testing_scripts/update_lib.sh
index ee2f66dc407a76dd7f70c6c14b789fd42584fb11..39335f4b231b8fccb35465e65a7fb4bd6dc74482 100755
--- a/packages/kokkos/scripts/testing_scripts/update_lib.sh
+++ b/packages/kokkos/scripts/testing_scripts/update_lib.sh
@@ -3,38 +3,6 @@
 local machine_input="$1"
 local compiler_input="$2"
 
-check_sems_intel() {
-  ICPCVER="$(icpc --version | grep icpc | cut -d ' ' -f 3)"
-  if [[ "${ICPCVER}" = 17.* ]]; then
-    module swap sems-gcc/4.9.3 sems-gcc/6.4.0
-    module list
-  fi
-  if [[ "${ICPCVER}" = 19.* ]]; then
-    # Newer gcc needed for c++ standard beyond c++14
-    module swap sems-gcc/6.1.0 sems-gcc/7.2.0
-    module list
-  fi
-}
-
-check_sems_clang() {
-  CLANGVER=$(clang --version | grep "clang version" | cut -d " " -f 3)
-  if [[ "${CLANGVER}" = 9.* ]] || [[ "${CLANGVER}" = 10.* ]]; then
-    # Newer gcc needed for c++ standard beyond c++14
-    module swap sems-gcc/5.3.0 sems-gcc/8.3.0
-    module list
-  fi
-}
-
-check_compiler_modules() {
-  if [[ "$compiler_input" = clang/* ]]; then
-    echo "  clang compiler - check supporting modules"
-    check_sems_clang
-  elif [[ "$compiler_input" = intel/* ]]; then
-    echo "  intel compiler - check supporting modules"
-    check_sems_intel
-  fi
-}
-
 if [ "$machine_input" = blake ]; then
   ICPCVER="$(icpc --version | grep icpc | cut -d ' ' -f 3)"
   if [[ "${ICPCVER}" = 17.* || "${ICPCVER}" = 18.0.128 ]]; then
@@ -42,12 +10,3 @@ if [ "$machine_input" = blake ]; then
     module list
   fi
 fi
-if [ "$machine_input" = kokkos-dev ]; then
-  check_compiler_modules
-fi
-if [ "$machine_input" = kokkos-dev-2 ]; then
-  check_compiler_modules
-fi
-if [ "$machine_input" = sems ] || [ "$machine_input" = sogpu ]; then
-  check_compiler_modules
-fi
diff --git a/packages/kokkos/tpls/gtest/gtest/README b/packages/kokkos/tpls/gtest/gtest/README
index 82964ecc329b474002c66cf534999519e8fc39a3..4f75dc2db9eed9662934b0fdc015c3595849052a 100644
--- a/packages/kokkos/tpls/gtest/gtest/README
+++ b/packages/kokkos/tpls/gtest/gtest/README
@@ -1,5 +1,5 @@
-This is a fused source version of gtest 1.7.0. All that should be necessary to
-start using gtest in your package is to declare the dependency and include
+This is a fused source version of gtest (v1.11.0). All that should be necessary
+to start using gtest in your package is to declare the dependency and include
 gtest/gtest.h.
 
 However, because some of the packages that are developed in Sierra do not use a
@@ -10,4 +10,3 @@ gtest.h. This will make it possible for them find the headers while still using
 the fuse source version. This should not have any ill effects since the header is
 protected and allows for only using the non-gtest.h headers in their files.
 
-
diff --git a/packages/kokkos/tpls/gtest/gtest/gtest-all.cc b/packages/kokkos/tpls/gtest/gtest/gtest-all.cc
index 7c544a382f2a23364e521104a267cfbbf2da33fb..f4daf6b01d051e59102c1b8618c5d97cb0236907 100644
--- a/packages/kokkos/tpls/gtest/gtest/gtest-all.cc
+++ b/packages/kokkos/tpls/gtest/gtest/gtest-all.cc
@@ -26,10 +26,9 @@
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 //
-// Author: mheule@google.com (Markus Heule)
-//
-// Google C++ Testing Framework (Google Test)
+// Google C++ Testing and Mocking Framework (Google Test)
 //
 // Sometimes it's desirable to build Google Test by compiling a single file.
 // This file serves this purpose.
@@ -67,10 +66,9 @@
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 //
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
+// The Google C++ Testing and Mocking Framework (Google Test)
 
 // Copyright 2007, Google Inc.
 // All rights reserved.
@@ -100,15 +98,19 @@
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
+
 //
 // Utilities for testing Google Test itself and code that uses Google Test
 // (e.g. frameworks built on top of Google Test).
 
-#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
-#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+// GOOGLETEST_CM0004 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
+
 
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
 
 namespace testing {
 
@@ -141,14 +143,15 @@ class GTEST_API_ ScopedFakeTestPartResultReporter
                                    TestPartResultArray* result);
 
   // The d'tor restores the previous test part result reporter.
-  virtual ~ScopedFakeTestPartResultReporter();
+  ~ScopedFakeTestPartResultReporter() override;
 
   // Appends the TestPartResult object to the TestPartResultArray
   // received in the constructor.
   //
   // This method is from the TestPartResultReporterInterface
   // interface.
-  virtual void ReportTestPartResult(const TestPartResult& result);
+  void ReportTestPartResult(const TestPartResult& result) override;
+
  private:
   void Init();
 
@@ -170,13 +173,12 @@ class GTEST_API_ SingleFailureChecker {
  public:
   // The constructor remembers the arguments.
   SingleFailureChecker(const TestPartResultArray* results,
-                       TestPartResult::Type type,
-                       const string& substr);
+                       TestPartResult::Type type, const std::string& substr);
   ~SingleFailureChecker();
  private:
   const TestPartResultArray* const results_;
   const TestPartResult::Type type_;
-  const string substr_;
+  const std::string substr_;
 
   GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
 };
@@ -185,6 +187,8 @@ class GTEST_API_ SingleFailureChecker {
 
 }  // namespace testing
 
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
 // A set of macros for testing Google Test assertions or code that's expected
 // to generate Google Test fatal failures.  It verifies that the given
 // statement will cause exactly one fatal Google Test failure with 'substr'
@@ -302,10 +306,9 @@ class GTEST_API_ SingleFailureChecker {
     }\
   } while (::testing::internal::AlwaysFalse())
 
-#endif  // GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
 
 #include <ctype.h>
-#include <math.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -314,18 +317,19 @@ class GTEST_API_ SingleFailureChecker {
 #include <wctype.h>
 
 #include <algorithm>
+#include <chrono>  // NOLINT
+#include <cmath>
+#include <cstdint>
 #include <iomanip>
 #include <limits>
+#include <list>
+#include <map>
 #include <ostream>  // NOLINT
 #include <sstream>
 #include <vector>
 
 #if GTEST_OS_LINUX
 
-// TODO(kenton@google.com): Use autoconf to detect availability of
-// gettimeofday().
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-
 # include <fcntl.h>  // NOLINT
 # include <limits.h>  // NOLINT
 # include <sched.h>  // NOLINT
@@ -336,12 +340,7 @@ class GTEST_API_ SingleFailureChecker {
 # include <unistd.h>  // NOLINT
 # include <string>
 
-#elif GTEST_OS_SYMBIAN
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-# include <sys/time.h>  // NOLINT
-
 #elif GTEST_OS_ZOS
-# define GTEST_HAS_GETTIMEOFDAY_ 1
 # include <sys/time.h>  // NOLINT
 
 // On z/OS we additionally need strings.h for strcasecmp.
@@ -350,36 +349,28 @@ class GTEST_API_ SingleFailureChecker {
 #elif GTEST_OS_WINDOWS_MOBILE  // We are on Windows CE.
 
 # include <windows.h>  // NOLINT
+# undef min
 
 #elif GTEST_OS_WINDOWS  // We are on Windows proper.
 
+# include <windows.h>  // NOLINT
+# undef min
+
+#ifdef _MSC_VER
+# include <crtdbg.h>  // NOLINT
+#endif
+
 # include <io.h>  // NOLINT
 # include <sys/timeb.h>  // NOLINT
 # include <sys/types.h>  // NOLINT
 # include <sys/stat.h>  // NOLINT
 
 # if GTEST_OS_WINDOWS_MINGW
-// MinGW has gettimeofday() but not _ftime64().
-// TODO(kenton@google.com): Use autoconf to detect availability of
-//   gettimeofday().
-// TODO(kenton@google.com): There are other ways to get the time on
-//   Windows, like GetTickCount() or GetSystemTimeAsFileTime().  MinGW
-//   supports these.  consider using them instead.
-#  define GTEST_HAS_GETTIMEOFDAY_ 1
 #  include <sys/time.h>  // NOLINT
 # endif  // GTEST_OS_WINDOWS_MINGW
 
-// cpplint thinks that the header is already included, so we want to
-// silence it.
-# include <windows.h>  // NOLINT
-
 #else
 
-// Assume other platforms have gettimeofday().
-// TODO(kenton@google.com): Use autoconf to detect availability of
-//   gettimeofday().
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-
 // cpplint thinks that the header is already included, so we want to
 // silence it.
 # include <sys/time.h>  // NOLINT
@@ -394,14 +385,10 @@ class GTEST_API_ SingleFailureChecker {
 #if GTEST_CAN_STREAM_RESULTS_
 # include <arpa/inet.h>  // NOLINT
 # include <netdb.h>  // NOLINT
+# include <sys/socket.h>  // NOLINT
+# include <sys/types.h>  // NOLINT
 #endif
 
-// Indicates that this translation unit is part of Google Test's
-// implementation.  It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error.  This trick is to
-// prevent a user from accidentally including gtest-internal-inl.h in
-// his code.
-#define GTEST_IMPLEMENTATION_ 1
 // Copyright 2005, Google Inc.
 // All rights reserved.
 //
@@ -431,23 +418,12 @@ class GTEST_API_ SingleFailureChecker {
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// Utility functions and classes used by the Google C++ testing framework.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
+// Utility functions and classes used by the Google C++ testing framework.//
 // This file contains purely Google Test's internal implementation.  Please
 // DO NOT #INCLUDE IT IN A USER PROGRAM.
 
-#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_
-#define GTEST_SRC_GTEST_INTERNAL_INL_H_
-
-// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is
-// part of Google Test's implementation; otherwise it's undefined.
-#if !GTEST_IMPLEMENTATION_
-// A user is trying to include this from his code - just say no.
-# error "gtest-internal-inl.h is part of Google Test's internal implementation."
-# error "It must not be included except by Google Test itself."
-#endif  // GTEST_IMPLEMENTATION_
+#ifndef GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
+#define GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
 
 #ifndef _WIN32_WCE
 # include <errno.h>
@@ -457,6 +433,8 @@ class GTEST_API_ SingleFailureChecker {
 #include <string.h>  // For memmove.
 
 #include <algorithm>
+#include <cstdint>
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -471,6 +449,9 @@ class GTEST_API_ SingleFailureChecker {
 #endif  // GTEST_OS_WINDOWS
 
 
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
 namespace testing {
 
 // Declares the flags.
@@ -491,28 +472,32 @@ const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests";
 const char kBreakOnFailureFlag[] = "break_on_failure";
 const char kCatchExceptionsFlag[] = "catch_exceptions";
 const char kColorFlag[] = "color";
+const char kFailFast[] = "fail_fast";
 const char kFilterFlag[] = "filter";
 const char kListTestsFlag[] = "list_tests";
 const char kOutputFlag[] = "output";
+const char kBriefFlag[] = "brief";
 const char kPrintTimeFlag[] = "print_time";
+const char kPrintUTF8Flag[] = "print_utf8";
 const char kRandomSeedFlag[] = "random_seed";
 const char kRepeatFlag[] = "repeat";
 const char kShuffleFlag[] = "shuffle";
 const char kStackTraceDepthFlag[] = "stack_trace_depth";
 const char kStreamResultToFlag[] = "stream_result_to";
 const char kThrowOnFailureFlag[] = "throw_on_failure";
+const char kFlagfileFlag[] = "flagfile";
 
 // A valid random seed must be in [1, kMaxRandomSeed].
 const int kMaxRandomSeed = 99999;
 
-// g_help_flag is true iff the --help flag or an equivalent form is
-// specified on the command line.
+// g_help_flag is true if and only if the --help flag or an equivalent form
+// is specified on the command line.
 GTEST_API_ extern bool g_help_flag;
 
 // Returns the current time in milliseconds.
 GTEST_API_ TimeInMillis GetTimeInMillis();
 
-// Returns true iff Google Test should use colors in the output.
+// Returns true if and only if Google Test should use colors in the output.
 GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);
 
 // Formats the given time in milliseconds as seconds.
@@ -529,11 +514,11 @@ GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms);
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
 GTEST_API_ bool ParseInt32Flag(
-    const char* str, const char* flag, Int32* value);
+    const char* str, const char* flag, int32_t* value);
 
 // Returns a random seed in range [1, kMaxRandomSeed] based on the
 // given --gtest_random_seed flag value.
-inline int GetRandomSeedFromFlag(Int32 random_seed_flag) {
+inline int GetRandomSeedFromFlag(int32_t random_seed_flag) {
   const unsigned int raw_seed = (random_seed_flag == 0) ?
       static_cast<unsigned int>(GetTimeInMillis()) :
       static_cast<unsigned int>(random_seed_flag);
@@ -569,11 +554,14 @@ class GTestFlagSaver {
     color_ = GTEST_FLAG(color);
     death_test_style_ = GTEST_FLAG(death_test_style);
     death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
+    fail_fast_ = GTEST_FLAG(fail_fast);
     filter_ = GTEST_FLAG(filter);
     internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
     list_tests_ = GTEST_FLAG(list_tests);
     output_ = GTEST_FLAG(output);
+    brief_ = GTEST_FLAG(brief);
     print_time_ = GTEST_FLAG(print_time);
+    print_utf8_ = GTEST_FLAG(print_utf8);
     random_seed_ = GTEST_FLAG(random_seed);
     repeat_ = GTEST_FLAG(repeat);
     shuffle_ = GTEST_FLAG(shuffle);
@@ -591,10 +579,13 @@ class GTestFlagSaver {
     GTEST_FLAG(death_test_style) = death_test_style_;
     GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
     GTEST_FLAG(filter) = filter_;
+    GTEST_FLAG(fail_fast) = fail_fast_;
     GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
     GTEST_FLAG(list_tests) = list_tests_;
     GTEST_FLAG(output) = output_;
+    GTEST_FLAG(brief) = brief_;
     GTEST_FLAG(print_time) = print_time_;
+    GTEST_FLAG(print_utf8) = print_utf8_;
     GTEST_FLAG(random_seed) = random_seed_;
     GTEST_FLAG(repeat) = repeat_;
     GTEST_FLAG(shuffle) = shuffle_;
@@ -611,15 +602,18 @@ class GTestFlagSaver {
   std::string color_;
   std::string death_test_style_;
   bool death_test_use_fork_;
+  bool fail_fast_;
   std::string filter_;
   std::string internal_run_death_test_;
   bool list_tests_;
   std::string output_;
+  bool brief_;
   bool print_time_;
-  internal::Int32 random_seed_;
-  internal::Int32 repeat_;
+  bool print_utf8_;
+  int32_t random_seed_;
+  int32_t repeat_;
   bool shuffle_;
-  internal::Int32 stack_trace_depth_;
+  int32_t stack_trace_depth_;
   std::string stream_result_to_;
   bool throw_on_failure_;
 } GTEST_ATTRIBUTE_UNUSED_;
@@ -630,11 +624,11 @@ class GTestFlagSaver {
 // If the code_point is not a valid Unicode code point
 // (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
 // to "(Invalid Unicode 0xXXXXXXXX)".
-GTEST_API_ std::string CodePointToUtf8(UInt32 code_point);
+GTEST_API_ std::string CodePointToUtf8(uint32_t code_point);
 
 // Converts a wide string to a narrow string in UTF-8 encoding.
 // The wide string is assumed to have the following encoding:
-//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
+//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin)
 //   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
 // Parameter str points to a null-terminated wide string.
 // Parameter num_chars may additionally limit the number
@@ -663,14 +657,14 @@ GTEST_API_ bool ShouldShard(const char* total_shards_str,
                             const char* shard_index_str,
                             bool in_subprocess_for_death_test);
 
-// Parses the environment variable var as an Int32. If it is unset,
-// returns default_val. If it is not an Int32, prints an error and
+// Parses the environment variable var as a 32-bit integer. If it is unset,
+// returns default_val. If it is not a 32-bit integer, prints an error and
 // and aborts.
-GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val);
+GTEST_API_ int32_t Int32FromEnvOrDie(const char* env_var, int32_t default_val);
 
 // Given the total number of shards, the shard index, and the test id,
-// returns true iff the test should be run on this shard. The test id is
-// some arbitrary but unique non-negative integer assigned to each test
+// returns true if and only if the test should be run on this shard. The test id
+// is some arbitrary but unique non-negative integer assigned to each test
 // method. Assumes that 0 <= shard_index < total_shards.
 GTEST_API_ bool ShouldRunTestOnShard(
     int total_shards, int shard_index, int test_id);
@@ -701,7 +695,8 @@ void ForEach(const Container& c, Functor functor) {
 // in range [0, v.size()).
 template <typename E>
 inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
-  return (i < 0 || i >= static_cast<int>(v.size())) ? default_value : v[i];
+  return (i < 0 || i >= static_cast<int>(v.size())) ? default_value
+                                                    : v[static_cast<size_t>(i)];
 }
 
 // Performs an in-place shuffle of a range of the vector's elements.
@@ -723,8 +718,11 @@ void ShuffleRange(internal::Random* random, int begin, int end,
   // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
   for (int range_width = end - begin; range_width >= 2; range_width--) {
     const int last_in_range = begin + range_width - 1;
-    const int selected = begin + random->Generate(range_width);
-    std::swap((*v)[selected], (*v)[last_in_range]);
+    const int selected =
+        begin +
+        static_cast<int>(random->Generate(static_cast<uint32_t>(range_width)));
+    std::swap((*v)[static_cast<size_t>(selected)],
+              (*v)[static_cast<size_t>(last_in_range)]);
   }
 }
 
@@ -751,7 +749,7 @@ class TestPropertyKeyIs {
   // TestPropertyKeyIs has NO default constructor.
   explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}
 
-  // Returns true iff the test name of test property matches on key_.
+  // Returns true if and only if the test name of test property matches on key_.
   bool operator()(const TestProperty& test_property) const {
     return test_property.key() == key_;
   }
@@ -784,17 +782,10 @@ class GTEST_API_ UnitTestOptions {
 
   // Functions for processing the gtest_filter flag.
 
-  // Returns true iff the wildcard pattern matches the string.  The
-  // first ':' or '\0' character in pattern marks the end of it.
-  //
-  // This recursive algorithm isn't very efficient, but is clear and
-  // works well enough for matching test names, which are short.
-  static bool PatternMatchesString(const char *pattern, const char *str);
-
-  // Returns true iff the user-specified filter matches the test case
-  // name and the test name.
-  static bool FilterMatchesTest(const std::string &test_case_name,
-                                const std::string &test_name);
+  // Returns true if and only if the user-specified filter matches the test
+  // suite name and the test name.
+  static bool FilterMatchesTest(const std::string& test_suite_name,
+                                const std::string& test_name);
 
 #if GTEST_OS_WINDOWS
   // Function for supporting the gtest_catch_exception flag.
@@ -826,13 +817,17 @@ class OsStackTraceGetterInterface {
   //                in the trace.
   //   skip_count - the number of top frames to be skipped; doesn't count
   //                against max_depth.
-  virtual string CurrentStackTrace(int max_depth, int skip_count) = 0;
+  virtual std::string CurrentStackTrace(int max_depth, int skip_count) = 0;
 
   // UponLeavingGTest() should be called immediately before Google Test calls
   // user code. It saves some information about the current stack that
   // CurrentStackTrace() will use to find and hide Google Test stack frames.
   virtual void UponLeavingGTest() = 0;
 
+  // This string is inserted in place of stack frames that are part of
+  // Google Test's implementation.
+  static const char* const kElidedFramesMarker;
+
  private:
   GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface);
 };
@@ -840,25 +835,21 @@ class OsStackTraceGetterInterface {
 // A working implementation of the OsStackTraceGetterInterface interface.
 class OsStackTraceGetter : public OsStackTraceGetterInterface {
  public:
-  OsStackTraceGetter() : caller_frame_(NULL) {}
-
-  virtual string CurrentStackTrace(int max_depth, int skip_count)
-      GTEST_LOCK_EXCLUDED_(mutex_);
+  OsStackTraceGetter() {}
 
-  virtual void UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_);
-
-  // This string is inserted in place of stack frames that are part of
-  // Google Test's implementation.
-  static const char* const kElidedFramesMarker;
+  std::string CurrentStackTrace(int max_depth, int skip_count) override;
+  void UponLeavingGTest() override;
 
  private:
-  Mutex mutex_;  // protects all internal state
+#if GTEST_HAS_ABSL
+  Mutex mutex_;  // Protects all internal state.
 
   // We save the stack frame below the frame that calls user code.
   // We do this because the address of the frame immediately below
   // the user code changes between the call to UponLeavingGTest()
-  // and any calls to CurrentStackTrace() from within the user code.
-  void* caller_frame_;
+  // and any calls to the stack trace code from within the user code.
+  void* caller_frame_ = nullptr;
+#endif  // GTEST_HAS_ABSL
 
   GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter);
 };
@@ -878,7 +869,7 @@ class DefaultGlobalTestPartResultReporter
   explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
   // Implements the TestPartResultReporterInterface. Reports the test part
   // result in the current test.
-  virtual void ReportTestPartResult(const TestPartResult& result);
+  void ReportTestPartResult(const TestPartResult& result) override;
 
  private:
   UnitTestImpl* const unit_test_;
@@ -894,7 +885,7 @@ class DefaultPerThreadTestPartResultReporter
   explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
   // Implements the TestPartResultReporterInterface. The implementation just
   // delegates to the current global test part result reporter of *unit_test_.
-  virtual void ReportTestPartResult(const TestPartResult& result);
+  void ReportTestPartResult(const TestPartResult& result) override;
 
  private:
   UnitTestImpl* const unit_test_;
@@ -912,9 +903,9 @@ class GTEST_API_ UnitTestImpl {
   virtual ~UnitTestImpl();
 
   // There are two different ways to register your own TestPartResultReporter.
-  // You can register your own reporter to listen either only for test results
+  // You can register your own repoter to listen either only for test results
   // from the current thread or for results from all threads.
-  // By default, each per-thread test result reporter just passes a new
+  // By default, each per-thread test result repoter just passes a new
   // TestPartResult to the global test result reporter, which registers the
   // test part result for the currently running test.
 
@@ -932,22 +923,25 @@ class GTEST_API_ UnitTestImpl {
   void SetTestPartResultReporterForCurrentThread(
       TestPartResultReporterInterface* reporter);
 
-  // Gets the number of successful test cases.
-  int successful_test_case_count() const;
+  // Gets the number of successful test suites.
+  int successful_test_suite_count() const;
 
-  // Gets the number of failed test cases.
-  int failed_test_case_count() const;
+  // Gets the number of failed test suites.
+  int failed_test_suite_count() const;
 
-  // Gets the number of all test cases.
-  int total_test_case_count() const;
+  // Gets the number of all test suites.
+  int total_test_suite_count() const;
 
-  // Gets the number of all test cases that contain at least one test
+  // Gets the number of all test suites that contain at least one test
   // that should run.
-  int test_case_to_run_count() const;
+  int test_suite_to_run_count() const;
 
   // Gets the number of successful tests.
   int successful_test_count() const;
 
+  // Gets the number of skipped tests.
+  int skipped_test_count() const;
+
   // Gets the number of failed tests.
   int failed_test_count() const;
 
@@ -973,27 +967,33 @@ class GTEST_API_ UnitTestImpl {
   // Gets the elapsed time, in milliseconds.
   TimeInMillis elapsed_time() const { return elapsed_time_; }
 
-  // Returns true iff the unit test passed (i.e. all test cases passed).
+  // Returns true if and only if the unit test passed (i.e. all test suites
+  // passed).
   bool Passed() const { return !Failed(); }
 
-  // Returns true iff the unit test failed (i.e. some test case failed
-  // or something outside of all tests failed).
+  // Returns true if and only if the unit test failed (i.e. some test suite
+  // failed or something outside of all tests failed).
   bool Failed() const {
-    return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed();
+    return failed_test_suite_count() > 0 || ad_hoc_test_result()->Failed();
   }
 
-  // Gets the i-th test case among all the test cases. i can range from 0 to
-  // total_test_case_count() - 1. If i is not in that range, returns NULL.
-  const TestCase* GetTestCase(int i) const {
-    const int index = GetElementOr(test_case_indices_, i, -1);
-    return index < 0 ? NULL : test_cases_[i];
+  // Gets the i-th test suite among all the test suites. i can range from 0 to
+  // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+  const TestSuite* GetTestSuite(int i) const {
+    const int index = GetElementOr(test_suite_indices_, i, -1);
+    return index < 0 ? nullptr : test_suites_[static_cast<size_t>(i)];
   }
 
-  // Gets the i-th test case among all the test cases. i can range from 0 to
-  // total_test_case_count() - 1. If i is not in that range, returns NULL.
-  TestCase* GetMutableTestCase(int i) {
-    const int index = GetElementOr(test_case_indices_, i, -1);
-    return index < 0 ? NULL : test_cases_[index];
+  //  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  const TestCase* GetTestCase(int i) const { return GetTestSuite(i); }
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  // Gets the i-th test suite among all the test suites. i can range from 0 to
+  // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+  TestSuite* GetMutableSuiteCase(int i) {
+    const int index = GetElementOr(test_suite_indices_, i, -1);
+    return index < 0 ? nullptr : test_suites_[static_cast<size_t>(index)];
   }
 
   // Provides access to the event listener list.
@@ -1030,31 +1030,40 @@ class GTEST_API_ UnitTestImpl {
   // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
   std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_;
 
-  // Finds and returns a TestCase with the given name.  If one doesn't
+  // Finds and returns a TestSuite with the given name.  If one doesn't
   // exist, creates one and returns it.
   //
   // Arguments:
   //
-  //   test_case_name: name of the test case
-  //   type_param:     the name of the test's type parameter, or NULL if
-  //                   this is not a typed or a type-parameterized test.
-  //   set_up_tc:      pointer to the function that sets up the test case
-  //   tear_down_tc:   pointer to the function that tears down the test case
-  TestCase* GetTestCase(const char* test_case_name,
-                        const char* type_param,
-                        Test::SetUpTestCaseFunc set_up_tc,
-                        Test::TearDownTestCaseFunc tear_down_tc);
+  //   test_suite_name: name of the test suite
+  //   type_param:      the name of the test's type parameter, or NULL if
+  //                    this is not a typed or a type-parameterized test.
+  //   set_up_tc:       pointer to the function that sets up the test suite
+  //   tear_down_tc:    pointer to the function that tears down the test suite
+  TestSuite* GetTestSuite(const char* test_suite_name, const char* type_param,
+                          internal::SetUpTestSuiteFunc set_up_tc,
+                          internal::TearDownTestSuiteFunc tear_down_tc);
+
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  TestCase* GetTestCase(const char* test_case_name, const char* type_param,
+                        internal::SetUpTestSuiteFunc set_up_tc,
+                        internal::TearDownTestSuiteFunc tear_down_tc) {
+    return GetTestSuite(test_case_name, type_param, set_up_tc, tear_down_tc);
+  }
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
   // Adds a TestInfo to the unit test.
   //
   // Arguments:
   //
-  //   set_up_tc:    pointer to the function that sets up the test case
-  //   tear_down_tc: pointer to the function that tears down the test case
+  //   set_up_tc:    pointer to the function that sets up the test suite
+  //   tear_down_tc: pointer to the function that tears down the test suite
   //   test_info:    the TestInfo object
-  void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc,
-                   Test::TearDownTestCaseFunc tear_down_tc,
+  void AddTestInfo(internal::SetUpTestSuiteFunc set_up_tc,
+                   internal::TearDownTestSuiteFunc tear_down_tc,
                    TestInfo* test_info) {
+#if GTEST_HAS_DEATH_TEST
     // In order to support thread-safe death tests, we need to
     // remember the original working directory when the test program
     // was first invoked.  We cannot do this in RUN_ALL_TESTS(), as
@@ -1067,24 +1076,33 @@ class GTEST_API_ UnitTestImpl {
       GTEST_CHECK_(!original_working_dir_.IsEmpty())
           << "Failed to get the current working directory.";
     }
+#endif  // GTEST_HAS_DEATH_TEST
 
-    GetTestCase(test_info->test_case_name(),
-                test_info->type_param(),
-                set_up_tc,
-                tear_down_tc)->AddTestInfo(test_info);
+    GetTestSuite(test_info->test_suite_name(), test_info->type_param(),
+                 set_up_tc, tear_down_tc)
+        ->AddTestInfo(test_info);
   }
 
-#if GTEST_HAS_PARAM_TEST
-  // Returns ParameterizedTestCaseRegistry object used to keep track of
+  // Returns ParameterizedTestSuiteRegistry object used to keep track of
   // value-parameterized tests and instantiate and register them.
-  internal::ParameterizedTestCaseRegistry& parameterized_test_registry() {
+  internal::ParameterizedTestSuiteRegistry& parameterized_test_registry() {
     return parameterized_test_registry_;
   }
-#endif  // GTEST_HAS_PARAM_TEST
 
-  // Sets the TestCase object for the test that's currently running.
-  void set_current_test_case(TestCase* a_current_test_case) {
-    current_test_case_ = a_current_test_case;
+  std::set<std::string>* ignored_parameterized_test_suites() {
+    return &ignored_parameterized_test_suites_;
+  }
+
+  // Returns TypeParameterizedTestSuiteRegistry object used to keep track of
+  // type-parameterized tests and instantiations of them.
+  internal::TypeParameterizedTestSuiteRegistry&
+  type_parameterized_test_registry() {
+    return type_parameterized_test_registry_;
+  }
+
+  // Sets the TestSuite object for the test that's currently running.
+  void set_current_test_suite(TestSuite* a_current_test_suite) {
+    current_test_suite_ = a_current_test_suite;
   }
 
   // Sets the TestInfo object for the test that's currently running.  If
@@ -1095,7 +1113,7 @@ class GTEST_API_ UnitTestImpl {
   }
 
   // Registers all parameterized tests defined using TEST_P and
-  // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter
+  // INSTANTIATE_TEST_SUITE_P, creating regular tests for each test/parameter
   // combination. This method can be called more then once; it has guards
   // protecting from registering the tests more then once.  If
   // value-parameterized tests are disabled, RegisterParameterizedTests is
@@ -1110,7 +1128,7 @@ class GTEST_API_ UnitTestImpl {
 
   // Clears the results of all tests, except the ad hoc tests.
   void ClearNonAdHocTestResult() {
-    ForEach(test_cases_, TestCase::ClearTestCaseResult);
+    ForEach(test_suites_, TestSuite::ClearTestSuiteResult);
   }
 
   // Clears the results of ad-hoc test assertions.
@@ -1119,7 +1137,7 @@ class GTEST_API_ UnitTestImpl {
   }
 
   // Adds a TestProperty to the current TestResult object when invoked in a
-  // context of a test or a test case, or to the global property set. If the
+  // context of a test or a test suite, or to the global property set. If the
   // result already contains a property with the same key, the value will be
   // updated.
   void RecordProperty(const TestProperty& test_property);
@@ -1131,7 +1149,7 @@ class GTEST_API_ UnitTestImpl {
 
   // Matches the full name of each test against the user-specified
   // filter to decide whether the test should run, then records the
-  // result in each TestCase and TestInfo object.
+  // result in each TestSuite and TestInfo object.
   // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests
   // based on sharding variables in the environment.
   // Returns the number of tests that should run.
@@ -1140,7 +1158,7 @@ class GTEST_API_ UnitTestImpl {
   // Prints the names of the tests matching the user-specified filter flag.
   void ListTestsMatchingFilter();
 
-  const TestCase* current_test_case() const { return current_test_case_; }
+  const TestSuite* current_test_suite() const { return current_test_suite_; }
   TestInfo* current_test_info() { return current_test_info_; }
   const TestInfo* current_test_info() const { return current_test_info_; }
 
@@ -1201,11 +1219,11 @@ class GTEST_API_ UnitTestImpl {
   // Gets the random number generator.
   internal::Random* random() { return &random_; }
 
-  // Shuffles all test cases, and the tests within each test case,
+  // Shuffles all test suites, and the tests within each test suite,
   // making sure that death tests are still run first.
   void ShuffleTests();
 
-  // Restores the test cases and tests to their order before the first shuffle.
+  // Restores the test suites and tests to their order before the first shuffle.
   void UnshuffleTests();
 
   // Returns the value of GTEST_FLAG(catch_exceptions) at the moment
@@ -1245,33 +1263,37 @@ class GTEST_API_ UnitTestImpl {
   // before/after the tests are run.
   std::vector<Environment*> environments_;
 
-  // The vector of TestCases in their original order.  It owns the
+  // The vector of TestSuites in their original order.  It owns the
   // elements in the vector.
-  std::vector<TestCase*> test_cases_;
+  std::vector<TestSuite*> test_suites_;
 
-  // Provides a level of indirection for the test case list to allow
-  // easy shuffling and restoring the test case order.  The i-th
-  // element of this vector is the index of the i-th test case in the
+  // Provides a level of indirection for the test suite list to allow
+  // easy shuffling and restoring the test suite order.  The i-th
+  // element of this vector is the index of the i-th test suite in the
   // shuffled order.
-  std::vector<int> test_case_indices_;
+  std::vector<int> test_suite_indices_;
 
-#if GTEST_HAS_PARAM_TEST
   // ParameterizedTestRegistry object used to register value-parameterized
   // tests.
-  internal::ParameterizedTestCaseRegistry parameterized_test_registry_;
+  internal::ParameterizedTestSuiteRegistry parameterized_test_registry_;
+  internal::TypeParameterizedTestSuiteRegistry
+      type_parameterized_test_registry_;
+
+  // The set holding the name of parameterized
+  // test suites that may go uninstantiated.
+  std::set<std::string> ignored_parameterized_test_suites_;
 
   // Indicates whether RegisterParameterizedTests() has been called already.
   bool parameterized_tests_registered_;
-#endif  // GTEST_HAS_PARAM_TEST
 
-  // Index of the last death test case registered.  Initially -1.
-  int last_death_test_case_;
+  // Index of the last death test suite registered.  Initially -1.
+  int last_death_test_suite_;
 
-  // This points to the TestCase for the currently running test.  It
-  // changes as Google Test goes through one test case after another.
+  // This points to the TestSuite for the currently running test.  It
+  // changes as Google Test goes through one test suite after another.
   // When no test is running, this is set to NULL and Google Test
   // stores assertion results in ad_hoc_test_result_.  Initially NULL.
-  TestCase* current_test_case_;
+  TestSuite* current_test_suite_;
 
   // This points to the TestInfo for the currently running test.  It
   // changes as Google Test goes through one test after another.  When
@@ -1299,7 +1321,7 @@ class GTEST_API_ UnitTestImpl {
   // desired.
   OsStackTraceGetterInterface* os_stack_trace_getter_;
 
-  // True iff PostFlagParsingInit() has been called.
+  // True if and only if PostFlagParsingInit() has been called.
   bool post_flag_parse_init_performed_;
 
   // The random number seed used at the beginning of the test run.
@@ -1318,8 +1340,8 @@ class GTEST_API_ UnitTestImpl {
 #if GTEST_HAS_DEATH_TEST
   // The decomposed components of the gtest_internal_run_death_test flag,
   // parsed when RUN_ALL_TESTS is called.
-  internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
-  internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_;
+  std::unique_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
+  std::unique_ptr<internal::DeathTestFactory> death_test_factory_;
 #endif  // GTEST_HAS_DEATH_TEST
 
   // A per-thread stack of traces created by the SCOPED_TRACE() macro.
@@ -1369,32 +1391,6 @@ GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv);
 // platform.
 GTEST_API_ std::string GetLastErrnoDescription();
 
-# if GTEST_OS_WINDOWS
-// Provides leak-safe Windows kernel handle ownership.
-class AutoHandle {
- public:
-  AutoHandle() : handle_(INVALID_HANDLE_VALUE) {}
-  explicit AutoHandle(HANDLE handle) : handle_(handle) {}
-
-  ~AutoHandle() { Reset(); }
-
-  HANDLE Get() const { return handle_; }
-  void Reset() { Reset(INVALID_HANDLE_VALUE); }
-  void Reset(HANDLE handle) {
-    if (handle != handle_) {
-      if (handle_ != INVALID_HANDLE_VALUE)
-        ::CloseHandle(handle_);
-      handle_ = handle;
-    }
-  }
-
- private:
-  HANDLE handle_;
-
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
-};
-# endif  // GTEST_OS_WINDOWS
-
 // Attempts to parse a string into a positive integer pointed to by the
 // number parameter.  Returns true if that is possible.
 // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
@@ -1412,24 +1408,11 @@ bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
   char* end;
   // BiggestConvertible is the largest integer type that system-provided
   // string-to-number conversion routines can return.
+  using BiggestConvertible = unsigned long long;  // NOLINT
 
-# if GTEST_OS_WINDOWS && !defined(__GNUC__)
-
-  // MSVC and C++ Builder define __int64 instead of the standard long long.
-  typedef unsigned __int64 BiggestConvertible;
-  const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10);
-
-# else
-
-  typedef unsigned long long BiggestConvertible;  // NOLINT
-  const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);
-
-# endif  // GTEST_OS_WINDOWS && !defined(__GNUC__)
-
+  const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);  // NOLINT
   const bool parse_success = *end == '\0' && errno == 0;
 
-  // TODO(vladl@google.com): Convert this to compile time assertion when it is
-  // available.
   GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));
 
   const Integer result = static_cast<Integer>(parsed);
@@ -1476,37 +1459,35 @@ class StreamingListener : public EmptyTestEventListener {
     virtual ~AbstractSocketWriter() {}
 
     // Sends a string to the socket.
-    virtual void Send(const string& message) = 0;
+    virtual void Send(const std::string& message) = 0;
 
     // Closes the socket.
     virtual void CloseConnection() {}
 
     // Sends a string and a newline to the socket.
-    void SendLn(const string& message) {
-      Send(message + "\n");
-    }
+    void SendLn(const std::string& message) { Send(message + "\n"); }
   };
 
   // Concrete class for actually writing strings to a socket.
   class SocketWriter : public AbstractSocketWriter {
    public:
-    SocketWriter(const string& host, const string& port)
+    SocketWriter(const std::string& host, const std::string& port)
         : sockfd_(-1), host_name_(host), port_num_(port) {
       MakeConnection();
     }
 
-    virtual ~SocketWriter() {
+    ~SocketWriter() override {
       if (sockfd_ != -1)
         CloseConnection();
     }
 
     // Sends a string to the socket.
-    virtual void Send(const string& message) {
+    void Send(const std::string& message) override {
       GTEST_CHECK_(sockfd_ != -1)
           << "Send() can be called only when there is a connection.";
 
-      const int len = static_cast<int>(message.length());
-      if (write(sockfd_, message.c_str(), len) != len) {
+      const auto len = static_cast<size_t>(message.length());
+      if (write(sockfd_, message.c_str(), len) != static_cast<ssize_t>(len)) {
         GTEST_LOG_(WARNING)
             << "stream_result_to: failed to stream to "
             << host_name_ << ":" << port_num_;
@@ -1518,7 +1499,7 @@ class StreamingListener : public EmptyTestEventListener {
     void MakeConnection();
 
     // Closes the socket.
-    void CloseConnection() {
+    void CloseConnection() override {
       GTEST_CHECK_(sockfd_ != -1)
           << "CloseConnection() can be called only when there is a connection.";
 
@@ -1527,26 +1508,28 @@ class StreamingListener : public EmptyTestEventListener {
     }
 
     int sockfd_;  // socket file descriptor
-    const string host_name_;
-    const string port_num_;
+    const std::string host_name_;
+    const std::string port_num_;
 
     GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter);
   };  // class SocketWriter
 
   // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
-  static string UrlEncode(const char* str);
+  static std::string UrlEncode(const char* str);
 
-  StreamingListener(const string& host, const string& port)
-      : socket_writer_(new SocketWriter(host, port)) { Start(); }
+  StreamingListener(const std::string& host, const std::string& port)
+      : socket_writer_(new SocketWriter(host, port)) {
+    Start();
+  }
 
   explicit StreamingListener(AbstractSocketWriter* socket_writer)
       : socket_writer_(socket_writer) { Start(); }
 
-  void OnTestProgramStart(const UnitTest& /* unit_test */) {
+  void OnTestProgramStart(const UnitTest& /* unit_test */) override {
     SendLn("event=TestProgramStart");
   }
 
-  void OnTestProgramEnd(const UnitTest& unit_test) {
+  void OnTestProgramEnd(const UnitTest& unit_test) override {
     // Note that Google Test current only report elapsed time for each
     // test iteration, not for the entire test program.
     SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));
@@ -1555,42 +1538,47 @@ class StreamingListener : public EmptyTestEventListener {
     socket_writer_->CloseConnection();
   }
 
-  void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
+  void OnTestIterationStart(const UnitTest& /* unit_test */,
+                            int iteration) override {
     SendLn("event=TestIterationStart&iteration=" +
            StreamableToString(iteration));
   }
 
-  void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
+  void OnTestIterationEnd(const UnitTest& unit_test,
+                          int /* iteration */) override {
     SendLn("event=TestIterationEnd&passed=" +
            FormatBool(unit_test.Passed()) + "&elapsed_time=" +
            StreamableToString(unit_test.elapsed_time()) + "ms");
   }
 
-  void OnTestCaseStart(const TestCase& test_case) {
+  // Note that "event=TestCaseStart" is a wire format and has to remain
+  // "case" for compatibility
+  void OnTestCaseStart(const TestCase& test_case) override {
     SendLn(std::string("event=TestCaseStart&name=") + test_case.name());
   }
 
-  void OnTestCaseEnd(const TestCase& test_case) {
-    SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed())
-           + "&elapsed_time=" + StreamableToString(test_case.elapsed_time())
-           + "ms");
+  // Note that "event=TestCaseEnd" is a wire format and has to remain
+  // "case" for compatibility
+  void OnTestCaseEnd(const TestCase& test_case) override {
+    SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed()) +
+           "&elapsed_time=" + StreamableToString(test_case.elapsed_time()) +
+           "ms");
   }
 
-  void OnTestStart(const TestInfo& test_info) {
+  void OnTestStart(const TestInfo& test_info) override {
     SendLn(std::string("event=TestStart&name=") + test_info.name());
   }
 
-  void OnTestEnd(const TestInfo& test_info) {
+  void OnTestEnd(const TestInfo& test_info) override {
     SendLn("event=TestEnd&passed=" +
            FormatBool((test_info.result())->Passed()) +
            "&elapsed_time=" +
            StreamableToString((test_info.result())->elapsed_time()) + "ms");
   }
 
-  void OnTestPartResult(const TestPartResult& test_part_result) {
+  void OnTestPartResult(const TestPartResult& test_part_result) override {
     const char* file_name = test_part_result.file_name();
-    if (file_name == NULL)
-      file_name = "";
+    if (file_name == nullptr) file_name = "";
     SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
            "&line=" + StreamableToString(test_part_result.line_number()) +
            "&message=" + UrlEncode(test_part_result.message()));
@@ -1598,15 +1586,15 @@ class StreamingListener : public EmptyTestEventListener {
 
  private:
   // Sends the given message and a newline to the socket.
-  void SendLn(const string& message) { socket_writer_->SendLn(message); }
+  void SendLn(const std::string& message) { socket_writer_->SendLn(message); }
 
   // Called at the start of streaming to notify the receiver what
   // protocol we are using.
   void Start() { SendLn("gtest_streaming_protocol_version=1.0"); }
 
-  string FormatBool(bool value) { return value ? "1" : "0"; }
+  std::string FormatBool(bool value) { return value ? "1" : "0"; }
 
-  const scoped_ptr<AbstractSocketWriter> socket_writer_;
+  const std::unique_ptr<AbstractSocketWriter> socket_writer_;
 
   GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
 };  // class StreamingListener
@@ -1616,13 +1604,27 @@ class StreamingListener : public EmptyTestEventListener {
 }  // namespace internal
 }  // namespace testing
 
-#endif  // GTEST_SRC_GTEST_INTERNAL_INL_H_
-#undef GTEST_IMPLEMENTATION_
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+#endif  // GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
 
 #if GTEST_OS_WINDOWS
 # define vsnprintf _vsnprintf
 #endif  // GTEST_OS_WINDOWS
 
+#if GTEST_OS_MAC
+#ifndef GTEST_OS_IOS
+#include <crt_externs.h>
+#endif
+#endif
+
+#if GTEST_HAS_ABSL
+#include "absl/debugging/failure_signal_handler.h"
+#include "absl/debugging/stacktrace.h"
+#include "absl/debugging/symbolize.h"
+#include "absl/strings/str_cat.h"
+#endif  // GTEST_HAS_ABSL
+
 namespace testing {
 
 using internal::CountIf;
@@ -1632,20 +1634,22 @@ using internal::Shuffle;
 
 // Constants.
 
-// A test whose test case name or test name matches this filter is
+// A test whose test suite name or test name matches this filter is
 // disabled and not run.
 static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*";
 
-// A test case whose name matches this filter is considered a death
-// test case and will be run before test cases whose name doesn't
+// A test suite whose name matches this filter is considered a death
+// test suite and will be run before test suites whose name doesn't
 // match this filter.
-static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*";
+static const char kDeathTestSuiteFilter[] = "*DeathTest:*DeathTest/*";
 
 // A test filter that matches everything.
 static const char kUniversalFilter[] = "*";
 
-// The default output file for XML output.
-static const char kDefaultOutputFile[] = "test_detail.xml";
+// The default output format.
+static const char kDefaultOutputFormat[] = "xml";
+// The default output file.
+static const char kDefaultOutputFile[] = "test_detail";
 
 // The environment variable name for the test shard index.
 static const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
@@ -1660,31 +1664,67 @@ namespace internal {
 // stack trace.
 const char kStackTraceMarker[] = "\nStack trace:\n";
 
-// g_help_flag is true iff the --help flag or an equivalent form is
-// specified on the command line.
+// g_help_flag is true if and only if the --help flag or an equivalent form
+// is specified on the command line.
 bool g_help_flag = false;
 
+// Utilty function to Open File for Writing
+static FILE* OpenFileForWriting(const std::string& output_file) {
+  FILE* fileout = nullptr;
+  FilePath output_file_path(output_file);
+  FilePath output_dir(output_file_path.RemoveFileName());
+
+  if (output_dir.CreateDirectoriesRecursively()) {
+    fileout = posix::FOpen(output_file.c_str(), "w");
+  }
+  if (fileout == nullptr) {
+    GTEST_LOG_(FATAL) << "Unable to open file \"" << output_file << "\"";
+  }
+  return fileout;
+}
+
 }  // namespace internal
 
+// Bazel passes in the argument to '--test_filter' via the TESTBRIDGE_TEST_ONLY
+// environment variable.
 static const char* GetDefaultFilter() {
+  const char* const testbridge_test_only =
+      internal::posix::GetEnv("TESTBRIDGE_TEST_ONLY");
+  if (testbridge_test_only != nullptr) {
+    return testbridge_test_only;
+  }
   return kUniversalFilter;
 }
 
+// Bazel passes in the argument to '--test_runner_fail_fast' via the
+// TESTBRIDGE_TEST_RUNNER_FAIL_FAST environment variable.
+static bool GetDefaultFailFast() {
+  const char* const testbridge_test_runner_fail_fast =
+      internal::posix::GetEnv("TESTBRIDGE_TEST_RUNNER_FAIL_FAST");
+  if (testbridge_test_runner_fail_fast != nullptr) {
+    return strcmp(testbridge_test_runner_fail_fast, "1") == 0;
+  }
+  return false;
+}
+
+GTEST_DEFINE_bool_(
+    fail_fast, internal::BoolFromGTestEnv("fail_fast", GetDefaultFailFast()),
+    "True if and only if a test failure should stop further test execution.");
+
 GTEST_DEFINE_bool_(
     also_run_disabled_tests,
     internal::BoolFromGTestEnv("also_run_disabled_tests", false),
     "Run disabled tests too, in addition to the tests normally being run.");
 
 GTEST_DEFINE_bool_(
-    break_on_failure,
-    internal::BoolFromGTestEnv("break_on_failure", false),
-    "True iff a failed assertion should be a debugger break-point.");
+    break_on_failure, internal::BoolFromGTestEnv("break_on_failure", false),
+    "True if and only if a failed assertion should be a debugger "
+    "break-point.");
 
-GTEST_DEFINE_bool_(
-    catch_exceptions,
-    internal::BoolFromGTestEnv("catch_exceptions", true),
-    "True iff " GTEST_NAME_
-    " should catch exceptions and treat them as test failures.");
+GTEST_DEFINE_bool_(catch_exceptions,
+                   internal::BoolFromGTestEnv("catch_exceptions", true),
+                   "True if and only if " GTEST_NAME_
+                   " should catch exceptions and treat them as test failures.");
 
 GTEST_DEFINE_string_(
     color,
@@ -1703,15 +1743,28 @@ GTEST_DEFINE_string_(
     "exclude).  A test is run if it matches one of the positive "
     "patterns and does not match any of the negative patterns.");
 
+GTEST_DEFINE_bool_(
+    install_failure_signal_handler,
+    internal::BoolFromGTestEnv("install_failure_signal_handler", false),
+    "If true and supported on the current platform, " GTEST_NAME_ " should "
+    "install a signal handler that dumps debugging information when fatal "
+    "signals are raised.");
+
 GTEST_DEFINE_bool_(list_tests, false,
                    "List all tests without running them.");
 
+// The net priority order after flag processing is thus:
+//   --gtest_output command line flag
+//   GTEST_OUTPUT environment variable
+//   XML_OUTPUT_FILE environment variable
+//   ''
 GTEST_DEFINE_string_(
     output,
-    internal::StringFromGTestEnv("output", ""),
-    "A format (currently must be \"xml\"), optionally followed "
-    "by a colon and an output file name or directory. A directory "
-    "is indicated by a trailing pathname separator. "
+    internal::StringFromGTestEnv("output",
+      internal::OutputFlagAlsoCheckEnvVar().c_str()),
+    "A format (defaults to \"xml\" but can be specified to be \"json\"), "
+    "optionally followed by a colon and an output file name or directory. "
+    "A directory is indicated by a trailing pathname separator. "
     "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
     "If a directory is specified, output files will be created "
     "within that directory, with file-names based on the test "
@@ -1719,10 +1772,16 @@ GTEST_DEFINE_string_(
     "digits.");
 
 GTEST_DEFINE_bool_(
-    print_time,
-    internal::BoolFromGTestEnv("print_time", true),
-    "True iff " GTEST_NAME_
-    " should display elapsed time in text output.");
+    brief, internal::BoolFromGTestEnv("brief", false),
+    "True if only test failures should be displayed in text output.");
+
+GTEST_DEFINE_bool_(print_time, internal::BoolFromGTestEnv("print_time", true),
+                   "True if and only if " GTEST_NAME_
+                   " should display elapsed time in text output.");
+
+GTEST_DEFINE_bool_(print_utf8, internal::BoolFromGTestEnv("print_utf8", true),
+                   "True if and only if " GTEST_NAME_
+                   " prints UTF8 characters as text.");
 
 GTEST_DEFINE_int32_(
     random_seed,
@@ -1736,16 +1795,14 @@ GTEST_DEFINE_int32_(
     "How many times to repeat each test.  Specify a negative number "
     "for repeating forever.  Useful for shaking out flaky tests.");
 
-GTEST_DEFINE_bool_(
-    show_internal_stack_frames, false,
-    "True iff " GTEST_NAME_ " should include internal stack frames when "
-    "printing test failure stack traces.");
+GTEST_DEFINE_bool_(show_internal_stack_frames, false,
+                   "True if and only if " GTEST_NAME_
+                   " should include internal stack frames when "
+                   "printing test failure stack traces.");
 
-GTEST_DEFINE_bool_(
-    shuffle,
-    internal::BoolFromGTestEnv("shuffle", false),
-    "True iff " GTEST_NAME_
-    " should randomize tests' order on every run.");
+GTEST_DEFINE_bool_(shuffle, internal::BoolFromGTestEnv("shuffle", false),
+                   "True if and only if " GTEST_NAME_
+                   " should randomize tests' order on every run.");
 
 GTEST_DEFINE_int32_(
     stack_trace_depth,
@@ -1765,16 +1822,24 @@ GTEST_DEFINE_bool_(
     internal::BoolFromGTestEnv("throw_on_failure", false),
     "When this flag is specified, a failed assertion will throw an exception "
     "if exceptions are enabled or exit the program with a non-zero code "
-    "otherwise.");
+    "otherwise. For use with an external test framework.");
+
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+GTEST_DEFINE_string_(
+    flagfile,
+    internal::StringFromGTestEnv("flagfile", ""),
+    "This flag specifies the flagfile to read command-line flags from.");
+#endif  // GTEST_USE_OWN_FLAGFILE_FLAG_
 
 namespace internal {
 
 // Generates a random number from [0, range), using a Linear
 // Congruential Generator (LCG).  Crashes if 'range' is 0 or greater
 // than kMaxRange.
-UInt32 Random::Generate(UInt32 range) {
+uint32_t Random::Generate(uint32_t range) {
   // These constants are the same as are used in glibc's rand(3).
-  state_ = (1103515245U*state_ + 12345U) % kMaxRange;
+  // Use wider types than necessary to prevent unsigned overflow diagnostics.
+  state_ = static_cast<uint32_t>(1103515245ULL*state_ + 12345U) % kMaxRange;
 
   GTEST_CHECK_(range > 0)
       << "Cannot generate a number in the range [0, 0).";
@@ -1788,22 +1853,16 @@ UInt32 Random::Generate(UInt32 range) {
   return state_ % range;
 }
 
-// GTestIsInitialized() returns true iff the user has initialized
+// GTestIsInitialized() returns true if and only if the user has initialized
 // Google Test.  Useful for catching the user mistake of not initializing
 // Google Test before calling RUN_ALL_TESTS().
-//
-// A user must call testing::InitGoogleTest() to initialize Google
-// Test.  g_init_gtest_count is set to the number of times
-// InitGoogleTest() has been called.  We don't protect this variable
-// under a mutex as it is only accessed in the main thread.
-GTEST_API_ int g_init_gtest_count = 0;
-static bool GTestIsInitialized() { return g_init_gtest_count != 0; }
-
-// Iterates over a vector of TestCases, keeping a running sum of the
+static bool GTestIsInitialized() { return GetArgvs().size() > 0; }
+
+// Iterates over a vector of TestSuites, keeping a running sum of the
 // results of calling a given int-returning method on each.
 // Returns the sum.
-static int SumOverTestCaseList(const std::vector<TestCase*>& case_list,
-                               int (TestCase::*method)() const) {
+static int SumOverTestSuiteList(const std::vector<TestSuite*>& case_list,
+                                int (TestSuite::*method)() const) {
   int sum = 0;
   for (size_t i = 0; i < case_list.size(); i++) {
     sum += (case_list[i]->*method)();
@@ -1811,20 +1870,20 @@ static int SumOverTestCaseList(const std::vector<TestCase*>& case_list,
   return sum;
 }
 
-// Returns true iff the test case passed.
-static bool TestCasePassed(const TestCase* test_case) {
-  return test_case->should_run() && test_case->Passed();
+// Returns true if and only if the test suite passed.
+static bool TestSuitePassed(const TestSuite* test_suite) {
+  return test_suite->should_run() && test_suite->Passed();
 }
 
-// Returns true iff the test case failed.
-static bool TestCaseFailed(const TestCase* test_case) {
-  return test_case->should_run() && test_case->Failed();
+// Returns true if and only if the test suite failed.
+static bool TestSuiteFailed(const TestSuite* test_suite) {
+  return test_suite->should_run() && test_suite->Failed();
 }
 
-// Returns true iff test_case contains at least one test that should
-// run.
-static bool ShouldRunTestCase(const TestCase* test_case) {
-  return test_case->should_run();
+// Returns true if and only if test_suite contains at least one test that
+// should run.
+static bool ShouldRunTestSuite(const TestSuite* test_suite) {
+  return test_suite->should_run();
 }
 
 // AssertHelper constructor.
@@ -1850,21 +1909,185 @@ void AssertHelper::operator=(const Message& message) const {
                       );  // NOLINT
 }
 
-// Mutex for linked pointers.
-GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex);
+namespace {
+
+// When TEST_P is found without a matching INSTANTIATE_TEST_SUITE_P
+// to creates test cases for it, a syntetic test case is
+// inserted to report ether an error or a log message.
+//
+// This configuration bit will likely be removed at some point.
+constexpr bool kErrorOnUninstantiatedParameterizedTest = true;
+constexpr bool kErrorOnUninstantiatedTypeParameterizedTest = true;
+
+// A test that fails at a given file/line location with a given message.
+class FailureTest : public Test {
+ public:
+  explicit FailureTest(const CodeLocation& loc, std::string error_message,
+                       bool as_error)
+      : loc_(loc),
+        error_message_(std::move(error_message)),
+        as_error_(as_error) {}
+
+  void TestBody() override {
+    if (as_error_) {
+      AssertHelper(TestPartResult::kNonFatalFailure, loc_.file.c_str(),
+                   loc_.line, "") = Message() << error_message_;
+    } else {
+      std::cout << error_message_ << std::endl;
+    }
+  }
+
+ private:
+  const CodeLocation loc_;
+  const std::string error_message_;
+  const bool as_error_;
+};
+
+
+}  // namespace
+
+std::set<std::string>* GetIgnoredParameterizedTestSuites() {
+  return UnitTest::GetInstance()->impl()->ignored_parameterized_test_suites();
+}
+
+// Add a given test_suit to the list of them allow to go un-instantiated.
+MarkAsIgnored::MarkAsIgnored(const char* test_suite) {
+  GetIgnoredParameterizedTestSuites()->insert(test_suite);
+}
+
+// If this parameterized test suite has no instantiations (and that
+// has not been marked as okay), emit a test case reporting that.
+void InsertSyntheticTestCase(const std::string& name, CodeLocation location,
+                             bool has_test_p) {
+  const auto& ignored = *GetIgnoredParameterizedTestSuites();
+  if (ignored.find(name) != ignored.end()) return;
+
+  const char kMissingInstantiation[] =  //
+      " is defined via TEST_P, but never instantiated. None of the test cases "
+      "will run. Either no INSTANTIATE_TEST_SUITE_P is provided or the only "
+      "ones provided expand to nothing."
+      "\n\n"
+      "Ideally, TEST_P definitions should only ever be included as part of "
+      "binaries that intend to use them. (As opposed to, for example, being "
+      "placed in a library that may be linked in to get other utilities.)";
+
+  const char kMissingTestCase[] =  //
+      " is instantiated via INSTANTIATE_TEST_SUITE_P, but no tests are "
+      "defined via TEST_P . No test cases will run."
+      "\n\n"
+      "Ideally, INSTANTIATE_TEST_SUITE_P should only ever be invoked from "
+      "code that always depend on code that provides TEST_P. Failing to do "
+      "so is often an indication of dead code, e.g. the last TEST_P was "
+      "removed but the rest got left behind.";
+
+  std::string message =
+      "Parameterized test suite " + name +
+      (has_test_p ? kMissingInstantiation : kMissingTestCase) +
+      "\n\n"
+      "To suppress this error for this test suite, insert the following line "
+      "(in a non-header) in the namespace it is defined in:"
+      "\n\n"
+      "GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" + name + ");";
+
+  std::string full_name = "UninstantiatedParameterizedTestSuite<" + name + ">";
+  RegisterTest(  //
+      "GoogleTestVerification", full_name.c_str(),
+      nullptr,  // No type parameter.
+      nullptr,  // No value parameter.
+      location.file.c_str(), location.line, [message, location] {
+        return new FailureTest(location, message,
+                               kErrorOnUninstantiatedParameterizedTest);
+      });
+}
+
+void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
+                                        CodeLocation code_location) {
+  GetUnitTestImpl()->type_parameterized_test_registry().RegisterTestSuite(
+      test_suite_name, code_location);
+}
+
+void RegisterTypeParameterizedTestSuiteInstantiation(const char* case_name) {
+  GetUnitTestImpl()
+      ->type_parameterized_test_registry()
+      .RegisterInstantiation(case_name);
+}
+
+void TypeParameterizedTestSuiteRegistry::RegisterTestSuite(
+    const char* test_suite_name, CodeLocation code_location) {
+  suites_.emplace(std::string(test_suite_name),
+                 TypeParameterizedTestSuiteInfo(code_location));
+}
+
+void TypeParameterizedTestSuiteRegistry::RegisterInstantiation(
+        const char* test_suite_name) {
+  auto it = suites_.find(std::string(test_suite_name));
+  if (it != suites_.end()) {
+    it->second.instantiated = true;
+  } else {
+    GTEST_LOG_(ERROR) << "Unknown type parameterized test suit '"
+                      << test_suite_name << "'";
+  }
+}
+
+void TypeParameterizedTestSuiteRegistry::CheckForInstantiations() {
+  const auto& ignored = *GetIgnoredParameterizedTestSuites();
+  for (const auto& testcase : suites_) {
+    if (testcase.second.instantiated) continue;
+    if (ignored.find(testcase.first) != ignored.end()) continue;
+
+    std::string message =
+        "Type parameterized test suite " + testcase.first +
+        " is defined via REGISTER_TYPED_TEST_SUITE_P, but never instantiated "
+        "via INSTANTIATE_TYPED_TEST_SUITE_P. None of the test cases will run."
+        "\n\n"
+        "Ideally, TYPED_TEST_P definitions should only ever be included as "
+        "part of binaries that intend to use them. (As opposed to, for "
+        "example, being placed in a library that may be linked in to get other "
+        "utilities.)"
+        "\n\n"
+        "To suppress this error for this test suite, insert the following line "
+        "(in a non-header) in the namespace it is defined in:"
+        "\n\n"
+        "GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" +
+        testcase.first + ");";
+
+    std::string full_name =
+        "UninstantiatedTypeParameterizedTestSuite<" + testcase.first + ">";
+    RegisterTest(  //
+        "GoogleTestVerification", full_name.c_str(),
+        nullptr,  // No type parameter.
+        nullptr,  // No value parameter.
+        testcase.second.code_location.file.c_str(),
+        testcase.second.code_location.line, [message, testcase] {
+          return new FailureTest(testcase.second.code_location, message,
+                                 kErrorOnUninstantiatedTypeParameterizedTest);
+        });
+  }
+}
 
-// Application pathname gotten in InitGoogleTest.
-std::string g_executable_path;
+// A copy of all command line arguments.  Set by InitGoogleTest().
+static ::std::vector<std::string> g_argvs;
+
+::std::vector<std::string> GetArgvs() {
+#if defined(GTEST_CUSTOM_GET_ARGVS_)
+  // GTEST_CUSTOM_GET_ARGVS_() may return a container of std::string or
+  // ::string. This code converts it to the appropriate type.
+  const auto& custom = GTEST_CUSTOM_GET_ARGVS_();
+  return ::std::vector<std::string>(custom.begin(), custom.end());
+#else   // defined(GTEST_CUSTOM_GET_ARGVS_)
+  return g_argvs;
+#endif  // defined(GTEST_CUSTOM_GET_ARGVS_)
+}
 
 // Returns the current application's name, removing directory path if that
 // is present.
 FilePath GetCurrentExecutableName() {
   FilePath result;
 
-#if GTEST_OS_WINDOWS
-  result.Set(FilePath(g_executable_path).RemoveExtension("exe"));
+#if GTEST_OS_WINDOWS || GTEST_OS_OS2
+  result.Set(FilePath(GetArgvs()[0]).RemoveExtension("exe"));
 #else
-  result.Set(FilePath(g_executable_path));
+  result.Set(FilePath(GetArgvs()[0]));
 #endif  // GTEST_OS_WINDOWS
 
   return result.RemoveDirectoryName();
@@ -1875,34 +2098,32 @@ FilePath GetCurrentExecutableName() {
 // Returns the output format, or "" for normal printed output.
 std::string UnitTestOptions::GetOutputFormat() {
   const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
-  if (gtest_output_flag == NULL) return std::string("");
-
   const char* const colon = strchr(gtest_output_flag, ':');
-  return (colon == NULL) ?
-      std::string(gtest_output_flag) :
-      std::string(gtest_output_flag, colon - gtest_output_flag);
+  return (colon == nullptr)
+             ? std::string(gtest_output_flag)
+             : std::string(gtest_output_flag,
+                           static_cast<size_t>(colon - gtest_output_flag));
 }
 
 // Returns the name of the requested output file, or the default if none
 // was explicitly specified.
 std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
   const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
-  if (gtest_output_flag == NULL)
-    return "";
+
+  std::string format = GetOutputFormat();
+  if (format.empty())
+    format = std::string(kDefaultOutputFormat);
 
   const char* const colon = strchr(gtest_output_flag, ':');
-  if (colon == NULL)
-    return internal::FilePath::ConcatPaths(
+  if (colon == nullptr)
+    return internal::FilePath::MakeFileName(
         internal::FilePath(
             UnitTest::GetInstance()->original_working_dir()),
-        internal::FilePath(kDefaultOutputFile)).string();
+        internal::FilePath(kDefaultOutputFile), 0,
+        format.c_str()).string();
 
   internal::FilePath output_name(colon + 1);
   if (!output_name.IsAbsolutePath())
-    // TODO(wan@google.com): on Windows \some\path is not an absolute
-    // path (as its meaning depends on the current drive), yet the
-    // following logic for turning it into an absolute path is wrong.
-    // Fix it.
     output_name = internal::FilePath::ConcatPaths(
         internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
         internal::FilePath(colon + 1));
@@ -1916,54 +2137,92 @@ std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
   return result.string();
 }
 
-// Returns true iff the wildcard pattern matches the string.  The
-// first ':' or '\0' character in pattern marks the end of it.
-//
-// This recursive algorithm isn't very efficient, but is clear and
-// works well enough for matching test names, which are short.
-bool UnitTestOptions::PatternMatchesString(const char *pattern,
-                                           const char *str) {
-  switch (*pattern) {
-    case '\0':
-    case ':':  // Either ':' or '\0' marks the end of the pattern.
-      return *str == '\0';
-    case '?':  // Matches any single character.
-      return *str != '\0' && PatternMatchesString(pattern + 1, str + 1);
-    case '*':  // Matches any string (possibly empty) of characters.
-      return (*str != '\0' && PatternMatchesString(pattern, str + 1)) ||
-          PatternMatchesString(pattern + 1, str);
-    default:  // Non-special character.  Matches itself.
-      return *pattern == *str &&
-          PatternMatchesString(pattern + 1, str + 1);
-  }
-}
-
-bool UnitTestOptions::MatchesFilter(
-    const std::string& name, const char* filter) {
-  const char *cur_pattern = filter;
-  for (;;) {
-    if (PatternMatchesString(cur_pattern, name.c_str())) {
-      return true;
+// Returns true if and only if the wildcard pattern matches the string. Each
+// pattern consists of regular characters, single-character wildcards (?), and
+// multi-character wildcards (*).
+//
+// This function implements a linear-time string globbing algorithm based on
+// https://research.swtch.com/glob.
+static bool PatternMatchesString(const std::string& name_str,
+                                 const char* pattern, const char* pattern_end) {
+  const char* name = name_str.c_str();
+  const char* const name_begin = name;
+  const char* const name_end = name + name_str.size();
+
+  const char* pattern_next = pattern;
+  const char* name_next = name;
+
+  while (pattern < pattern_end || name < name_end) {
+    if (pattern < pattern_end) {
+      switch (*pattern) {
+        default:  // Match an ordinary character.
+          if (name < name_end && *name == *pattern) {
+            ++pattern;
+            ++name;
+            continue;
+          }
+          break;
+        case '?':  // Match any single character.
+          if (name < name_end) {
+            ++pattern;
+            ++name;
+            continue;
+          }
+          break;
+        case '*':
+          // Match zero or more characters. Start by skipping over the wildcard
+          // and matching zero characters from name. If that fails, restart and
+          // match one more character than the last attempt.
+          pattern_next = pattern;
+          name_next = name + 1;
+          ++pattern;
+          continue;
+      }
     }
+    // Failed to match a character. Restart if possible.
+    if (name_begin < name_next && name_next <= name_end) {
+      pattern = pattern_next;
+      name = name_next;
+      continue;
+    }
+    return false;
+  }
+  return true;
+}
 
-    // Finds the next pattern in the filter.
-    cur_pattern = strchr(cur_pattern, ':');
+bool UnitTestOptions::MatchesFilter(const std::string& name_str,
+                                    const char* filter) {
+  // The filter is a list of patterns separated by colons (:).
+  const char* pattern = filter;
+  while (true) {
+    // Find the bounds of this pattern.
+    const char* const next_sep = strchr(pattern, ':');
+    const char* const pattern_end =
+        next_sep != nullptr ? next_sep : pattern + strlen(pattern);
 
-    // Returns if no more pattern can be found.
-    if (cur_pattern == NULL) {
-      return false;
+    // Check if this pattern matches name_str.
+    if (PatternMatchesString(name_str, pattern, pattern_end)) {
+      return true;
     }
 
-    // Skips the pattern separater (the ':' character).
-    cur_pattern++;
+    // Give up on this pattern. However, if we found a pattern separator (:),
+    // advance to the next pattern (skipping over the separator) and restart.
+    if (next_sep == nullptr) {
+      return false;
+    }
+    pattern = next_sep + 1;
   }
+#if defined(__EDG__)
+#pragma diag_suppress code_is_unreachable
+#endif
+  return true;
 }
 
-// Returns true iff the user-specified filter matches the test case
-// name and the test name.
-bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name,
-                                        const std::string &test_name) {
-  const std::string& full_name = test_case_name + "." + test_name.c_str();
+// Returns true if and only if the user-specified filter matches the test
+// suite name and the test name.
+bool UnitTestOptions::FilterMatchesTest(const std::string& test_suite_name,
+                                        const std::string& test_name) {
+  const std::string& full_name = test_suite_name + "." + test_name.c_str();
 
   // Split --gtest_filter at '-', if there is one, to separate into
   // positive filter and negative filter portions
@@ -1971,7 +2230,7 @@ bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name,
   const char* const dash = strchr(p, '-');
   std::string positive;
   std::string negative;
-  if (dash == NULL) {
+  if (dash == nullptr) {
     positive = GTEST_FLAG(filter).c_str();  // Whole string is a positive filter
     negative = "";
   } else {
@@ -2090,12 +2349,12 @@ extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();
 // This predicate-formatter checks that 'results' contains a test part
 // failure of the given type and that the failure message contains the
 // given substring.
-AssertionResult HasOneFailure(const char* /* results_expr */,
-                              const char* /* type_expr */,
-                              const char* /* substr_expr */,
-                              const TestPartResultArray& results,
-                              TestPartResult::Type type,
-                              const string& substr) {
+static AssertionResult HasOneFailure(const char* /* results_expr */,
+                                     const char* /* type_expr */,
+                                     const char* /* substr_expr */,
+                                     const TestPartResultArray& results,
+                                     TestPartResult::Type type,
+                                     const std::string& substr) {
   const std::string expected(type == TestPartResult::kFatalFailure ?
                         "1 fatal failure" :
                         "1 non-fatal failure");
@@ -2116,7 +2375,7 @@ AssertionResult HasOneFailure(const char* /* results_expr */,
                               << r;
   }
 
-  if (strstr(r.message(), substr.c_str()) == NULL) {
+  if (strstr(r.message(), substr.c_str()) == nullptr) {
     return AssertionFailure() << "Expected: " << expected << " containing \""
                               << substr << "\"\n"
                               << "  Actual:\n"
@@ -2129,13 +2388,10 @@ AssertionResult HasOneFailure(const char* /* results_expr */,
 // The constructor of SingleFailureChecker remembers where to look up
 // test part results, what type of failure we expect, and what
 // substring the failure message should contain.
-SingleFailureChecker:: SingleFailureChecker(
-    const TestPartResultArray* results,
-    TestPartResult::Type type,
-    const string& substr)
-    : results_(results),
-      type_(type),
-      substr_(substr) {}
+SingleFailureChecker::SingleFailureChecker(const TestPartResultArray* results,
+                                           TestPartResult::Type type,
+                                           const std::string& substr)
+    : results_(results), type_(type), substr_(substr) {}
 
 // The destructor of SingleFailureChecker verifies that the given
 // TestPartResultArray contains exactly one failure that has the given
@@ -2188,61 +2444,66 @@ void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
   per_thread_test_part_result_reporter_.set(reporter);
 }
 
-// Gets the number of successful test cases.
-int UnitTestImpl::successful_test_case_count() const {
-  return CountIf(test_cases_, TestCasePassed);
+// Gets the number of successful test suites.
+int UnitTestImpl::successful_test_suite_count() const {
+  return CountIf(test_suites_, TestSuitePassed);
 }
 
-// Gets the number of failed test cases.
-int UnitTestImpl::failed_test_case_count() const {
-  return CountIf(test_cases_, TestCaseFailed);
+// Gets the number of failed test suites.
+int UnitTestImpl::failed_test_suite_count() const {
+  return CountIf(test_suites_, TestSuiteFailed);
 }
 
-// Gets the number of all test cases.
-int UnitTestImpl::total_test_case_count() const {
-  return static_cast<int>(test_cases_.size());
+// Gets the number of all test suites.
+int UnitTestImpl::total_test_suite_count() const {
+  return static_cast<int>(test_suites_.size());
 }
 
-// Gets the number of all test cases that contain at least one test
+// Gets the number of all test suites that contain at least one test
 // that should run.
-int UnitTestImpl::test_case_to_run_count() const {
-  return CountIf(test_cases_, ShouldRunTestCase);
+int UnitTestImpl::test_suite_to_run_count() const {
+  return CountIf(test_suites_, ShouldRunTestSuite);
 }
 
 // Gets the number of successful tests.
 int UnitTestImpl::successful_test_count() const {
-  return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count);
+  return SumOverTestSuiteList(test_suites_, &TestSuite::successful_test_count);
+}
+
+// Gets the number of skipped tests.
+int UnitTestImpl::skipped_test_count() const {
+  return SumOverTestSuiteList(test_suites_, &TestSuite::skipped_test_count);
 }
 
 // Gets the number of failed tests.
 int UnitTestImpl::failed_test_count() const {
-  return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count);
+  return SumOverTestSuiteList(test_suites_, &TestSuite::failed_test_count);
 }
 
 // Gets the number of disabled tests that will be reported in the XML report.
 int UnitTestImpl::reportable_disabled_test_count() const {
-  return SumOverTestCaseList(test_cases_,
-                             &TestCase::reportable_disabled_test_count);
+  return SumOverTestSuiteList(test_suites_,
+                              &TestSuite::reportable_disabled_test_count);
 }
 
 // Gets the number of disabled tests.
 int UnitTestImpl::disabled_test_count() const {
-  return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count);
+  return SumOverTestSuiteList(test_suites_, &TestSuite::disabled_test_count);
 }
 
 // Gets the number of tests to be printed in the XML report.
 int UnitTestImpl::reportable_test_count() const {
-  return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count);
+  return SumOverTestSuiteList(test_suites_, &TestSuite::reportable_test_count);
 }
 
 // Gets the number of all tests.
 int UnitTestImpl::total_test_count() const {
-  return SumOverTestCaseList(test_cases_, &TestCase::total_test_count);
+  return SumOverTestSuiteList(test_suites_, &TestSuite::total_test_count);
 }
 
 // Gets the number of tests that should run.
 int UnitTestImpl::test_to_run_count() const {
-  return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count);
+  return SumOverTestSuiteList(test_suites_, &TestSuite::test_to_run_count);
 }
 
 // Returns the current OS stack trace as an std::string.
@@ -2256,60 +2517,38 @@ int UnitTestImpl::test_to_run_count() const {
 // CurrentOsStackTraceExceptTop(1), Foo() will be included in the
 // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
 std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
-  (void)skip_count;
-  return "";
+  return os_stack_trace_getter()->CurrentStackTrace(
+      static_cast<int>(GTEST_FLAG(stack_trace_depth)),
+      skip_count + 1
+      // Skips the user-specified number of frames plus this function
+      // itself.
+      );  // NOLINT
 }
 
-// Returns the current time in milliseconds.
-TimeInMillis GetTimeInMillis() {
-#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
-  // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
-  // http://analogous.blogspot.com/2005/04/epoch.html
-  const TimeInMillis kJavaEpochToWinFileTimeDelta =
-    static_cast<TimeInMillis>(116444736UL) * 100000UL;
-  const DWORD kTenthMicrosInMilliSecond = 10000;
-
-  SYSTEMTIME now_systime;
-  FILETIME now_filetime;
-  ULARGE_INTEGER now_int64;
-  // TODO(kenton@google.com): Shouldn't this just use
-  //   GetSystemTimeAsFileTime()?
-  GetSystemTime(&now_systime);
-  if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
-    now_int64.LowPart = now_filetime.dwLowDateTime;
-    now_int64.HighPart = now_filetime.dwHighDateTime;
-    now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
-      kJavaEpochToWinFileTimeDelta;
-    return now_int64.QuadPart;
-  }
-  return 0;
-#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
-  __timeb64 now;
-
-# ifdef _MSC_VER
-
-  // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
-  // (deprecated function) there.
-  // TODO(kenton@google.com): Use GetTickCount()?  Or use
-  //   SystemTimeToFileTime()
-#  pragma warning(push)          // Saves the current warning state.
-#  pragma warning(disable:4996)  // Temporarily disables warning 4996.
-  _ftime64(&now);
-#  pragma warning(pop)           // Restores the warning state.
-# else
+// A helper class for measuring elapsed times.
+class Timer {
+ public:
+  Timer() : start_(std::chrono::steady_clock::now()) {}
 
-  _ftime64(&now);
+  // Return time elapsed in milliseconds since the timer was created.
+  TimeInMillis Elapsed() {
+    return std::chrono::duration_cast<std::chrono::milliseconds>(
+               std::chrono::steady_clock::now() - start_)
+        .count();
+  }
 
-# endif  // _MSC_VER
+ private:
+  std::chrono::steady_clock::time_point start_;
+};
 
-  return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
-#elif GTEST_HAS_GETTIMEOFDAY_
-  struct timeval now;
-  gettimeofday(&now, NULL);
-  return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
-#else
-# error "Don't know how to get the current time on your system."
-#endif
+// Returns a timestamp as milliseconds since the epoch. Note this time may jump
+// around subject to adjustments by the system, to measure elapsed time use
+// Timer instead.
+TimeInMillis GetTimeInMillis() {
+  return std::chrono::duration_cast<std::chrono::milliseconds>(
+             std::chrono::system_clock::now() -
+             std::chrono::system_clock::from_time_t(0))
+      .count();
 }
 
 // Utilities
@@ -2322,11 +2561,10 @@ TimeInMillis GetTimeInMillis() {
 // value using delete[]. Returns the wide string, or NULL if the
 // input is NULL.
 LPCWSTR String::AnsiToUtf16(const char* ansi) {
-  if (!ansi) return NULL;
+  if (!ansi) return nullptr;
   const int length = strlen(ansi);
   const int unicode_length =
-      MultiByteToWideChar(CP_ACP, 0, ansi, length,
-                          NULL, 0);
+      MultiByteToWideChar(CP_ACP, 0, ansi, length, nullptr, 0);
   WCHAR* unicode = new WCHAR[unicode_length + 1];
   MultiByteToWideChar(CP_ACP, 0, ansi, length,
                       unicode, unicode_length);
@@ -2339,33 +2577,33 @@ LPCWSTR String::AnsiToUtf16(const char* ansi) {
 // value using delete[]. Returns the ANSI string, or NULL if the
 // input is NULL.
 const char* String::Utf16ToAnsi(LPCWSTR utf16_str)  {
-  if (!utf16_str) return NULL;
-  const int ansi_length =
-      WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
-                          NULL, 0, NULL, NULL);
+  if (!utf16_str) return nullptr;
+  const int ansi_length = WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, nullptr,
+                                              0, nullptr, nullptr);
   char* ansi = new char[ansi_length + 1];
-  WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
-                      ansi, ansi_length, NULL, NULL);
+  WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, ansi, ansi_length, nullptr,
+                      nullptr);
   ansi[ansi_length] = 0;
   return ansi;
 }
 
 #endif  // GTEST_OS_WINDOWS_MOBILE
 
-// Compares two C strings.  Returns true iff they have the same content.
+// Compares two C strings.  Returns true if and only if they have the same
+// content.
 //
 // Unlike strcmp(), this function can handle NULL argument(s).  A NULL
 // C string is considered different to any non-NULL C string,
 // including the empty string.
 bool String::CStringEquals(const char * lhs, const char * rhs) {
-  if ( lhs == NULL ) return rhs == NULL;
+  if (lhs == nullptr) return rhs == nullptr;
 
-  if ( rhs == NULL ) return false;
+  if (rhs == nullptr) return false;
 
   return strcmp(lhs, rhs) == 0;
 }
 
-#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING
+#if GTEST_HAS_STD_WSTRING
 
 // Converts an array of wide chars to a narrow string using the UTF-8
 // encoding, and streams the result to the given Message object.
@@ -2383,7 +2621,24 @@ static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
   }
 }
 
-#endif  // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING
+#endif  // GTEST_HAS_STD_WSTRING
+
+void SplitString(const ::std::string& str, char delimiter,
+                 ::std::vector< ::std::string>* dest) {
+  ::std::vector< ::std::string> parsed;
+  ::std::string::size_type pos = 0;
+  while (::testing::internal::AlwaysTrue()) {
+    const ::std::string::size_type colon = str.find(delimiter, pos);
+    if (colon == ::std::string::npos) {
+      parsed.push_back(str.substr(pos));
+      break;
+    } else {
+      parsed.push_back(str.substr(pos, colon - pos));
+      pos = colon + 1;
+    }
+  }
+  dest->swap(parsed);
+}
 
 }  // namespace internal
 
@@ -2416,15 +2671,6 @@ Message& Message::operator <<(const ::std::wstring& wstr) {
 }
 #endif  // GTEST_HAS_STD_WSTRING
 
-#if GTEST_HAS_GLOBAL_WSTRING
-// Converts the given wide string to a narrow string using the UTF-8
-// encoding, and streams the result to this Message object.
-Message& Message::operator <<(const ::wstring& wstr) {
-  internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
-  return *this;
-}
-#endif  // GTEST_HAS_GLOBAL_WSTRING
-
 // Gets the text streamed to this object so far as an std::string.
 // Each '\0' character in the buffer is replaced with "\\0".
 std::string Message::GetString() const {
@@ -2435,16 +2681,21 @@ std::string Message::GetString() const {
 // Used in EXPECT_TRUE/FALSE(assertion_result).
 AssertionResult::AssertionResult(const AssertionResult& other)
     : success_(other.success_),
-      message_(other.message_.get() != NULL ?
-               new ::std::string(*other.message_) :
-               static_cast< ::std::string*>(NULL)) {
+      message_(other.message_.get() != nullptr
+                   ? new ::std::string(*other.message_)
+                   : static_cast< ::std::string*>(nullptr)) {}
+
+// Swaps two AssertionResults.
+void AssertionResult::swap(AssertionResult& other) {
+  using std::swap;
+  swap(success_, other.success_);
+  swap(message_, other.message_);
 }
 
 // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
 AssertionResult AssertionResult::operator!() const {
   AssertionResult negation(!success_);
-  if (message_.get() != NULL)
-    negation << *message_;
+  if (message_.get() != nullptr) negation << *message_;
   return negation;
 }
 
@@ -2466,93 +2717,399 @@ AssertionResult AssertionFailure(const Message& message) {
 
 namespace internal {
 
-// Constructs and returns the message for an equality assertion
-// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
-//
-// The first four parameters are the expressions used in the assertion
-// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
-// where foo is 5 and bar is 6, we have:
-//
-//   expected_expression: "foo"
-//   actual_expression:   "bar"
-//   expected_value:      "5"
-//   actual_value:        "6"
-//
-// The ignoring_case parameter is true iff the assertion is a
-// *_STRCASEEQ*.  When it's true, the string " (ignoring case)" will
-// be inserted into the message.
-AssertionResult EqFailure(const char* expected_expression,
-                          const char* actual_expression,
-                          const std::string& expected_value,
-                          const std::string& actual_value,
-                          bool ignoring_case) {
-  Message msg;
-  msg << "Value of: " << actual_expression;
-  if (actual_value != actual_expression) {
-    msg << "\n  Actual: " << actual_value;
-  }
+namespace edit_distance {
+std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
+                                            const std::vector<size_t>& right) {
+  std::vector<std::vector<double> > costs(
+      left.size() + 1, std::vector<double>(right.size() + 1));
+  std::vector<std::vector<EditType> > best_move(
+      left.size() + 1, std::vector<EditType>(right.size() + 1));
+
+  // Populate for empty right.
+  for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
+    costs[l_i][0] = static_cast<double>(l_i);
+    best_move[l_i][0] = kRemove;
+  }
+  // Populate for empty left.
+  for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
+    costs[0][r_i] = static_cast<double>(r_i);
+    best_move[0][r_i] = kAdd;
+  }
+
+  for (size_t l_i = 0; l_i < left.size(); ++l_i) {
+    for (size_t r_i = 0; r_i < right.size(); ++r_i) {
+      if (left[l_i] == right[r_i]) {
+        // Found a match. Consume it.
+        costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
+        best_move[l_i + 1][r_i + 1] = kMatch;
+        continue;
+      }
 
-  msg << "\nExpected: " << expected_expression;
-  if (ignoring_case) {
-    msg << " (ignoring case)";
-  }
-  if (expected_value != expected_expression) {
-    msg << "\nWhich is: " << expected_value;
+      const double add = costs[l_i + 1][r_i];
+      const double remove = costs[l_i][r_i + 1];
+      const double replace = costs[l_i][r_i];
+      if (add < remove && add < replace) {
+        costs[l_i + 1][r_i + 1] = add + 1;
+        best_move[l_i + 1][r_i + 1] = kAdd;
+      } else if (remove < add && remove < replace) {
+        costs[l_i + 1][r_i + 1] = remove + 1;
+        best_move[l_i + 1][r_i + 1] = kRemove;
+      } else {
+        // We make replace a little more expensive than add/remove to lower
+        // their priority.
+        costs[l_i + 1][r_i + 1] = replace + 1.00001;
+        best_move[l_i + 1][r_i + 1] = kReplace;
+      }
+    }
   }
 
-  return AssertionFailure() << msg;
+  // Reconstruct the best path. We do it in reverse order.
+  std::vector<EditType> best_path;
+  for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
+    EditType move = best_move[l_i][r_i];
+    best_path.push_back(move);
+    l_i -= move != kAdd;
+    r_i -= move != kRemove;
+  }
+  std::reverse(best_path.begin(), best_path.end());
+  return best_path;
 }
 
-// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
-std::string GetBoolAssertionFailureMessage(
-    const AssertionResult& assertion_result,
-    const char* expression_text,
-    const char* actual_predicate_value,
-    const char* expected_predicate_value) {
-  const char* actual_message = assertion_result.message();
-  Message msg;
-  msg << "Value of: " << expression_text
-      << "\n  Actual: " << actual_predicate_value;
-  if (actual_message[0] != '\0')
-    msg << " (" << actual_message << ")";
-  msg << "\nExpected: " << expected_predicate_value;
-  return msg.GetString();
-}
+namespace {
 
-// Helper function for implementing ASSERT_NEAR.
-AssertionResult DoubleNearPredFormat(const char* expr1,
-                                     const char* expr2,
-                                     const char* abs_error_expr,
-                                     double val1,
-                                     double val2,
-                                     double abs_error) {
-  const double diff = fabs(val1 - val2);
-  if (diff <= abs_error) return AssertionSuccess();
+// Helper class to convert string into ids with deduplication.
+class InternalStrings {
+ public:
+  size_t GetId(const std::string& str) {
+    IdMap::iterator it = ids_.find(str);
+    if (it != ids_.end()) return it->second;
+    size_t id = ids_.size();
+    return ids_[str] = id;
+  }
 
-  // TODO(wan): do not print the value of an expression if it's
-  // already a literal.
-  return AssertionFailure()
-      << "The difference between " << expr1 << " and " << expr2
-      << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
-      << expr1 << " evaluates to " << val1 << ",\n"
-      << expr2 << " evaluates to " << val2 << ", and\n"
-      << abs_error_expr << " evaluates to " << abs_error << ".";
-}
+ private:
+  typedef std::map<std::string, size_t> IdMap;
+  IdMap ids_;
+};
 
+}  // namespace
 
-// Helper template for implementing FloatLE() and DoubleLE().
-template <typename RawType>
-AssertionResult FloatingPointLE(const char* expr1,
-                                const char* expr2,
-                                RawType val1,
-                                RawType val2) {
-  // Returns success if val1 is less than val2,
-  if (val1 < val2) {
-    return AssertionSuccess();
+std::vector<EditType> CalculateOptimalEdits(
+    const std::vector<std::string>& left,
+    const std::vector<std::string>& right) {
+  std::vector<size_t> left_ids, right_ids;
+  {
+    InternalStrings intern_table;
+    for (size_t i = 0; i < left.size(); ++i) {
+      left_ids.push_back(intern_table.GetId(left[i]));
+    }
+    for (size_t i = 0; i < right.size(); ++i) {
+      right_ids.push_back(intern_table.GetId(right[i]));
+    }
   }
+  return CalculateOptimalEdits(left_ids, right_ids);
+}
 
-  // or if val1 is almost equal to val2.
-  const FloatingPoint<RawType> lhs(val1), rhs(val2);
+namespace {
+
+// Helper class that holds the state for one hunk and prints it out to the
+// stream.
+// It reorders adds/removes when possible to group all removes before all
+// adds. It also adds the hunk header before printint into the stream.
+class Hunk {
+ public:
+  Hunk(size_t left_start, size_t right_start)
+      : left_start_(left_start),
+        right_start_(right_start),
+        adds_(),
+        removes_(),
+        common_() {}
+
+  void PushLine(char edit, const char* line) {
+    switch (edit) {
+      case ' ':
+        ++common_;
+        FlushEdits();
+        hunk_.push_back(std::make_pair(' ', line));
+        break;
+      case '-':
+        ++removes_;
+        hunk_removes_.push_back(std::make_pair('-', line));
+        break;
+      case '+':
+        ++adds_;
+        hunk_adds_.push_back(std::make_pair('+', line));
+        break;
+    }
+  }
+
+  void PrintTo(std::ostream* os) {
+    PrintHeader(os);
+    FlushEdits();
+    for (std::list<std::pair<char, const char*> >::const_iterator it =
+             hunk_.begin();
+         it != hunk_.end(); ++it) {
+      *os << it->first << it->second << "\n";
+    }
+  }
+
+  bool has_edits() const { return adds_ || removes_; }
+
+ private:
+  void FlushEdits() {
+    hunk_.splice(hunk_.end(), hunk_removes_);
+    hunk_.splice(hunk_.end(), hunk_adds_);
+  }
+
+  // Print a unified diff header for one hunk.
+  // The format is
+  //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
+  // where the left/right parts are omitted if unnecessary.
+  void PrintHeader(std::ostream* ss) const {
+    *ss << "@@ ";
+    if (removes_) {
+      *ss << "-" << left_start_ << "," << (removes_ + common_);
+    }
+    if (removes_ && adds_) {
+      *ss << " ";
+    }
+    if (adds_) {
+      *ss << "+" << right_start_ << "," << (adds_ + common_);
+    }
+    *ss << " @@\n";
+  }
+
+  size_t left_start_, right_start_;
+  size_t adds_, removes_, common_;
+  std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
+};
+
+}  // namespace
+
+// Create a list of diff hunks in Unified diff format.
+// Each hunk has a header generated by PrintHeader above plus a body with
+// lines prefixed with ' ' for no change, '-' for deletion and '+' for
+// addition.
+// 'context' represents the desired unchanged prefix/suffix around the diff.
+// If two hunks are close enough that their contexts overlap, then they are
+// joined into one hunk.
+std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+                              const std::vector<std::string>& right,
+                              size_t context) {
+  const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
+
+  size_t l_i = 0, r_i = 0, edit_i = 0;
+  std::stringstream ss;
+  while (edit_i < edits.size()) {
+    // Find first edit.
+    while (edit_i < edits.size() && edits[edit_i] == kMatch) {
+      ++l_i;
+      ++r_i;
+      ++edit_i;
+    }
+
+    // Find the first line to include in the hunk.
+    const size_t prefix_context = std::min(l_i, context);
+    Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
+    for (size_t i = prefix_context; i > 0; --i) {
+      hunk.PushLine(' ', left[l_i - i].c_str());
+    }
+
+    // Iterate the edits until we found enough suffix for the hunk or the input
+    // is over.
+    size_t n_suffix = 0;
+    for (; edit_i < edits.size(); ++edit_i) {
+      if (n_suffix >= context) {
+        // Continue only if the next hunk is very close.
+        auto it = edits.begin() + static_cast<int>(edit_i);
+        while (it != edits.end() && *it == kMatch) ++it;
+        if (it == edits.end() ||
+            static_cast<size_t>(it - edits.begin()) - edit_i >= context) {
+          // There is no next edit or it is too far away.
+          break;
+        }
+      }
+
+      EditType edit = edits[edit_i];
+      // Reset count when a non match is found.
+      n_suffix = edit == kMatch ? n_suffix + 1 : 0;
+
+      if (edit == kMatch || edit == kRemove || edit == kReplace) {
+        hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
+      }
+      if (edit == kAdd || edit == kReplace) {
+        hunk.PushLine('+', right[r_i].c_str());
+      }
+
+      // Advance indices, depending on edit type.
+      l_i += edit != kAdd;
+      r_i += edit != kRemove;
+    }
+
+    if (!hunk.has_edits()) {
+      // We are done. We don't want this hunk.
+      break;
+    }
+
+    hunk.PrintTo(&ss);
+  }
+  return ss.str();
+}
+
+}  // namespace edit_distance
+
+namespace {
+
+// The string representation of the values received in EqFailure() are already
+// escaped. Split them on escaped '\n' boundaries. Leave all other escaped
+// characters the same.
+std::vector<std::string> SplitEscapedString(const std::string& str) {
+  std::vector<std::string> lines;
+  size_t start = 0, end = str.size();
+  if (end > 2 && str[0] == '"' && str[end - 1] == '"') {
+    ++start;
+    --end;
+  }
+  bool escaped = false;
+  for (size_t i = start; i + 1 < end; ++i) {
+    if (escaped) {
+      escaped = false;
+      if (str[i] == 'n') {
+        lines.push_back(str.substr(start, i - start - 1));
+        start = i + 1;
+      }
+    } else {
+      escaped = str[i] == '\\';
+    }
+  }
+  lines.push_back(str.substr(start, end - start));
+  return lines;
+}
+
+}  // namespace
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+//   lhs_expression: "foo"
+//   rhs_expression: "bar"
+//   lhs_value:      "5"
+//   rhs_value:      "6"
+//
+// The ignoring_case parameter is true if and only if the assertion is a
+// *_STRCASEEQ*.  When it's true, the string "Ignoring case" will
+// be inserted into the message.
+AssertionResult EqFailure(const char* lhs_expression,
+                          const char* rhs_expression,
+                          const std::string& lhs_value,
+                          const std::string& rhs_value,
+                          bool ignoring_case) {
+  Message msg;
+  msg << "Expected equality of these values:";
+  msg << "\n  " << lhs_expression;
+  if (lhs_value != lhs_expression) {
+    msg << "\n    Which is: " << lhs_value;
+  }
+  msg << "\n  " << rhs_expression;
+  if (rhs_value != rhs_expression) {
+    msg << "\n    Which is: " << rhs_value;
+  }
+
+  if (ignoring_case) {
+    msg << "\nIgnoring case";
+  }
+
+  if (!lhs_value.empty() && !rhs_value.empty()) {
+    const std::vector<std::string> lhs_lines =
+        SplitEscapedString(lhs_value);
+    const std::vector<std::string> rhs_lines =
+        SplitEscapedString(rhs_value);
+    if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
+      msg << "\nWith diff:\n"
+          << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);
+    }
+  }
+
+  return AssertionFailure() << msg;
+}
+
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+std::string GetBoolAssertionFailureMessage(
+    const AssertionResult& assertion_result,
+    const char* expression_text,
+    const char* actual_predicate_value,
+    const char* expected_predicate_value) {
+  const char* actual_message = assertion_result.message();
+  Message msg;
+  msg << "Value of: " << expression_text
+      << "\n  Actual: " << actual_predicate_value;
+  if (actual_message[0] != '\0')
+    msg << " (" << actual_message << ")";
+  msg << "\nExpected: " << expected_predicate_value;
+  return msg.GetString();
+}
+
+// Helper function for implementing ASSERT_NEAR.
+AssertionResult DoubleNearPredFormat(const char* expr1,
+                                     const char* expr2,
+                                     const char* abs_error_expr,
+                                     double val1,
+                                     double val2,
+                                     double abs_error) {
+  const double diff = fabs(val1 - val2);
+  if (diff <= abs_error) return AssertionSuccess();
+
+  // Find the value which is closest to zero.
+  const double min_abs = std::min(fabs(val1), fabs(val2));
+  // Find the distance to the next double from that value.
+  const double epsilon =
+      nextafter(min_abs, std::numeric_limits<double>::infinity()) - min_abs;
+  // Detect the case where abs_error is so small that EXPECT_NEAR is
+  // effectively the same as EXPECT_EQUAL, and give an informative error
+  // message so that the situation can be more easily understood without
+  // requiring exotic floating-point knowledge.
+  // Don't do an epsilon check if abs_error is zero because that implies
+  // that an equality check was actually intended.
+  if (!(std::isnan)(val1) && !(std::isnan)(val2) && abs_error > 0 &&
+      abs_error < epsilon) {
+    return AssertionFailure()
+           << "The difference between " << expr1 << " and " << expr2 << " is "
+           << diff << ", where\n"
+           << expr1 << " evaluates to " << val1 << ",\n"
+           << expr2 << " evaluates to " << val2 << ".\nThe abs_error parameter "
+           << abs_error_expr << " evaluates to " << abs_error
+           << " which is smaller than the minimum distance between doubles for "
+              "numbers of this magnitude which is "
+           << epsilon
+           << ", thus making this EXPECT_NEAR check equivalent to "
+              "EXPECT_EQUAL. Consider using EXPECT_DOUBLE_EQ instead.";
+  }
+  return AssertionFailure()
+      << "The difference between " << expr1 << " and " << expr2
+      << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
+      << expr1 << " evaluates to " << val1 << ",\n"
+      << expr2 << " evaluates to " << val2 << ", and\n"
+      << abs_error_expr << " evaluates to " << abs_error << ".";
+}
+
+
+// Helper template for implementing FloatLE() and DoubleLE().
+template <typename RawType>
+AssertionResult FloatingPointLE(const char* expr1,
+                                const char* expr2,
+                                RawType val1,
+                                RawType val2) {
+  // Returns success if val1 is less than val2,
+  if (val1 < val2) {
+    return AssertionSuccess();
+  }
+
+  // or if val1 is almost equal to val2.
+  const FloatingPoint<RawType> lhs(val1), rhs(val2);
   if (lhs.AlmostEquals(rhs)) {
     return AssertionSuccess();
   }
@@ -2593,86 +3150,35 @@ AssertionResult DoubleLE(const char* expr1, const char* expr2,
 
 namespace internal {
 
-// The helper function for {ASSERT|EXPECT}_EQ with int or enum
-// arguments.
-AssertionResult CmpHelperEQ(const char* expected_expression,
-                            const char* actual_expression,
-                            BiggestInt expected,
-                            BiggestInt actual) {
-  if (expected == actual) {
-    return AssertionSuccess();
-  }
-
-  return EqFailure(expected_expression,
-                   actual_expression,
-                   FormatForComparisonFailureMessage(expected, actual),
-                   FormatForComparisonFailureMessage(actual, expected),
-                   false);
-}
-
-// A macro for implementing the helper functions needed to implement
-// ASSERT_?? and EXPECT_?? with integer or enum arguments.  It is here
-// just to avoid copy-and-paste of similar code.
-#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
-AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
-                                   BiggestInt val1, BiggestInt val2) {\
-  if (val1 op val2) {\
-    return AssertionSuccess();\
-  } else {\
-    return AssertionFailure() \
-        << "Expected: (" << expr1 << ") " #op " (" << expr2\
-        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
-        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
-  }\
-}
-
-// Implements the helper function for {ASSERT|EXPECT}_NE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(NE, !=)
-// Implements the helper function for {ASSERT|EXPECT}_LE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LE, <=)
-// Implements the helper function for {ASSERT|EXPECT}_LT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LT, < )
-// Implements the helper function for {ASSERT|EXPECT}_GE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GE, >=)
-// Implements the helper function for {ASSERT|EXPECT}_GT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GT, > )
-
-#undef GTEST_IMPL_CMP_HELPER_
-
 // The helper function for {ASSERT|EXPECT}_STREQ.
-AssertionResult CmpHelperSTREQ(const char* expected_expression,
-                               const char* actual_expression,
-                               const char* expected,
-                               const char* actual) {
-  if (String::CStringEquals(expected, actual)) {
+AssertionResult CmpHelperSTREQ(const char* lhs_expression,
+                               const char* rhs_expression,
+                               const char* lhs,
+                               const char* rhs) {
+  if (String::CStringEquals(lhs, rhs)) {
     return AssertionSuccess();
   }
 
-  return EqFailure(expected_expression,
-                   actual_expression,
-                   PrintToString(expected),
-                   PrintToString(actual),
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   PrintToString(lhs),
+                   PrintToString(rhs),
                    false);
 }
 
 // The helper function for {ASSERT|EXPECT}_STRCASEEQ.
-AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
-                                   const char* actual_expression,
-                                   const char* expected,
-                                   const char* actual) {
-  if (String::CaseInsensitiveCStringEquals(expected, actual)) {
+AssertionResult CmpHelperSTRCASEEQ(const char* lhs_expression,
+                                   const char* rhs_expression,
+                                   const char* lhs,
+                                   const char* rhs) {
+  if (String::CaseInsensitiveCStringEquals(lhs, rhs)) {
     return AssertionSuccess();
   }
 
-  return EqFailure(expected_expression,
-                   actual_expression,
-                   PrintToString(expected),
-                   PrintToString(actual),
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   PrintToString(lhs),
+                   PrintToString(rhs),
                    true);
 }
 
@@ -2711,22 +3217,20 @@ namespace {
 
 // Helper functions for implementing IsSubString() and IsNotSubstring().
 
-// This group of overloaded functions return true iff needle is a
-// substring of haystack.  NULL is considered a substring of itself
-// only.
+// This group of overloaded functions return true if and only if needle
+// is a substring of haystack.  NULL is considered a substring of
+// itself only.
 
 bool IsSubstringPred(const char* needle, const char* haystack) {
-  if (needle == NULL || haystack == NULL)
-    return needle == haystack;
+  if (needle == nullptr || haystack == nullptr) return needle == haystack;
 
-  return strstr(haystack, needle) != NULL;
+  return strstr(haystack, needle) != nullptr;
 }
 
 bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
-  if (needle == NULL || haystack == NULL)
-    return needle == haystack;
+  if (needle == nullptr || haystack == nullptr) return needle == haystack;
 
-  return wcsstr(haystack, needle) != NULL;
+  return wcsstr(haystack, needle) != nullptr;
 }
 
 // StringType here can be either ::std::string or ::std::wstring.
@@ -2824,7 +3328,7 @@ namespace {
 AssertionResult HRESULTFailureHelper(const char* expr,
                                      const char* expected,
                                      long hr) {  // NOLINT
-# if GTEST_OS_WINDOWS_MOBILE
+# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_TV_TITLE
 
   // Windows CE doesn't support FormatMessage.
   const char error_text[] = "";
@@ -2840,12 +3344,12 @@ AssertionResult HRESULTFailureHelper(const char* expr,
   // Gets the system's human readable message string for this HRESULT.
   char error_text[kBufSize] = { '\0' };
   DWORD message_length = ::FormatMessageA(kFlags,
-                                          0,  // no source, we're asking system
-                                          hr,  // the error
-                                          0,  // no line width restrictions
+                                          0,   // no source, we're asking system
+                                          static_cast<DWORD>(hr),  // the error
+                                          0,   // no line width restrictions
                                           error_text,  // output buffer
-                                          kBufSize,  // buf size
-                                          NULL);  // no arguments for inserts
+                                          kBufSize,    // buf size
+                                          nullptr);  // no arguments for inserts
   // Trims tailing white space (FormatMessage leaves a trailing CR-LF)
   for (; message_length && IsSpace(error_text[message_length - 1]);
           --message_length) {
@@ -2881,7 +3385,7 @@ AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
 // Utility functions for encoding Unicode text (wide strings) in
 // UTF-8.
 
-// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8
+// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
 // like this:
 //
 // Code-point length   Encoding
@@ -2891,35 +3395,35 @@ AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
 //  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 
 // The maximum code-point a one-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) <<  7) - 1;
+constexpr uint32_t kMaxCodePoint1 = (static_cast<uint32_t>(1) <<  7) - 1;
 
 // The maximum code-point a two-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;
+constexpr uint32_t kMaxCodePoint2 = (static_cast<uint32_t>(1) << (5 + 6)) - 1;
 
 // The maximum code-point a three-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1;
+constexpr uint32_t kMaxCodePoint3 = (static_cast<uint32_t>(1) << (4 + 2*6)) - 1;
 
 // The maximum code-point a four-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1;
+constexpr uint32_t kMaxCodePoint4 = (static_cast<uint32_t>(1) << (3 + 3*6)) - 1;
 
 // Chops off the n lowest bits from a bit pattern.  Returns the n
 // lowest bits.  As a side effect, the original bit pattern will be
 // shifted to the right by n bits.
-inline UInt32 ChopLowBits(UInt32* bits, int n) {
-  const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1);
+inline uint32_t ChopLowBits(uint32_t* bits, int n) {
+  const uint32_t low_bits = *bits & ((static_cast<uint32_t>(1) << n) - 1);
   *bits >>= n;
   return low_bits;
 }
 
 // Converts a Unicode code point to a narrow string in UTF-8 encoding.
-// code_point parameter is of type UInt32 because wchar_t may not be
+// code_point parameter is of type uint32_t because wchar_t may not be
 // wide enough to contain a code point.
 // If the code_point is not a valid Unicode code point
 // (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
 // to "(Invalid Unicode 0xXXXXXXXX)".
-std::string CodePointToUtf8(UInt32 code_point) {
+std::string CodePointToUtf8(uint32_t code_point) {
   if (code_point > kMaxCodePoint4) {
-    return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
+    return "(Invalid Unicode 0x" + String::FormatHexUInt32(code_point) + ")";
   }
 
   char str[5];  // Big enough for the largest valid code point.
@@ -2945,9 +3449,9 @@ std::string CodePointToUtf8(UInt32 code_point) {
   return str;
 }
 
-// The following two functions only make sense if the the system
+// The following two functions only make sense if the system
 // uses UTF-16 for wide string encoding. All supported systems
-// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.
+// with 16 bit wchar_t (Windows, Cygwin) do use UTF-16.
 
 // Determines if the arguments constitute UTF-16 surrogate pair
 // and thus should be combined into a single Unicode code point
@@ -2958,19 +3462,22 @@ inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
 }
 
 // Creates a Unicode code point from UTF16 surrogate pair.
-inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
-                                                    wchar_t second) {
-  const UInt32 mask = (1 << 10) - 1;
-  return (sizeof(wchar_t) == 2) ?
-      (((first & mask) << 10) | (second & mask)) + 0x10000 :
-      // This function should not be called when the condition is
-      // false, but we provide a sensible default in case it is.
-      static_cast<UInt32>(first);
+inline uint32_t CreateCodePointFromUtf16SurrogatePair(wchar_t first,
+                                                      wchar_t second) {
+  const auto first_u = static_cast<uint32_t>(first);
+  const auto second_u = static_cast<uint32_t>(second);
+  const uint32_t mask = (1 << 10) - 1;
+  return (sizeof(wchar_t) == 2)
+             ? (((first_u & mask) << 10) | (second_u & mask)) + 0x10000
+             :
+             // This function should not be called when the condition is
+             // false, but we provide a sensible default in case it is.
+             first_u;
 }
 
 // Converts a wide string to a narrow string in UTF-8 encoding.
 // The wide string is assumed to have the following encoding:
-//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
+//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin)
 //   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
 // Parameter str points to a null-terminated wide string.
 // Parameter num_chars may additionally limit the number
@@ -2987,7 +3494,7 @@ std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
 
   ::std::stringstream stream;
   for (int i = 0; i < num_chars; ++i) {
-    UInt32 unicode_code_point;
+    uint32_t unicode_code_point;
 
     if (str[i] == L'\0') {
       break;
@@ -2996,7 +3503,7 @@ std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
                                                                  str[i + 1]);
       i++;
     } else {
-      unicode_code_point = static_cast<UInt32>(str[i]);
+      unicode_code_point = static_cast<uint32_t>(str[i]);
     }
 
     stream << CodePointToUtf8(unicode_code_point);
@@ -3007,38 +3514,38 @@ std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
 // Converts a wide C string to an std::string using the UTF-8 encoding.
 // NULL will be converted to "(null)".
 std::string String::ShowWideCString(const wchar_t * wide_c_str) {
-  if (wide_c_str == NULL)  return "(null)";
+  if (wide_c_str == nullptr) return "(null)";
 
   return internal::WideStringToUtf8(wide_c_str, -1);
 }
 
-// Compares two wide C strings.  Returns true iff they have the same
-// content.
+// Compares two wide C strings.  Returns true if and only if they have the
+// same content.
 //
 // Unlike wcscmp(), this function can handle NULL argument(s).  A NULL
 // C string is considered different to any non-NULL C string,
 // including the empty string.
 bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) {
-  if (lhs == NULL) return rhs == NULL;
+  if (lhs == nullptr) return rhs == nullptr;
 
-  if (rhs == NULL) return false;
+  if (rhs == nullptr) return false;
 
   return wcscmp(lhs, rhs) == 0;
 }
 
 // Helper function for *_STREQ on wide strings.
-AssertionResult CmpHelperSTREQ(const char* expected_expression,
-                               const char* actual_expression,
-                               const wchar_t* expected,
-                               const wchar_t* actual) {
-  if (String::WideCStringEquals(expected, actual)) {
+AssertionResult CmpHelperSTREQ(const char* lhs_expression,
+                               const char* rhs_expression,
+                               const wchar_t* lhs,
+                               const wchar_t* rhs) {
+  if (String::WideCStringEquals(lhs, rhs)) {
     return AssertionSuccess();
   }
 
-  return EqFailure(expected_expression,
-                   actual_expression,
-                   PrintToString(expected),
-                   PrintToString(actual),
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   PrintToString(lhs),
+                   PrintToString(rhs),
                    false);
 }
 
@@ -3057,37 +3564,35 @@ AssertionResult CmpHelperSTRNE(const char* s1_expression,
                             << " vs " << PrintToString(s2);
 }
 
-// Compares two C strings, ignoring case.  Returns true iff they have
+// Compares two C strings, ignoring case.  Returns true if and only if they have
 // the same content.
 //
 // Unlike strcasecmp(), this function can handle NULL argument(s).  A
 // NULL C string is considered different to any non-NULL C string,
 // including the empty string.
 bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) {
-  if (lhs == NULL)
-    return rhs == NULL;
-  if (rhs == NULL)
-    return false;
+  if (lhs == nullptr) return rhs == nullptr;
+  if (rhs == nullptr) return false;
   return posix::StrCaseCmp(lhs, rhs) == 0;
 }
 
-  // Compares two wide C strings, ignoring case.  Returns true iff they
-  // have the same content.
-  //
-  // Unlike wcscasecmp(), this function can handle NULL argument(s).
-  // A NULL C string is considered different to any non-NULL wide C string,
-  // including the empty string.
-  // NB: The implementations on different platforms slightly differ.
-  // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
-  // environment variable. On GNU platform this method uses wcscasecmp
-  // which compares according to LC_CTYPE category of the current locale.
-  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
-  // current locale.
+// Compares two wide C strings, ignoring case.  Returns true if and only if they
+// have the same content.
+//
+// Unlike wcscasecmp(), this function can handle NULL argument(s).
+// A NULL C string is considered different to any non-NULL wide C string,
+// including the empty string.
+// NB: The implementations on different platforms slightly differ.
+// On windows, this method uses _wcsicmp which compares according to LC_CTYPE
+// environment variable. On GNU platform this method uses wcscasecmp
+// which compares according to LC_CTYPE category of the current locale.
+// On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
+// current locale.
 bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
                                               const wchar_t* rhs) {
-  if (lhs == NULL) return rhs == NULL;
+  if (lhs == nullptr) return rhs == nullptr;
 
-  if (rhs == NULL) return false;
+  if (rhs == nullptr) return false;
 
 #if GTEST_OS_WINDOWS
   return _wcsicmp(lhs, rhs) == 0;
@@ -3098,14 +3603,14 @@ bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
   // Other unknown OSes may not define it either.
   wint_t left, right;
   do {
-    left = towlower(*lhs++);
-    right = towlower(*rhs++);
+    left = towlower(static_cast<wint_t>(*lhs++));
+    right = towlower(static_cast<wint_t>(*rhs++));
   } while (left && left == right);
   return left == right;
 #endif  // OS selector
 }
 
-// Returns true iff str ends with the given suffix, ignoring case.
+// Returns true if and only if str ends with the given suffix, ignoring case.
 // Any string is considered to end with an empty suffix.
 bool String::EndsWithCaseInsensitive(
     const std::string& str, const std::string& suffix) {
@@ -3118,18 +3623,28 @@ bool String::EndsWithCaseInsensitive(
 
 // Formats an int value as "%02d".
 std::string String::FormatIntWidth2(int value) {
+  return FormatIntWidthN(value, 2);
+}
+
+// Formats an int value to given width with leading zeros.
+std::string String::FormatIntWidthN(int value, int width) {
   std::stringstream ss;
-  ss << std::setfill('0') << std::setw(2) << value;
+  ss << std::setfill('0') << std::setw(width) << value;
   return ss.str();
 }
 
 // Formats an int value as "%X".
-std::string String::FormatHexInt(int value) {
+std::string String::FormatHexUInt32(uint32_t value) {
   std::stringstream ss;
   ss << std::hex << std::uppercase << value;
   return ss.str();
 }
 
+// Formats an int value as "%X".
+std::string String::FormatHexInt(int value) {
+  return FormatHexUInt32(static_cast<uint32_t>(value));
+}
+
 // Formats a byte as "%02X".
 std::string String::FormatByte(unsigned char value) {
   std::stringstream ss;
@@ -3146,7 +3661,7 @@ std::string StringStreamToString(::std::stringstream* ss) {
   const char* const end = start + str.length();
 
   std::string result;
-  result.reserve(2 * (end - start));
+  result.reserve(static_cast<size_t>(2 * (end - start)));
   for (const char* ch = start; ch != end; ++ch) {
     if (*ch == '\0') {
       result += "\\0";  // Replaces NUL with "\\0";
@@ -3166,7 +3681,9 @@ std::string AppendUserMessage(const std::string& gtest_msg,
   if (user_msg_string.empty()) {
     return gtest_msg;
   }
-
+  if (gtest_msg.empty()) {
+    return user_msg_string;
+  }
   return gtest_msg + "\n" + user_msg_string;
 }
 
@@ -3176,9 +3693,7 @@ std::string AppendUserMessage(const std::string& gtest_msg,
 
 // Creates an empty TestResult.
 TestResult::TestResult()
-    : death_test_count_(0),
-      elapsed_time_(0) {
-}
+    : death_test_count_(0), start_timestamp_(0), elapsed_time_(0) {}
 
 // D'tor.
 TestResult::~TestResult() {
@@ -3190,7 +3705,7 @@ TestResult::~TestResult() {
 const TestPartResult& TestResult::GetTestPartResult(int i) const {
   if (i < 0 || i >= total_part_count())
     internal::posix::Abort();
-  return test_part_results_.at(i);
+  return test_part_results_.at(static_cast<size_t>(i));
 }
 
 // Returns the i-th test property. i can range from 0 to
@@ -3199,7 +3714,7 @@ const TestPartResult& TestResult::GetTestPartResult(int i) const {
 const TestProperty& TestResult::GetTestProperty(int i) const {
   if (i < 0 || i >= test_property_count())
     internal::posix::Abort();
-  return test_properties_.at(i);
+  return test_properties_.at(static_cast<size_t>(i));
 }
 
 // Clears the test part results.
@@ -3220,7 +3735,7 @@ void TestResult::RecordProperty(const std::string& xml_element,
   if (!ValidateTestProperty(xml_element, test_property)) {
     return;
   }
-  internal::MutexLock lock(&test_properites_mutex_);
+  internal::MutexLock lock(&test_properties_mutex_);
   const std::vector<TestProperty>::iterator property_with_matching_key =
       std::find_if(test_properties_.begin(), test_properties_.end(),
                    internal::TestPropertyKeyIs(test_property.key()));
@@ -3247,25 +3762,21 @@ static const char* const kReservedTestSuitesAttributes[] = {
 // The list of reserved attributes used in the <testsuite> element of XML
 // output.
 static const char* const kReservedTestSuiteAttributes[] = {
-  "disabled",
-  "errors",
-  "failures",
-  "name",
-  "tests",
-  "time"
-};
+    "disabled", "errors", "failures",  "name",
+    "tests",    "time",   "timestamp", "skipped"};
 
 // The list of reserved attributes used in the <testcase> element of XML output.
 static const char* const kReservedTestCaseAttributes[] = {
-  "classname",
-  "name",
-  "status",
-  "time",
-  "type_param",
-  "value_param"
-};
+    "classname",   "name", "status", "time",  "type_param",
+    "value_param", "file", "line"};
 
-template <int kSize>
+// Use a slightly different set for allowed output to ensure existing tests can
+// still RecordProperty("result") or "RecordProperty(timestamp")
+static const char* const kReservedOutputTestCaseAttributes[] = {
+    "classname",   "name", "status", "time",   "type_param",
+    "value_param", "file", "line",   "result", "timestamp"};
+
+template <size_t kSize>
 std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
   return std::vector<std::string>(array, array + kSize);
 }
@@ -3285,6 +3796,22 @@ static std::vector<std::string> GetReservedAttributesForElement(
   return std::vector<std::string>();
 }
 
+// TODO(jdesprez): Merge the two getReserved attributes once skip is improved
+static std::vector<std::string> GetReservedOutputAttributesForElement(
+    const std::string& xml_element) {
+  if (xml_element == "testsuites") {
+    return ArrayAsVector(kReservedTestSuitesAttributes);
+  } else if (xml_element == "testsuite") {
+    return ArrayAsVector(kReservedTestSuiteAttributes);
+  } else if (xml_element == "testcase") {
+    return ArrayAsVector(kReservedOutputTestCaseAttributes);
+  } else {
+    GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
+  }
+  // This code is unreachable but some compilers may not realizes that.
+  return std::vector<std::string>();
+}
+
 static std::string FormatWordList(const std::vector<std::string>& words) {
   Message word_list;
   for (size_t i = 0; i < words.size(); ++i) {
@@ -3299,8 +3826,9 @@ static std::string FormatWordList(const std::vector<std::string>& words) {
   return word_list.GetString();
 }
 
-bool ValidateTestPropertyName(const std::string& property_name,
-                              const std::vector<std::string>& reserved_names) {
+static bool ValidateTestPropertyName(
+    const std::string& property_name,
+    const std::vector<std::string>& reserved_names) {
   if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
           reserved_names.end()) {
     ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
@@ -3327,7 +3855,17 @@ void TestResult::Clear() {
   elapsed_time_ = 0;
 }
 
-// Returns true iff the test failed.
+// Returns true off the test part was skipped.
+static bool TestPartSkipped(const TestPartResult& result) {
+  return result.skipped();
+}
+
+// Returns true if and only if the test was skipped.
+bool TestResult::Skipped() const {
+  return !Failed() && CountIf(test_part_results_, TestPartSkipped) > 0;
+}
+
+// Returns true if and only if the test failed.
 bool TestResult::Failed() const {
   for (int i = 0; i < total_part_count(); ++i) {
     if (GetTestPartResult(i).failed())
@@ -3336,22 +3874,22 @@ bool TestResult::Failed() const {
   return false;
 }
 
-// Returns true iff the test part fatally failed.
+// Returns true if and only if the test part fatally failed.
 static bool TestPartFatallyFailed(const TestPartResult& result) {
   return result.fatally_failed();
 }
 
-// Returns true iff the test fatally failed.
+// Returns true if and only if the test fatally failed.
 bool TestResult::HasFatalFailure() const {
   return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
 }
 
-// Returns true iff the test part non-fatally failed.
+// Returns true if and only if the test part non-fatally failed.
 static bool TestPartNonfatallyFailed(const TestPartResult& result) {
   return result.nonfatally_failed();
 }
 
-// Returns true iff the test has a non-fatal failure.
+// Returns true if and only if the test has a non-fatal failure.
 bool TestResult::HasNonfatalFailure() const {
   return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0;
 }
@@ -3371,14 +3909,15 @@ int TestResult::test_property_count() const {
 
 // Creates a Test object.
 
-// The c'tor saves the values of all Google Test flags.
+// The c'tor saves the states of all flags.
 Test::Test()
-    : gtest_flag_saver_(new internal::GTestFlagSaver) {
+    : gtest_flag_saver_(new GTEST_FLAG_SAVER_) {
 }
 
-// The d'tor restores the values of all Google Test flags.
+// The d'tor restores the states of all flags.  The actual work is
+// done by the d'tor of the gtest_flag_saver_ field, and thus not
+// visible here.
 Test::~Test() {
-  delete gtest_flag_saver_;
 }
 
 // Sets up the test fixture.
@@ -3413,25 +3952,25 @@ void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
   // AddTestPartResult.
   UnitTest::GetInstance()->AddTestPartResult(
       result_type,
-      NULL,  // No info about the source file where the exception occurred.
-      -1,    // We have no info on which line caused the exception.
+      nullptr,  // No info about the source file where the exception occurred.
+      -1,       // We have no info on which line caused the exception.
       message,
-      "");   // No stack trace, either.
+      "");  // No stack trace, either.
 }
 
 }  // namespace internal
 
-// Google Test requires all tests in the same test case to use the same test
+// Google Test requires all tests in the same test suite to use the same test
 // fixture class.  This function checks if the current test has the
-// same fixture class as the first test in the current test case.  If
+// same fixture class as the first test in the current test suite.  If
 // yes, it returns true; otherwise it generates a Google Test failure and
 // returns false.
 bool Test::HasSameFixtureClass() {
   internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
-  const TestCase* const test_case = impl->current_test_case();
+  const TestSuite* const test_suite = impl->current_test_suite();
 
-  // Info about the first test in the current test case.
-  const TestInfo* const first_test_info = test_case->test_info_list()[0];
+  // Info about the first test in the current test suite.
+  const TestInfo* const first_test_info = test_suite->test_info_list()[0];
   const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
   const char* const first_test_name = first_test_info->name();
 
@@ -3447,8 +3986,8 @@ bool Test::HasSameFixtureClass() {
     const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId();
 
     if (first_is_TEST || this_is_TEST) {
-      // The user mixed TEST and TEST_F in this test case - we'll tell
-      // him/her how to fix it.
+      // Both TEST and TEST_F appear in same test suite, which is incorrect.
+      // Tell the user how to fix this.
 
       // Gets the name of the TEST and the name of the TEST_F.  Note
       // that first_is_TEST and this_is_TEST cannot both be true, as
@@ -3459,27 +3998,27 @@ bool Test::HasSameFixtureClass() {
           first_is_TEST ? this_test_name : first_test_name;
 
       ADD_FAILURE()
-          << "All tests in the same test case must use the same test fixture\n"
-          << "class, so mixing TEST_F and TEST in the same test case is\n"
-          << "illegal.  In test case " << this_test_info->test_case_name()
+          << "All tests in the same test suite must use the same test fixture\n"
+          << "class, so mixing TEST_F and TEST in the same test suite is\n"
+          << "illegal.  In test suite " << this_test_info->test_suite_name()
           << ",\n"
           << "test " << TEST_F_name << " is defined using TEST_F but\n"
           << "test " << TEST_name << " is defined using TEST.  You probably\n"
           << "want to change the TEST to TEST_F or move it to another test\n"
           << "case.";
     } else {
-      // The user defined two fixture classes with the same name in
-      // two namespaces - we'll tell him/her how to fix it.
+      // Two fixture classes with the same name appear in two different
+      // namespaces, which is not allowed. Tell the user how to fix this.
       ADD_FAILURE()
-          << "All tests in the same test case must use the same test fixture\n"
-          << "class.  However, in test case "
-          << this_test_info->test_case_name() << ",\n"
-          << "you defined test " << first_test_name
-          << " and test " << this_test_name << "\n"
+          << "All tests in the same test suite must use the same test fixture\n"
+          << "class.  However, in test suite "
+          << this_test_info->test_suite_name() << ",\n"
+          << "you defined test " << first_test_name << " and test "
+          << this_test_name << "\n"
           << "using two different test fixture classes.  This can happen if\n"
           << "the two classes are from different namespaces or translation\n"
           << "units and have the same name.  You should probably rename one\n"
-          << "of the classes to put the tests into different test cases.";
+          << "of the classes to put the tests into different test suites.";
     }
     return false;
   }
@@ -3512,7 +4051,7 @@ namespace internal {
 static std::string FormatCxxExceptionMessage(const char* description,
                                              const char* location) {
   Message message;
-  if (description != NULL) {
+  if (description != nullptr) {
     message << "C++ exception with description \"" << description << "\"";
   } else {
     message << "Unknown C++ exception";
@@ -3596,6 +4135,8 @@ Result HandleExceptionsInMethodIfSupported(
 #if GTEST_HAS_EXCEPTIONS
     try {
       return HandleSehExceptionsInMethodIfSupported(object, method, location);
+    } catch (const AssertionException&) {  // NOLINT
+      // This failure was reported already.
     } catch (const internal::GoogleTestFailureException&) {  // NOLINT
       // This exception type can only be thrown by a failed Google
       // Test assertion with the intention of letting another testing
@@ -3608,7 +4149,7 @@ Result HandleExceptionsInMethodIfSupported(
     } catch (...) {  // NOLINT
       internal::ReportFailureInUnknownLocation(
           TestPartResult::kFatalFailure,
-          FormatCxxExceptionMessage(NULL, location));
+          FormatCxxExceptionMessage(nullptr, location));
     }
     return static_cast<Result>(0);
 #else
@@ -3628,8 +4169,9 @@ void Test::Run() {
   internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
   impl->os_stack_trace_getter()->UponLeavingGTest();
   internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
-  // We will run the test only if SetUp() was successful.
-  if (!HasFatalFailure()) {
+  // We will run the test only if SetUp() was successful and didn't call
+  // GTEST_SKIP().
+  if (!HasFatalFailure() && !IsSkipped()) {
     impl->os_stack_trace_getter()->UponLeavingGTest();
     internal::HandleExceptionsInMethodIfSupported(
         this, &Test::TestBody, "the test body");
@@ -3643,35 +4185,42 @@ void Test::Run() {
       this, &Test::TearDown, "TearDown()");
 }
 
-// Returns true iff the current test has a fatal failure.
+// Returns true if and only if the current test has a fatal failure.
 bool Test::HasFatalFailure() {
   return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
 }
 
-// Returns true iff the current test has a non-fatal failure.
+// Returns true if and only if the current test has a non-fatal failure.
 bool Test::HasNonfatalFailure() {
   return internal::GetUnitTestImpl()->current_test_result()->
       HasNonfatalFailure();
 }
 
+// Returns true if and only if the current test was skipped.
+bool Test::IsSkipped() {
+  return internal::GetUnitTestImpl()->current_test_result()->Skipped();
+}
+
 // class TestInfo
 
 // Constructs a TestInfo object. It assumes ownership of the test factory
 // object.
-TestInfo::TestInfo(const std::string& a_test_case_name,
-                   const std::string& a_name,
-                   const char* a_type_param,
+TestInfo::TestInfo(const std::string& a_test_suite_name,
+                   const std::string& a_name, const char* a_type_param,
                    const char* a_value_param,
+                   internal::CodeLocation a_code_location,
                    internal::TypeId fixture_class_id,
                    internal::TestFactoryBase* factory)
-    : test_case_name_(a_test_case_name),
+    : test_suite_name_(a_test_suite_name),
       name_(a_name),
-      type_param_(a_type_param ? new std::string(a_type_param) : NULL),
-      value_param_(a_value_param ? new std::string(a_value_param) : NULL),
+      type_param_(a_type_param ? new std::string(a_type_param) : nullptr),
+      value_param_(a_value_param ? new std::string(a_value_param) : nullptr),
+      location_(a_code_location),
       fixture_class_id_(fixture_class_id),
       should_run_(false),
       is_disabled_(false),
       matches_filter_(false),
+      is_in_another_shard_(false),
       factory_(factory),
       result_() {}
 
@@ -3685,53 +4234,48 @@ namespace internal {
 //
 // Arguments:
 //
-//   test_case_name:   name of the test case
+//   test_suite_name:  name of the test suite
 //   name:             name of the test
 //   type_param:       the name of the test's type parameter, or NULL if
 //                     this is not a typed or a type-parameterized test.
 //   value_param:      text representation of the test's value parameter,
 //                     or NULL if this is not a value-parameterized test.
+//   code_location:    code location where the test is defined
 //   fixture_class_id: ID of the test fixture class
-//   set_up_tc:        pointer to the function that sets up the test case
-//   tear_down_tc:     pointer to the function that tears down the test case
+//   set_up_tc:        pointer to the function that sets up the test suite
+//   tear_down_tc:     pointer to the function that tears down the test suite
 //   factory:          pointer to the factory that creates a test object.
 //                     The newly created TestInfo instance will assume
 //                     ownership of the factory object.
 TestInfo* MakeAndRegisterTestInfo(
-    const char* test_case_name,
-    const char* name,
-    const char* type_param,
-    const char* value_param,
-    TypeId fixture_class_id,
-    SetUpTestCaseFunc set_up_tc,
-    TearDownTestCaseFunc tear_down_tc,
-    TestFactoryBase* factory) {
+    const char* test_suite_name, const char* name, const char* type_param,
+    const char* value_param, CodeLocation code_location,
+    TypeId fixture_class_id, SetUpTestSuiteFunc set_up_tc,
+    TearDownTestSuiteFunc tear_down_tc, TestFactoryBase* factory) {
   TestInfo* const test_info =
-      new TestInfo(test_case_name, name, type_param, value_param,
-                   fixture_class_id, factory);
+      new TestInfo(test_suite_name, name, type_param, value_param,
+                   code_location, fixture_class_id, factory);
   GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info);
   return test_info;
 }
 
-#if GTEST_HAS_PARAM_TEST
-void ReportInvalidTestCaseType(const char* test_case_name,
-                               const char* file, int line) {
+void ReportInvalidTestSuiteType(const char* test_suite_name,
+                                CodeLocation code_location) {
   Message errors;
   errors
-      << "Attempted redefinition of test case " << test_case_name << ".\n"
-      << "All tests in the same test case must use the same test fixture\n"
-      << "class.  However, in test case " << test_case_name << ", you tried\n"
+      << "Attempted redefinition of test suite " << test_suite_name << ".\n"
+      << "All tests in the same test suite must use the same test fixture\n"
+      << "class.  However, in test suite " << test_suite_name << ", you tried\n"
       << "to define a test using a fixture class different from the one\n"
       << "used earlier. This can happen if the two fixture classes are\n"
       << "from different namespaces and have the same name. You should\n"
       << "probably rename one of the classes to put the tests into different\n"
-      << "test cases.";
+      << "test suites.";
 
-  fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
-          errors.GetString().c_str());
+  GTEST_LOG_(ERROR) << FormatFileLocation(code_location.file.c_str(),
+                                          code_location.line)
+                    << " " << errors.GetString();
 }
-#endif  // GTEST_HAS_PARAM_TEST
-
 }  // namespace internal
 
 namespace {
@@ -3739,44 +4283,43 @@ namespace {
 // A predicate that checks the test name of a TestInfo against a known
 // value.
 //
-// This is used for implementation of the TestCase class only.  We put
+// This is used for implementation of the TestSuite class only.  We put
 // it in the anonymous namespace to prevent polluting the outer
 // namespace.
 //
 // TestNameIs is copyable.
+class TestNameIs {
+ public:
+  // Constructor.
+  //
+  // TestNameIs has NO default constructor.
+  explicit TestNameIs(const char* name)
+      : name_(name) {}
+#if defined(__EDG__)
+#pragma diag_suppress declared_but_not_referenced
+#endif
+  // Returns true if and only if the test name of test_info matches name_.
+  bool operator()(const TestInfo * test_info) const {
+    return test_info && test_info->name() == name_;
+  }
 
-//Commenting out this class since its not used and wherefor produces warnings
-// class TestNameIs {
-// public:
-//  // Constructor.
-//  //
-//  // TestNameIs has NO default constructor.
-//  explicit TestNameIs(const char* name)
-//      : name_(name) {}
-//
-//  // Returns true iff the test name of test_info matches name_.
-//  bool operator()(const TestInfo * test_info) const {
-//    return test_info && test_info->name() == name_;
-//  }
-//
-// private:
-//  std::string name_;
-//};
+ private:
+  std::string name_;
+};
 
 }  // namespace
 
 namespace internal {
 
 // This method expands all parameterized tests registered with macros TEST_P
-// and INSTANTIATE_TEST_CASE_P into regular tests and registers those.
+// and INSTANTIATE_TEST_SUITE_P into regular tests and registers those.
 // This will be done just once during the program runtime.
 void UnitTestImpl::RegisterParameterizedTests() {
-#if GTEST_HAS_PARAM_TEST
   if (!parameterized_tests_registered_) {
     parameterized_test_registry_.RegisterTests();
+    type_parameterized_test_registry_.CheckForInstantiations();
     parameterized_tests_registered_ = true;
   }
-#endif
 }
 
 }  // namespace internal
@@ -3795,7 +4338,8 @@ void TestInfo::Run() {
   // Notifies the unit test event listeners that a test is about to start.
   repeater->OnTestStart(*this);
 
-  const TimeInMillis start = internal::GetTimeInMillis();
+  result_.set_start_timestamp(internal::GetTimeInMillis());
+  internal::Timer timer;
 
   impl->os_stack_trace_getter()->UponLeavingGTest();
 
@@ -3804,154 +4348,233 @@ void TestInfo::Run() {
       factory_, &internal::TestFactoryBase::CreateTest,
       "the test fixture's constructor");
 
-  // Runs the test only if the test object was created and its
-  // constructor didn't generate a fatal failure.
-  if ((test != NULL) && !Test::HasFatalFailure()) {
+  // Runs the test if the constructor didn't generate a fatal failure or invoke
+  // GTEST_SKIP().
+  // Note that the object will not be null
+  if (!Test::HasFatalFailure() && !Test::IsSkipped()) {
     // This doesn't throw as all user code that can throw are wrapped into
     // exception handling code.
     test->Run();
   }
 
-  // Deletes the test object.
-  impl->os_stack_trace_getter()->UponLeavingGTest();
-  internal::HandleExceptionsInMethodIfSupported(
-      test, &Test::DeleteSelf_, "the test fixture's destructor");
+  if (test != nullptr) {
+    // Deletes the test object.
+    impl->os_stack_trace_getter()->UponLeavingGTest();
+    internal::HandleExceptionsInMethodIfSupported(
+        test, &Test::DeleteSelf_, "the test fixture's destructor");
+  }
 
-  result_.set_elapsed_time(internal::GetTimeInMillis() - start);
+  result_.set_elapsed_time(timer.Elapsed());
 
   // Notifies the unit test event listener that a test has just finished.
   repeater->OnTestEnd(*this);
 
   // Tells UnitTest to stop associating assertion results to this
   // test.
-  impl->set_current_test_info(NULL);
+  impl->set_current_test_info(nullptr);
+}
+
+// Skip and records a skipped test result for this object.
+void TestInfo::Skip() {
+  if (!should_run_) return;
+
+  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+  impl->set_current_test_info(this);
+
+  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+  // Notifies the unit test event listeners that a test is about to start.
+  repeater->OnTestStart(*this);
+
+  const TestPartResult test_part_result =
+      TestPartResult(TestPartResult::kSkip, this->file(), this->line(), "");
+  impl->GetTestPartResultReporterForCurrentThread()->ReportTestPartResult(
+      test_part_result);
+
+  // Notifies the unit test event listener that a test has just finished.
+  repeater->OnTestEnd(*this);
+  impl->set_current_test_info(nullptr);
 }
 
-// class TestCase
+// class TestSuite
 
-// Gets the number of successful tests in this test case.
-int TestCase::successful_test_count() const {
+// Gets the number of successful tests in this test suite.
+int TestSuite::successful_test_count() const {
   return CountIf(test_info_list_, TestPassed);
 }
 
-// Gets the number of failed tests in this test case.
-int TestCase::failed_test_count() const {
+// Gets the number of successful tests in this test suite.
+int TestSuite::skipped_test_count() const {
+  return CountIf(test_info_list_, TestSkipped);
+}
+
+// Gets the number of failed tests in this test suite.
+int TestSuite::failed_test_count() const {
   return CountIf(test_info_list_, TestFailed);
 }
 
 // Gets the number of disabled tests that will be reported in the XML report.
-int TestCase::reportable_disabled_test_count() const {
+int TestSuite::reportable_disabled_test_count() const {
   return CountIf(test_info_list_, TestReportableDisabled);
 }
 
-// Gets the number of disabled tests in this test case.
-int TestCase::disabled_test_count() const {
+// Gets the number of disabled tests in this test suite.
+int TestSuite::disabled_test_count() const {
   return CountIf(test_info_list_, TestDisabled);
 }
 
 // Gets the number of tests to be printed in the XML report.
-int TestCase::reportable_test_count() const {
+int TestSuite::reportable_test_count() const {
   return CountIf(test_info_list_, TestReportable);
 }
 
-// Get the number of tests in this test case that should run.
-int TestCase::test_to_run_count() const {
+// Get the number of tests in this test suite that should run.
+int TestSuite::test_to_run_count() const {
   return CountIf(test_info_list_, ShouldRunTest);
 }
 
 // Gets the number of all tests.
-int TestCase::total_test_count() const {
+int TestSuite::total_test_count() const {
   return static_cast<int>(test_info_list_.size());
 }
 
-// Creates a TestCase with the given name.
+// Creates a TestSuite with the given name.
 //
 // Arguments:
 //
-//   name:         name of the test case
-//   a_type_param: the name of the test case's type parameter, or NULL if
-//                 this is not a typed or a type-parameterized test case.
-//   set_up_tc:    pointer to the function that sets up the test case
-//   tear_down_tc: pointer to the function that tears down the test case
-TestCase::TestCase(const char* a_name, const char* a_type_param,
-                   Test::SetUpTestCaseFunc set_up_tc,
-                   Test::TearDownTestCaseFunc tear_down_tc)
+//   a_name:       name of the test suite
+//   a_type_param: the name of the test suite's type parameter, or NULL if
+//                 this is not a typed or a type-parameterized test suite.
+//   set_up_tc:    pointer to the function that sets up the test suite
+//   tear_down_tc: pointer to the function that tears down the test suite
+TestSuite::TestSuite(const char* a_name, const char* a_type_param,
+                     internal::SetUpTestSuiteFunc set_up_tc,
+                     internal::TearDownTestSuiteFunc tear_down_tc)
     : name_(a_name),
-      type_param_(a_type_param ? new std::string(a_type_param) : NULL),
+      type_param_(a_type_param ? new std::string(a_type_param) : nullptr),
       set_up_tc_(set_up_tc),
       tear_down_tc_(tear_down_tc),
       should_run_(false),
-      elapsed_time_(0) {
-}
+      start_timestamp_(0),
+      elapsed_time_(0) {}
 
-// Destructor of TestCase.
-TestCase::~TestCase() {
+// Destructor of TestSuite.
+TestSuite::~TestSuite() {
   // Deletes every Test in the collection.
   ForEach(test_info_list_, internal::Delete<TestInfo>);
 }
 
 // Returns the i-th test among all the tests. i can range from 0 to
 // total_test_count() - 1. If i is not in that range, returns NULL.
-const TestInfo* TestCase::GetTestInfo(int i) const {
+const TestInfo* TestSuite::GetTestInfo(int i) const {
   const int index = GetElementOr(test_indices_, i, -1);
-  return index < 0 ? NULL : test_info_list_[index];
+  return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
 }
 
 // Returns the i-th test among all the tests. i can range from 0 to
 // total_test_count() - 1. If i is not in that range, returns NULL.
-TestInfo* TestCase::GetMutableTestInfo(int i) {
+TestInfo* TestSuite::GetMutableTestInfo(int i) {
   const int index = GetElementOr(test_indices_, i, -1);
-  return index < 0 ? NULL : test_info_list_[index];
+  return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
 }
 
-// Adds a test to this test case.  Will delete the test upon
-// destruction of the TestCase object.
-void TestCase::AddTestInfo(TestInfo * test_info) {
+// Adds a test to this test suite.  Will delete the test upon
+// destruction of the TestSuite object.
+void TestSuite::AddTestInfo(TestInfo* test_info) {
   test_info_list_.push_back(test_info);
   test_indices_.push_back(static_cast<int>(test_indices_.size()));
 }
 
-// Runs every test in this TestCase.
-void TestCase::Run() {
+// Runs every test in this TestSuite.
+void TestSuite::Run() {
   if (!should_run_) return;
 
   internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
-  impl->set_current_test_case(this);
+  impl->set_current_test_suite(this);
 
   TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
 
+  // Call both legacy and the new API
+  repeater->OnTestSuiteStart(*this);
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   repeater->OnTestCaseStart(*this);
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
   impl->os_stack_trace_getter()->UponLeavingGTest();
   internal::HandleExceptionsInMethodIfSupported(
-      this, &TestCase::RunSetUpTestCase, "SetUpTestCase()");
+      this, &TestSuite::RunSetUpTestSuite, "SetUpTestSuite()");
 
-  const internal::TimeInMillis start = internal::GetTimeInMillis();
+  start_timestamp_ = internal::GetTimeInMillis();
+  internal::Timer timer;
   for (int i = 0; i < total_test_count(); i++) {
     GetMutableTestInfo(i)->Run();
+    if (GTEST_FLAG(fail_fast) && GetMutableTestInfo(i)->result()->Failed()) {
+      for (int j = i + 1; j < total_test_count(); j++) {
+        GetMutableTestInfo(j)->Skip();
+      }
+      break;
+    }
   }
-  elapsed_time_ = internal::GetTimeInMillis() - start;
+  elapsed_time_ = timer.Elapsed();
 
   impl->os_stack_trace_getter()->UponLeavingGTest();
   internal::HandleExceptionsInMethodIfSupported(
-      this, &TestCase::RunTearDownTestCase, "TearDownTestCase()");
+      this, &TestSuite::RunTearDownTestSuite, "TearDownTestSuite()");
+
+  // Call both legacy and the new API
+  repeater->OnTestSuiteEnd(*this);
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  repeater->OnTestCaseEnd(*this);
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  impl->set_current_test_suite(nullptr);
+}
+
+// Skips all tests under this TestSuite.
+void TestSuite::Skip() {
+  if (!should_run_) return;
+
+  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+  impl->set_current_test_suite(this);
+
+  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+  // Call both legacy and the new API
+  repeater->OnTestSuiteStart(*this);
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  repeater->OnTestCaseStart(*this);
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  for (int i = 0; i < total_test_count(); i++) {
+    GetMutableTestInfo(i)->Skip();
+  }
 
+  // Call both legacy and the new API
+  repeater->OnTestSuiteEnd(*this);
+  // Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   repeater->OnTestCaseEnd(*this);
-  impl->set_current_test_case(NULL);
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  impl->set_current_test_suite(nullptr);
 }
 
-// Clears the results of all tests in this test case.
-void TestCase::ClearResult() {
+// Clears the results of all tests in this test suite.
+void TestSuite::ClearResult() {
   ad_hoc_test_result_.Clear();
   ForEach(test_info_list_, TestInfo::ClearTestResult);
 }
 
-// Shuffles the tests in this test case.
-void TestCase::ShuffleTests(internal::Random* random) {
+// Shuffles the tests in this test suite.
+void TestSuite::ShuffleTests(internal::Random* random) {
   Shuffle(random, &test_indices_);
 }
 
 // Restores the test order to before the first shuffle.
-void TestCase::UnshuffleTests() {
+void TestSuite::UnshuffleTests() {
   for (size_t i = 0; i < test_indices_.size(); i++) {
     test_indices_[i] = static_cast<int>(i);
   }
@@ -3974,9 +4597,9 @@ static std::string FormatTestCount(int test_count) {
   return FormatCountableNoun(test_count, "test", "tests");
 }
 
-// Formats the count of test cases.
-static std::string FormatTestCaseCount(int test_case_count) {
-  return FormatCountableNoun(test_case_count, "test case", "test cases");
+// Formats the count of test suites.
+static std::string FormatTestSuiteCount(int test_suite_count) {
+  return FormatCountableNoun(test_suite_count, "test suite", "test suites");
 }
 
 // Converts a TestPartResult::Type enum to human-friendly string
@@ -3985,6 +4608,8 @@ static std::string FormatTestCaseCount(int test_case_count) {
 // between the two when viewing the test result.
 static const char * TestPartResultTypeToString(TestPartResult::Type type) {
   switch (type) {
+    case TestPartResult::kSkip:
+      return "Skipped\n";
     case TestPartResult::kSuccess:
       return "Success";
 
@@ -4001,6 +4626,9 @@ static const char * TestPartResultTypeToString(TestPartResult::Type type) {
 }
 
 namespace internal {
+namespace {
+enum class GTestColor { kDefault, kRed, kGreen, kYellow };
+}  // namespace
 
 // Prints a TestPartResult to an std::string.
 static std::string PrintTestPartResultToString(
@@ -4032,48 +4660,79 @@ static void PrintTestPartResult(const TestPartResult& test_part_result) {
 }
 
 // class PrettyUnitTestResultPrinter
-
-enum GTestColor {
-  COLOR_DEFAULT,
-  COLOR_RED,
-  COLOR_GREEN,
-  COLOR_YELLOW
-};
-
-#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \
+    !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW
 
 // Returns the character attribute for the given color.
-WORD GetColorAttribute(GTestColor color) {
+static WORD GetColorAttribute(GTestColor color) {
   switch (color) {
-    case COLOR_RED:    return FOREGROUND_RED;
-    case COLOR_GREEN:  return FOREGROUND_GREEN;
-    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
+    case GTestColor::kRed:
+      return FOREGROUND_RED;
+    case GTestColor::kGreen:
+      return FOREGROUND_GREEN;
+    case GTestColor::kYellow:
+      return FOREGROUND_RED | FOREGROUND_GREEN;
     default:           return 0;
   }
 }
 
-#else
+static int GetBitOffset(WORD color_mask) {
+  if (color_mask == 0) return 0;
 
-// Returns the ANSI color code for the given color.  COLOR_DEFAULT is
-// an invalid input.
-const char* GetAnsiColorCode(GTestColor color) {
-  switch (color) {
-    case COLOR_RED:     return "1";
-    case COLOR_GREEN:   return "2";
-    case COLOR_YELLOW:  return "3";
-    default:            return NULL;
-  };
+  int bitOffset = 0;
+  while ((color_mask & 1) == 0) {
+    color_mask >>= 1;
+    ++bitOffset;
+  }
+  return bitOffset;
 }
 
-#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+static WORD GetNewColor(GTestColor color, WORD old_color_attrs) {
+  // Let's reuse the BG
+  static const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN |
+                                      BACKGROUND_RED | BACKGROUND_INTENSITY;
+  static const WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN |
+                                      FOREGROUND_RED | FOREGROUND_INTENSITY;
+  const WORD existing_bg = old_color_attrs & background_mask;
 
-// Returns true iff Google Test should use colors in the output.
-bool ShouldUseColor(bool stdout_is_tty) {
-  const char* const gtest_color = GTEST_FLAG(color).c_str();
+  WORD new_color =
+      GetColorAttribute(color) | existing_bg | FOREGROUND_INTENSITY;
+  static const int bg_bitOffset = GetBitOffset(background_mask);
+  static const int fg_bitOffset = GetBitOffset(foreground_mask);
 
-  if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
-#if GTEST_OS_WINDOWS
-    // On Windows the TERM variable is usually not set, but the
+  if (((new_color & background_mask) >> bg_bitOffset) ==
+      ((new_color & foreground_mask) >> fg_bitOffset)) {
+    new_color ^= FOREGROUND_INTENSITY;  // invert intensity
+  }
+  return new_color;
+}
+
+#else
+
+// Returns the ANSI color code for the given color. GTestColor::kDefault is
+// an invalid input.
+static const char* GetAnsiColorCode(GTestColor color) {
+  switch (color) {
+    case GTestColor::kRed:
+      return "1";
+    case GTestColor::kGreen:
+      return "2";
+    case GTestColor::kYellow:
+      return "3";
+    default:
+      return nullptr;
+  }
+}
+
+#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+
+// Returns true if and only if Google Test should use colors in the output.
+bool ShouldUseColor(bool stdout_is_tty) {
+  const char* const gtest_color = GTEST_FLAG(color).c_str();
+
+  if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+    // On Windows the TERM variable is usually not set, but the
     // console there does support colors.
     return stdout_is_tty;
 #else
@@ -4085,6 +4744,10 @@ bool ShouldUseColor(bool stdout_is_tty) {
         String::CStringEquals(term, "xterm-256color") ||
         String::CStringEquals(term, "screen") ||
         String::CStringEquals(term, "screen-256color") ||
+        String::CStringEquals(term, "tmux") ||
+        String::CStringEquals(term, "tmux-256color") ||
+        String::CStringEquals(term, "rxvt-unicode") ||
+        String::CStringEquals(term, "rxvt-unicode-256color") ||
         String::CStringEquals(term, "linux") ||
         String::CStringEquals(term, "cygwin");
     return stdout_is_tty && term_supports_color;
@@ -4104,18 +4767,20 @@ bool ShouldUseColor(bool stdout_is_tty) {
 // cannot simply emit special characters and have the terminal change colors.
 // This routine must actually emit the characters rather than return a string
 // that would be colored when printed, as can be done on Linux.
-void ColoredPrintf(GTestColor color, const char* fmt, ...) {
+
+GTEST_ATTRIBUTE_PRINTF_(2, 3)
+static void ColoredPrintf(GTestColor color, const char *fmt, ...) {
   va_list args;
   va_start(args, fmt);
 
-#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || GTEST_OS_IOS
-  const bool use_color = false;
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS || GTEST_OS_IOS || \
+    GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT || defined(ESP_PLATFORM)
+  const bool use_color = AlwaysFalse();
 #else
   static const bool in_color_mode =
       ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
-  const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
-#endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS
-  // The '!= 0' comparison is necessary to satisfy MSVC 7.1.
+  const bool use_color = in_color_mode && (color != GTestColor::kDefault);
+#endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS
 
   if (!use_color) {
     vprintf(fmt, args);
@@ -4123,20 +4788,22 @@ void ColoredPrintf(GTestColor color, const char* fmt, ...) {
     return;
   }
 
-#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \
+    !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW
   const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
 
   // Gets the current text color.
   CONSOLE_SCREEN_BUFFER_INFO buffer_info;
   GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
   const WORD old_color_attrs = buffer_info.wAttributes;
+  const WORD new_color = GetNewColor(color, old_color_attrs);
 
   // We need to flush the stream buffers into the console before each
   // SetConsoleTextAttribute call lest it affect the text that is already
   // printed but has not yet reached the console.
   fflush(stdout);
-  SetConsoleTextAttribute(stdout_handle,
-                          GetColorAttribute(color) | FOREGROUND_INTENSITY);
+  SetConsoleTextAttribute(stdout_handle, new_color);
+
   vprintf(fmt, args);
 
   fflush(stdout);
@@ -4150,23 +4817,22 @@ void ColoredPrintf(GTestColor color, const char* fmt, ...) {
   va_end(args);
 }
 
-// Text printed in Google Test's text output and --gunit_list_tests
+// Text printed in Google Test's text output and --gtest_list_tests
 // output to label the type parameter and value parameter for a test.
 static const char kTypeParamLabel[] = "TypeParam";
 static const char kValueParamLabel[] = "GetParam()";
 
-void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
+static void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
   const char* const type_param = test_info.type_param();
   const char* const value_param = test_info.value_param();
 
-  if (type_param != NULL || value_param != NULL) {
+  if (type_param != nullptr || value_param != nullptr) {
     printf(", where ");
-    if (type_param != NULL) {
+    if (type_param != nullptr) {
       printf("%s = %s", kTypeParamLabel, type_param);
-      if (value_param != NULL)
-        printf(" and ");
+      if (value_param != nullptr) printf(" and ");
     }
-    if (value_param != NULL) {
+    if (value_param != nullptr) {
       printf("%s = %s", kValueParamLabel, value_param);
     }
   }
@@ -4178,27 +4844,40 @@ void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
 class PrettyUnitTestResultPrinter : public TestEventListener {
  public:
   PrettyUnitTestResultPrinter() {}
-  static void PrintTestName(const char * test_case, const char * test) {
-    printf("%s.%s", test_case, test);
+  static void PrintTestName(const char* test_suite, const char* test) {
+    printf("%s.%s", test_suite, test);
   }
 
   // The following methods override what's in the TestEventListener class.
-  virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
-  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
-  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
-  virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
-  virtual void OnTestCaseStart(const TestCase& test_case);
-  virtual void OnTestStart(const TestInfo& test_info);
-  virtual void OnTestPartResult(const TestPartResult& result);
-  virtual void OnTestEnd(const TestInfo& test_info);
-  virtual void OnTestCaseEnd(const TestCase& test_case);
-  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
-  virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
-  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
-  virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
+  void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationStart(const UnitTest& unit_test, int iteration) override;
+  void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
+  void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseStart(const TestCase& test_case) override;
+#else
+  void OnTestSuiteStart(const TestSuite& test_suite) override;
+#endif  // OnTestCaseStart
+
+  void OnTestStart(const TestInfo& test_info) override;
+
+  void OnTestPartResult(const TestPartResult& result) override;
+  void OnTestEnd(const TestInfo& test_info) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseEnd(const TestCase& test_case) override;
+#else
+  void OnTestSuiteEnd(const TestSuite& test_suite) override;
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
+  void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+  void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
 
  private:
   static void PrintFailedTests(const UnitTest& unit_test);
+  static void PrintFailedTestSuites(const UnitTest& unit_test);
+  static void PrintSkippedTests(const UnitTest& unit_test);
 };
 
   // Fired before each iteration of tests starts.
@@ -4212,54 +4891,69 @@ void PrettyUnitTestResultPrinter::OnTestIterationStart(
   // Prints the filter if it's not *.  This reminds the user that some
   // tests may be skipped.
   if (!String::CStringEquals(filter, kUniversalFilter)) {
-    ColoredPrintf(COLOR_YELLOW,
-                  "Note: %s filter = %s\n", GTEST_NAME_, filter);
+    ColoredPrintf(GTestColor::kYellow, "Note: %s filter = %s\n", GTEST_NAME_,
+                  filter);
   }
 
   if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
-    const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
-    ColoredPrintf(COLOR_YELLOW,
-                  "Note: This is test shard %d of %s.\n",
+    const int32_t shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
+    ColoredPrintf(GTestColor::kYellow, "Note: This is test shard %d of %s.\n",
                   static_cast<int>(shard_index) + 1,
                   internal::posix::GetEnv(kTestTotalShards));
   }
 
   if (GTEST_FLAG(shuffle)) {
-    ColoredPrintf(COLOR_YELLOW,
+    ColoredPrintf(GTestColor::kYellow,
                   "Note: Randomizing tests' orders with a seed of %d .\n",
                   unit_test.random_seed());
   }
 
-  ColoredPrintf(COLOR_GREEN,  "[==========] ");
+  ColoredPrintf(GTestColor::kGreen, "[==========] ");
   printf("Running %s from %s.\n",
          FormatTestCount(unit_test.test_to_run_count()).c_str(),
-         FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
+         FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
   fflush(stdout);
 }
 
 void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
     const UnitTest& /*unit_test*/) {
-  ColoredPrintf(COLOR_GREEN,  "[----------] ");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
   printf("Global test environment set-up.\n");
   fflush(stdout);
 }
 
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
   const std::string counts =
       FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
-  ColoredPrintf(COLOR_GREEN, "[----------] ");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
   printf("%s from %s", counts.c_str(), test_case.name());
-  if (test_case.type_param() == NULL) {
+  if (test_case.type_param() == nullptr) {
     printf("\n");
   } else {
     printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
   }
   fflush(stdout);
 }
+#else
+void PrettyUnitTestResultPrinter::OnTestSuiteStart(
+    const TestSuite& test_suite) {
+  const std::string counts =
+      FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
+  printf("%s from %s", counts.c_str(), test_suite.name());
+  if (test_suite.type_param() == nullptr) {
+    printf("\n");
+  } else {
+    printf(", where %s = %s\n", kTypeParamLabel, test_suite.type_param());
+  }
+  fflush(stdout);
+}
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
-  ColoredPrintf(COLOR_GREEN,  "[ RUN      ] ");
-  PrintTestName(test_info.test_case_name(), test_info.name());
+  ColoredPrintf(GTestColor::kGreen, "[ RUN      ] ");
+  PrintTestName(test_info.test_suite_name(), test_info.name());
   printf("\n");
   fflush(stdout);
 }
@@ -4267,22 +4961,27 @@ void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
 // Called after an assertion failure.
 void PrettyUnitTestResultPrinter::OnTestPartResult(
     const TestPartResult& result) {
-  // If the test part succeeded, we don't need to do anything.
-  if (result.type() == TestPartResult::kSuccess)
-    return;
-
-  // Print failure message from the assertion (e.g. expected this and got that).
-  PrintTestPartResult(result);
-  fflush(stdout);
+  switch (result.type()) {
+    // If the test part succeeded, we don't need to do anything.
+    case TestPartResult::kSuccess:
+      return;
+    default:
+      // Print failure message from the assertion
+      // (e.g. expected this and got that).
+      PrintTestPartResult(result);
+      fflush(stdout);
+  }
 }
 
 void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
   if (test_info.result()->Passed()) {
-    ColoredPrintf(COLOR_GREEN, "[       OK ] ");
+    ColoredPrintf(GTestColor::kGreen, "[       OK ] ");
+  } else if (test_info.result()->Skipped()) {
+    ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
   } else {
-    ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
+    ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
   }
-  PrintTestName(test_info.test_case_name(), test_info.name());
+  PrintTestName(test_info.test_suite_name(), test_info.name());
   if (test_info.result()->Failed())
     PrintFullTestCommentIfPresent(test_info);
 
@@ -4295,21 +4994,33 @@ void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
   fflush(stdout);
 }
 
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
   if (!GTEST_FLAG(print_time)) return;
 
   const std::string counts =
       FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
-  ColoredPrintf(COLOR_GREEN, "[----------] ");
-  printf("%s from %s (%s ms total)\n\n",
-         counts.c_str(), test_case.name(),
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
+  printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_case.name(),
          internal::StreamableToString(test_case.elapsed_time()).c_str());
   fflush(stdout);
 }
+#else
+void PrettyUnitTestResultPrinter::OnTestSuiteEnd(const TestSuite& test_suite) {
+  if (!GTEST_FLAG(print_time)) return;
+
+  const std::string counts =
+      FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
+  printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_suite.name(),
+         internal::StreamableToString(test_suite.elapsed_time()).c_str());
+  fflush(stdout);
+}
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
     const UnitTest& /*unit_test*/) {
-  ColoredPrintf(COLOR_GREEN,  "[----------] ");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
   printf("Global test environment tear-down\n");
   fflush(stdout);
 }
@@ -4317,23 +5028,70 @@ void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
 // Internal helper for printing the list of failed tests.
 void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
   const int failed_test_count = unit_test.failed_test_count();
-  if (failed_test_count == 0) {
+  ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
+  printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
+
+  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+    const TestSuite& test_suite = *unit_test.GetTestSuite(i);
+    if (!test_suite.should_run() || (test_suite.failed_test_count() == 0)) {
+      continue;
+    }
+    for (int j = 0; j < test_suite.total_test_count(); ++j) {
+      const TestInfo& test_info = *test_suite.GetTestInfo(j);
+      if (!test_info.should_run() || !test_info.result()->Failed()) {
+        continue;
+      }
+      ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
+      printf("%s.%s", test_suite.name(), test_info.name());
+      PrintFullTestCommentIfPresent(test_info);
+      printf("\n");
+    }
+  }
+  printf("\n%2d FAILED %s\n", failed_test_count,
+         failed_test_count == 1 ? "TEST" : "TESTS");
+}
+
+// Internal helper for printing the list of test suite failures not covered by
+// PrintFailedTests.
+void PrettyUnitTestResultPrinter::PrintFailedTestSuites(
+    const UnitTest& unit_test) {
+  int suite_failure_count = 0;
+  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+    const TestSuite& test_suite = *unit_test.GetTestSuite(i);
+    if (!test_suite.should_run()) {
+      continue;
+    }
+    if (test_suite.ad_hoc_test_result().Failed()) {
+      ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
+      printf("%s: SetUpTestSuite or TearDownTestSuite\n", test_suite.name());
+      ++suite_failure_count;
+    }
+  }
+  if (suite_failure_count > 0) {
+    printf("\n%2d FAILED TEST %s\n", suite_failure_count,
+           suite_failure_count == 1 ? "SUITE" : "SUITES");
+  }
+}
+
+// Internal helper for printing the list of skipped tests.
+void PrettyUnitTestResultPrinter::PrintSkippedTests(const UnitTest& unit_test) {
+  const int skipped_test_count = unit_test.skipped_test_count();
+  if (skipped_test_count == 0) {
     return;
   }
 
-  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
-    const TestCase& test_case = *unit_test.GetTestCase(i);
-    if (!test_case.should_run() || (test_case.failed_test_count() == 0)) {
+  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+    const TestSuite& test_suite = *unit_test.GetTestSuite(i);
+    if (!test_suite.should_run() || (test_suite.skipped_test_count() == 0)) {
       continue;
     }
-    for (int j = 0; j < test_case.total_test_count(); ++j) {
-      const TestInfo& test_info = *test_case.GetTestInfo(j);
-      if (!test_info.should_run() || test_info.result()->Passed()) {
+    for (int j = 0; j < test_suite.total_test_count(); ++j) {
+      const TestInfo& test_info = *test_suite.GetTestInfo(j);
+      if (!test_info.should_run() || !test_info.result()->Skipped()) {
         continue;
       }
-      ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
-      printf("%s.%s", test_case.name(), test_info.name());
-      PrintFullTestCommentIfPresent(test_info);
+      ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
+      printf("%s.%s", test_suite.name(), test_info.name());
       printf("\n");
     }
   }
@@ -4341,37 +5099,37 @@ void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
 
 void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
                                                      int /*iteration*/) {
-  ColoredPrintf(COLOR_GREEN,  "[==========] ");
+  ColoredPrintf(GTestColor::kGreen, "[==========] ");
   printf("%s from %s ran.",
          FormatTestCount(unit_test.test_to_run_count()).c_str(),
-         FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
+         FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
   if (GTEST_FLAG(print_time)) {
     printf(" (%s ms total)",
            internal::StreamableToString(unit_test.elapsed_time()).c_str());
   }
   printf("\n");
-  ColoredPrintf(COLOR_GREEN,  "[  PASSED  ] ");
+  ColoredPrintf(GTestColor::kGreen, "[  PASSED  ] ");
   printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
 
-  int num_failures = unit_test.failed_test_count();
+  const int skipped_test_count = unit_test.skipped_test_count();
+  if (skipped_test_count > 0) {
+    ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
+    printf("%s, listed below:\n", FormatTestCount(skipped_test_count).c_str());
+    PrintSkippedTests(unit_test);
+  }
+
   if (!unit_test.Passed()) {
-    const int failed_test_count = unit_test.failed_test_count();
-    ColoredPrintf(COLOR_RED,  "[  FAILED  ] ");
-    printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
     PrintFailedTests(unit_test);
-    printf("\n%2d FAILED %s\n", num_failures,
-                        num_failures == 1 ? "TEST" : "TESTS");
+    PrintFailedTestSuites(unit_test);
   }
 
   int num_disabled = unit_test.reportable_disabled_test_count();
   if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) {
-    if (!num_failures) {
+    if (unit_test.Passed()) {
       printf("\n");  // Add a spacer if no FAILURE banner is displayed.
     }
-    ColoredPrintf(COLOR_YELLOW,
-                  "  YOU HAVE %d DISABLED %s\n\n",
-                  num_disabled,
-                  num_disabled == 1 ? "TEST" : "TESTS");
+    ColoredPrintf(GTestColor::kYellow, "  YOU HAVE %d DISABLED %s\n\n",
+                  num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
   }
   // Ensure that Google Test output is printed before, e.g., heapchecker output.
   fflush(stdout);
@@ -4379,13 +5137,117 @@ void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
 
 // End PrettyUnitTestResultPrinter
 
+// This class implements the TestEventListener interface.
+//
+// Class BriefUnitTestResultPrinter is copyable.
+class BriefUnitTestResultPrinter : public TestEventListener {
+ public:
+  BriefUnitTestResultPrinter() {}
+  static void PrintTestName(const char* test_suite, const char* test) {
+    printf("%s.%s", test_suite, test);
+  }
+
+  // The following methods override what's in the TestEventListener class.
+  void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationStart(const UnitTest& /*unit_test*/,
+                            int /*iteration*/) override {}
+  void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) override {}
+  void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseStart(const TestCase& /*test_case*/) override {}
+#else
+  void OnTestSuiteStart(const TestSuite& /*test_suite*/) override {}
+#endif  // OnTestCaseStart
+
+  void OnTestStart(const TestInfo& /*test_info*/) override {}
+
+  void OnTestPartResult(const TestPartResult& result) override;
+  void OnTestEnd(const TestInfo& test_info) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseEnd(const TestCase& /*test_case*/) override {}
+#else
+  void OnTestSuiteEnd(const TestSuite& /*test_suite*/) override {}
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {}
+  void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+  void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
+};
+
+// Called after an assertion failure.
+void BriefUnitTestResultPrinter::OnTestPartResult(
+    const TestPartResult& result) {
+  switch (result.type()) {
+    // If the test part succeeded, we don't need to do anything.
+    case TestPartResult::kSuccess:
+      return;
+    default:
+      // Print failure message from the assertion
+      // (e.g. expected this and got that).
+      PrintTestPartResult(result);
+      fflush(stdout);
+  }
+}
+
+void BriefUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
+  if (test_info.result()->Failed()) {
+    ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
+    PrintTestName(test_info.test_suite_name(), test_info.name());
+    PrintFullTestCommentIfPresent(test_info);
+
+    if (GTEST_FLAG(print_time)) {
+      printf(" (%s ms)\n",
+             internal::StreamableToString(test_info.result()->elapsed_time())
+                 .c_str());
+    } else {
+      printf("\n");
+    }
+    fflush(stdout);
+  }
+}
+
+void BriefUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+                                                    int /*iteration*/) {
+  ColoredPrintf(GTestColor::kGreen, "[==========] ");
+  printf("%s from %s ran.",
+         FormatTestCount(unit_test.test_to_run_count()).c_str(),
+         FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
+  if (GTEST_FLAG(print_time)) {
+    printf(" (%s ms total)",
+           internal::StreamableToString(unit_test.elapsed_time()).c_str());
+  }
+  printf("\n");
+  ColoredPrintf(GTestColor::kGreen, "[  PASSED  ] ");
+  printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
+
+  const int skipped_test_count = unit_test.skipped_test_count();
+  if (skipped_test_count > 0) {
+    ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
+    printf("%s.\n", FormatTestCount(skipped_test_count).c_str());
+  }
+
+  int num_disabled = unit_test.reportable_disabled_test_count();
+  if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) {
+    if (unit_test.Passed()) {
+      printf("\n");  // Add a spacer if no FAILURE banner is displayed.
+    }
+    ColoredPrintf(GTestColor::kYellow, "  YOU HAVE %d DISABLED %s\n\n",
+                  num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
+  }
+  // Ensure that Google Test output is printed before, e.g., heapchecker output.
+  fflush(stdout);
+}
+
+// End BriefUnitTestResultPrinter
+
 // class TestEventRepeater
 //
 // This class forwards events to other event listeners.
 class TestEventRepeater : public TestEventListener {
  public:
   TestEventRepeater() : forwarding_enabled_(true) {}
-  virtual ~TestEventRepeater();
+  ~TestEventRepeater() override;
   void Append(TestEventListener *listener);
   TestEventListener* Release(TestEventListener* listener);
 
@@ -4394,19 +5256,27 @@ class TestEventRepeater : public TestEventListener {
   bool forwarding_enabled() const { return forwarding_enabled_; }
   void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }
 
-  virtual void OnTestProgramStart(const UnitTest& unit_test);
-  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
-  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
-  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test);
-  virtual void OnTestCaseStart(const TestCase& test_case);
-  virtual void OnTestStart(const TestInfo& test_info);
-  virtual void OnTestPartResult(const TestPartResult& result);
-  virtual void OnTestEnd(const TestInfo& test_info);
-  virtual void OnTestCaseEnd(const TestCase& test_case);
-  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
-  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test);
-  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
-  virtual void OnTestProgramEnd(const UnitTest& unit_test);
+  void OnTestProgramStart(const UnitTest& unit_test) override;
+  void OnTestIterationStart(const UnitTest& unit_test, int iteration) override;
+  void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
+  void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) override;
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseStart(const TestSuite& parameter) override;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestSuiteStart(const TestSuite& parameter) override;
+  void OnTestStart(const TestInfo& test_info) override;
+  void OnTestPartResult(const TestPartResult& result) override;
+  void OnTestEnd(const TestInfo& test_info) override;
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseEnd(const TestCase& parameter) override;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestSuiteEnd(const TestSuite& parameter) override;
+  void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
+  void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) override;
+  void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+  void OnTestProgramEnd(const UnitTest& unit_test) override;
 
  private:
   // Controls whether events will be forwarded to listeners_. Set to false
@@ -4426,16 +5296,15 @@ void TestEventRepeater::Append(TestEventListener *listener) {
   listeners_.push_back(listener);
 }
 
-// TODO(vladl@google.com): Factor the search functionality into Vector::Find.
 TestEventListener* TestEventRepeater::Release(TestEventListener *listener) {
   for (size_t i = 0; i < listeners_.size(); ++i) {
     if (listeners_[i] == listener) {
-      listeners_.erase(listeners_.begin() + i);
+      listeners_.erase(listeners_.begin() + static_cast<int>(i));
       return listener;
     }
   }
 
-  return NULL;
+  return nullptr;
 }
 
 // Since most methods are very similar, use macros to reduce boilerplate.
@@ -4450,25 +5319,33 @@ void TestEventRepeater::Name(const Type& parameter) { \
 }
 // This defines a member that forwards the call to all listeners in reverse
 // order.
-#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
-void TestEventRepeater::Name(const Type& parameter) { \
-  if (forwarding_enabled_) { \
-    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \
-      listeners_[i]->Name(parameter); \
-    } \
-  } \
-}
+#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type)      \
+  void TestEventRepeater::Name(const Type& parameter) { \
+    if (forwarding_enabled_) {                          \
+      for (size_t i = listeners_.size(); i != 0; i--) { \
+        listeners_[i - 1]->Name(parameter);             \
+      }                                                 \
+    }                                                   \
+  }
 
 GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
 GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
-GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase)
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REPEATER_METHOD_(OnTestCaseStart, TestSuite)
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REPEATER_METHOD_(OnTestSuiteStart, TestSuite)
 GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
 GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
 GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
 GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
 GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest)
 GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo)
-GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase)
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestSuite)
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REVERSE_REPEATER_METHOD_(OnTestSuiteEnd, TestSuite)
 GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)
 
 #undef GTEST_REPEATER_METHOD_
@@ -4486,8 +5363,8 @@ void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
 void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
                                            int iteration) {
   if (forwarding_enabled_) {
-    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) {
-      listeners_[i]->OnTestIterationEnd(unit_test, iteration);
+    for (size_t i = listeners_.size(); i > 0; i--) {
+      listeners_[i - 1]->OnTestIterationEnd(unit_test, iteration);
     }
   }
 }
@@ -4499,7 +5376,12 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
  public:
   explicit XmlUnitTestResultPrinter(const char* output_file);
 
-  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+  void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+  void ListTestsMatchingFilter(const std::vector<TestSuite*>& test_suites);
+
+  // Prints an XML summary of all unit tests.
+  static void PrintXmlTestsList(std::ostream* stream,
+                                const std::vector<TestSuite*>& test_suites);
 
  private:
   // Is c a whitespace character that is normalized to a space character
@@ -4542,14 +5424,24 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
   // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
   static void OutputXmlCDataSection(::std::ostream* stream, const char* data);
 
+  // Streams a test suite XML stanza containing the given test result.
+  //
+  // Requires: result.Failed()
+  static void OutputXmlTestSuiteForTestResult(::std::ostream* stream,
+                                              const TestResult& result);
+
+  // Streams an XML representation of a TestResult object.
+  static void OutputXmlTestResult(::std::ostream* stream,
+                                  const TestResult& result);
+
   // Streams an XML representation of a TestInfo object.
   static void OutputXmlTestInfo(::std::ostream* stream,
-                                const char* test_case_name,
+                                const char* test_suite_name,
                                 const TestInfo& test_info);
 
-  // Prints an XML representation of a TestCase object
-  static void PrintXmlTestCase(::std::ostream* stream,
-                               const TestCase& test_case);
+  // Prints an XML representation of a TestSuite object
+  static void PrintXmlTestSuite(::std::ostream* stream,
+                                const TestSuite& test_suite);
 
   // Prints an XML summary of unit_test to output stream out.
   static void PrintXmlUnitTest(::std::ostream* stream,
@@ -4561,6 +5453,11 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
   // to delimit this attribute from prior attributes.
   static std::string TestPropertiesAsXmlAttributes(const TestResult& result);
 
+  // Streams an XML representation of the test properties of a TestResult
+  // object.
+  static void OutputXmlTestProperties(std::ostream* stream,
+                                      const TestResult& result);
+
   // The output file.
   const std::string output_file_;
 
@@ -4570,46 +5467,30 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
 // Creates a new XmlUnitTestResultPrinter.
 XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
     : output_file_(output_file) {
-  if (output_file_.c_str() == NULL || output_file_.empty()) {
-    fprintf(stderr, "XML output file may not be null\n");
-    fflush(stderr);
-    exit(EXIT_FAILURE);
+  if (output_file_.empty()) {
+    GTEST_LOG_(FATAL) << "XML output file may not be null";
   }
 }
 
 // Called after the unit test ends.
 void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
                                                   int /*iteration*/) {
-  FILE* xmlout = NULL;
-  FilePath output_file(output_file_);
-  FilePath output_dir(output_file.RemoveFileName());
-
-  if (output_dir.CreateDirectoriesRecursively()) {
-    xmlout = posix::FOpen(output_file_.c_str(), "w");
-  }
-  if (xmlout == NULL) {
-    // TODO(wan): report the reason of the failure.
-    //
-    // We don't do it for now as:
-    //
-    //   1. There is no urgent need for it.
-    //   2. It's a bit involved to make the errno variable thread-safe on
-    //      all three operating systems (Linux, Windows, and Mac OS).
-    //   3. To interpret the meaning of errno in a thread-safe way,
-    //      we need the strerror_r() function, which is not available on
-    //      Windows.
-    fprintf(stderr,
-            "Unable to open file \"%s\"\n",
-            output_file_.c_str());
-    fflush(stderr);
-    exit(EXIT_FAILURE);
-  }
+  FILE* xmlout = OpenFileForWriting(output_file_);
   std::stringstream stream;
   PrintXmlUnitTest(&stream, unit_test);
   fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
   fclose(xmlout);
 }
 
+void XmlUnitTestResultPrinter::ListTestsMatchingFilter(
+    const std::vector<TestSuite*>& test_suites) {
+  FILE* xmlout = OpenFileForWriting(output_file_);
+  std::stringstream stream;
+  PrintXmlTestsList(&stream, test_suites);
+  fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
+  fclose(xmlout);
+}
+
 // Returns an XML-escaped copy of the input string str.  If is_attribute
 // is true, the text is meant to appear as an attribute value, and
 // normalizable whitespace is preserved by replacing it with character
@@ -4620,8 +5501,6 @@ void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
 // module will consist of ordinary English text.
 // If this module is ever modified to produce version 1.1 XML output,
 // most invalid characters can be retained using character references.
-// TODO(wan): It might be nice to have a minimally invasive, human-readable
-// escaping scheme for invalid characters, rather than dropping them.
 std::string XmlUnitTestResultPrinter::EscapeXml(
     const std::string& str, bool is_attribute) {
   Message m;
@@ -4681,11 +5560,12 @@ std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
 
 // The following routines generate an XML representation of a UnitTest
 // object.
+// GOOGLETEST_CM0009 DO NOT DELETE
 //
 // This is how Google Test concepts map to the DTD:
 //
 // <testsuites name="AllTests">        <-- corresponds to a UnitTest object
-//   <testsuite name="testcase-name">  <-- corresponds to a TestCase object
+//   <testsuite name="testcase-name">  <-- corresponds to a TestSuite object
 //     <testcase name="test-name">     <-- corresponds to a TestInfo object
 //       <failure message="...">...</failure>
 //       <failure message="...">...</failure>
@@ -4698,34 +5578,43 @@ std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
 // Formats the given time in milliseconds as seconds.
 std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
   ::std::stringstream ss;
-  ss << ms/1000.0;
+  ss << (static_cast<double>(ms) * 1e-3);
   return ss.str();
 }
 
-// Converts the given epoch time in milliseconds to a date string in the ISO
-// 8601 format, without the timezone information.
-std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
-  // Using non-reentrant version as localtime_r is not portable.
-  time_t seconds = static_cast<time_t>(ms / 1000);
-#ifdef _MSC_VER
-# pragma warning(push)          // Saves the current warning state.
-# pragma warning(disable:4996)  // Temporarily disables warning 4996
-                                // (function or variable may be unsafe).
-  const struct tm* const time_struct = localtime(&seconds);  // NOLINT
-# pragma warning(pop)           // Restores the warning state again.
+static bool PortableLocaltime(time_t seconds, struct tm* out) {
+#if defined(_MSC_VER)
+  return localtime_s(out, &seconds) == 0;
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+  // MINGW <time.h> provides neither localtime_r nor localtime_s, but uses
+  // Windows' localtime(), which has a thread-local tm buffer.
+  struct tm* tm_ptr = localtime(&seconds);  // NOLINT
+  if (tm_ptr == nullptr) return false;
+  *out = *tm_ptr;
+  return true;
+#elif defined(__STDC_LIB_EXT1__)
+  // Uses localtime_s when available as localtime_r is only available from
+  // C23 standard.
+  return localtime_s(&seconds, out) != nullptr;
 #else
-  const struct tm* const time_struct = localtime(&seconds);  // NOLINT
+  return localtime_r(&seconds, out) != nullptr;
 #endif
-  if (time_struct == NULL)
-    return "";  // Invalid ms value
+}
 
-  // YYYY-MM-DDThh:mm:ss
-  return StreamableToString(time_struct->tm_year + 1900) + "-" +
-      String::FormatIntWidth2(time_struct->tm_mon + 1) + "-" +
-      String::FormatIntWidth2(time_struct->tm_mday) + "T" +
-      String::FormatIntWidth2(time_struct->tm_hour) + ":" +
-      String::FormatIntWidth2(time_struct->tm_min) + ":" +
-      String::FormatIntWidth2(time_struct->tm_sec);
+// Converts the given epoch time in milliseconds to a date string in the ISO
+// 8601 format, without the timezone information.
+std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
+  struct tm time_struct;
+  if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
+    return "";
+  // YYYY-MM-DDThh:mm:ss.sss
+  return StreamableToString(time_struct.tm_year + 1900) + "-" +
+      String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
+      String::FormatIntWidth2(time_struct.tm_mday) + "T" +
+      String::FormatIntWidth2(time_struct.tm_hour) + ":" +
+      String::FormatIntWidth2(time_struct.tm_min) + ":" +
+      String::FormatIntWidth2(time_struct.tm_sec) + "." +
+      String::FormatIntWidthN(static_cast<int>(ms % 1000), 3);
 }
 
 // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
@@ -4735,7 +5624,7 @@ void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
   *stream << "<![CDATA[";
   for (;;) {
     const char* const next_segment = strstr(segment, "]]>");
-    if (next_segment != NULL) {
+    if (next_segment != nullptr) {
       stream->write(
           segment, static_cast<std::streamsize>(next_segment - segment));
       *stream << "]]>]]&gt;<![CDATA[";
@@ -4754,7 +5643,7 @@ void XmlUnitTestResultPrinter::OutputXmlAttribute(
     const std::string& name,
     const std::string& value) {
   const std::vector<std::string>& allowed_names =
-      GetReservedAttributesForElement(element_name);
+      GetReservedOutputAttributesForElement(element_name);
 
   GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
                    allowed_names.end())
@@ -4764,79 +5653,166 @@ void XmlUnitTestResultPrinter::OutputXmlAttribute(
   *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
 }
 
+// Streams a test suite XML stanza containing the given test result.
+void XmlUnitTestResultPrinter::OutputXmlTestSuiteForTestResult(
+    ::std::ostream* stream, const TestResult& result) {
+  // Output the boilerplate for a minimal test suite with one test.
+  *stream << "  <testsuite";
+  OutputXmlAttribute(stream, "testsuite", "name", "NonTestSuiteFailure");
+  OutputXmlAttribute(stream, "testsuite", "tests", "1");
+  OutputXmlAttribute(stream, "testsuite", "failures", "1");
+  OutputXmlAttribute(stream, "testsuite", "disabled", "0");
+  OutputXmlAttribute(stream, "testsuite", "skipped", "0");
+  OutputXmlAttribute(stream, "testsuite", "errors", "0");
+  OutputXmlAttribute(stream, "testsuite", "time",
+                     FormatTimeInMillisAsSeconds(result.elapsed_time()));
+  OutputXmlAttribute(
+      stream, "testsuite", "timestamp",
+      FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+  *stream << ">";
+
+  // Output the boilerplate for a minimal test case with a single test.
+  *stream << "    <testcase";
+  OutputXmlAttribute(stream, "testcase", "name", "");
+  OutputXmlAttribute(stream, "testcase", "status", "run");
+  OutputXmlAttribute(stream, "testcase", "result", "completed");
+  OutputXmlAttribute(stream, "testcase", "classname", "");
+  OutputXmlAttribute(stream, "testcase", "time",
+                     FormatTimeInMillisAsSeconds(result.elapsed_time()));
+  OutputXmlAttribute(
+      stream, "testcase", "timestamp",
+      FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+
+  // Output the actual test result.
+  OutputXmlTestResult(stream, result);
+
+  // Complete the test suite.
+  *stream << "  </testsuite>\n";
+}
+
 // Prints an XML representation of a TestInfo object.
-// TODO(wan): There is also value in printing properties with the plain printer.
 void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
-                                                 const char* test_case_name,
+                                                 const char* test_suite_name,
                                                  const TestInfo& test_info) {
   const TestResult& result = *test_info.result();
-  const std::string kTestcase = "testcase";
+  const std::string kTestsuite = "testcase";
+
+  if (test_info.is_in_another_shard()) {
+    return;
+  }
 
   *stream << "    <testcase";
-  OutputXmlAttribute(stream, kTestcase, "name", test_info.name());
+  OutputXmlAttribute(stream, kTestsuite, "name", test_info.name());
 
-  if (test_info.value_param() != NULL) {
-    OutputXmlAttribute(stream, kTestcase, "value_param",
+  if (test_info.value_param() != nullptr) {
+    OutputXmlAttribute(stream, kTestsuite, "value_param",
                        test_info.value_param());
   }
-  if (test_info.type_param() != NULL) {
-    OutputXmlAttribute(stream, kTestcase, "type_param", test_info.type_param());
+  if (test_info.type_param() != nullptr) {
+    OutputXmlAttribute(stream, kTestsuite, "type_param",
+                       test_info.type_param());
+  }
+  if (GTEST_FLAG(list_tests)) {
+    OutputXmlAttribute(stream, kTestsuite, "file", test_info.file());
+    OutputXmlAttribute(stream, kTestsuite, "line",
+                       StreamableToString(test_info.line()));
+    *stream << " />\n";
+    return;
   }
 
-  OutputXmlAttribute(stream, kTestcase, "status",
+  OutputXmlAttribute(stream, kTestsuite, "status",
                      test_info.should_run() ? "run" : "notrun");
-  OutputXmlAttribute(stream, kTestcase, "time",
+  OutputXmlAttribute(stream, kTestsuite, "result",
+                     test_info.should_run()
+                         ? (result.Skipped() ? "skipped" : "completed")
+                         : "suppressed");
+  OutputXmlAttribute(stream, kTestsuite, "time",
                      FormatTimeInMillisAsSeconds(result.elapsed_time()));
-  OutputXmlAttribute(stream, kTestcase, "classname", test_case_name);
-  *stream << TestPropertiesAsXmlAttributes(result);
+  OutputXmlAttribute(
+      stream, kTestsuite, "timestamp",
+      FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+  OutputXmlAttribute(stream, kTestsuite, "classname", test_suite_name);
+
+  OutputXmlTestResult(stream, result);
+}
 
+void XmlUnitTestResultPrinter::OutputXmlTestResult(::std::ostream* stream,
+                                                   const TestResult& result) {
   int failures = 0;
+  int skips = 0;
   for (int i = 0; i < result.total_part_count(); ++i) {
     const TestPartResult& part = result.GetTestPartResult(i);
     if (part.failed()) {
-      if (++failures == 1) {
+      if (++failures == 1 && skips == 0) {
         *stream << ">\n";
       }
-      const string location = internal::FormatCompilerIndependentFileLocation(
-          part.file_name(), part.line_number());
-      const string summary = location + "\n" + part.summary();
+      const std::string location =
+          internal::FormatCompilerIndependentFileLocation(part.file_name(),
+                                                          part.line_number());
+      const std::string summary = location + "\n" + part.summary();
       *stream << "      <failure message=\""
-              << EscapeXmlAttribute(summary.c_str())
+              << EscapeXmlAttribute(summary)
               << "\" type=\"\">";
-      const string detail = location + "\n" + part.message();
+      const std::string detail = location + "\n" + part.message();
       OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
       *stream << "</failure>\n";
+    } else if (part.skipped()) {
+      if (++skips == 1 && failures == 0) {
+        *stream << ">\n";
+      }
+      const std::string location =
+          internal::FormatCompilerIndependentFileLocation(part.file_name(),
+                                                          part.line_number());
+      const std::string summary = location + "\n" + part.summary();
+      *stream << "      <skipped message=\""
+              << EscapeXmlAttribute(summary.c_str()) << "\">";
+      const std::string detail = location + "\n" + part.message();
+      OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
+      *stream << "</skipped>\n";
     }
   }
 
-  if (failures == 0)
+  if (failures == 0 && skips == 0 && result.test_property_count() == 0) {
     *stream << " />\n";
-  else
+  } else {
+    if (failures == 0 && skips == 0) {
+      *stream << ">\n";
+    }
+    OutputXmlTestProperties(stream, result);
     *stream << "    </testcase>\n";
+  }
 }
 
-// Prints an XML representation of a TestCase object
-void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream,
-                                                const TestCase& test_case) {
+// Prints an XML representation of a TestSuite object
+void XmlUnitTestResultPrinter::PrintXmlTestSuite(std::ostream* stream,
+                                                 const TestSuite& test_suite) {
   const std::string kTestsuite = "testsuite";
   *stream << "  <" << kTestsuite;
-  OutputXmlAttribute(stream, kTestsuite, "name", test_case.name());
+  OutputXmlAttribute(stream, kTestsuite, "name", test_suite.name());
   OutputXmlAttribute(stream, kTestsuite, "tests",
-                     StreamableToString(test_case.reportable_test_count()));
-  OutputXmlAttribute(stream, kTestsuite, "failures",
-                     StreamableToString(test_case.failed_test_count()));
-  OutputXmlAttribute(
-      stream, kTestsuite, "disabled",
-      StreamableToString(test_case.reportable_disabled_test_count()));
-  OutputXmlAttribute(stream, kTestsuite, "errors", "0");
-  OutputXmlAttribute(stream, kTestsuite, "time",
-                     FormatTimeInMillisAsSeconds(test_case.elapsed_time()));
-  *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result())
-          << ">\n";
-
-  for (int i = 0; i < test_case.total_test_count(); ++i) {
-    if (test_case.GetTestInfo(i)->is_reportable())
-      OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i));
+                     StreamableToString(test_suite.reportable_test_count()));
+  if (!GTEST_FLAG(list_tests)) {
+    OutputXmlAttribute(stream, kTestsuite, "failures",
+                       StreamableToString(test_suite.failed_test_count()));
+    OutputXmlAttribute(
+        stream, kTestsuite, "disabled",
+        StreamableToString(test_suite.reportable_disabled_test_count()));
+    OutputXmlAttribute(stream, kTestsuite, "skipped",
+                       StreamableToString(test_suite.skipped_test_count()));
+
+    OutputXmlAttribute(stream, kTestsuite, "errors", "0");
+
+    OutputXmlAttribute(stream, kTestsuite, "time",
+                       FormatTimeInMillisAsSeconds(test_suite.elapsed_time()));
+    OutputXmlAttribute(
+        stream, kTestsuite, "timestamp",
+        FormatEpochTimeInMillisAsIso8601(test_suite.start_timestamp()));
+    *stream << TestPropertiesAsXmlAttributes(test_suite.ad_hoc_test_result());
+  }
+  *stream << ">\n";
+  for (int i = 0; i < test_suite.total_test_count(); ++i) {
+    if (test_suite.GetTestInfo(i)->is_reportable())
+      OutputXmlTestInfo(stream, test_suite.name(), *test_suite.GetTestInfo(i));
   }
   *stream << "  </" << kTestsuite << ">\n";
 }
@@ -4857,25 +5833,53 @@ void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
       stream, kTestsuites, "disabled",
       StreamableToString(unit_test.reportable_disabled_test_count()));
   OutputXmlAttribute(stream, kTestsuites, "errors", "0");
+  OutputXmlAttribute(stream, kTestsuites, "time",
+                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
   OutputXmlAttribute(
       stream, kTestsuites, "timestamp",
       FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
-  OutputXmlAttribute(stream, kTestsuites, "time",
-                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
 
   if (GTEST_FLAG(shuffle)) {
     OutputXmlAttribute(stream, kTestsuites, "random_seed",
                        StreamableToString(unit_test.random_seed()));
   }
-
   *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());
 
   OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
   *stream << ">\n";
 
-  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
-    if (unit_test.GetTestCase(i)->reportable_test_count() > 0)
-      PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
+  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+    if (unit_test.GetTestSuite(i)->reportable_test_count() > 0)
+      PrintXmlTestSuite(stream, *unit_test.GetTestSuite(i));
+  }
+
+  // If there was a test failure outside of one of the test suites (like in a
+  // test environment) include that in the output.
+  if (unit_test.ad_hoc_test_result().Failed()) {
+    OutputXmlTestSuiteForTestResult(stream, unit_test.ad_hoc_test_result());
+  }
+
+  *stream << "</" << kTestsuites << ">\n";
+}
+
+void XmlUnitTestResultPrinter::PrintXmlTestsList(
+    std::ostream* stream, const std::vector<TestSuite*>& test_suites) {
+  const std::string kTestsuites = "testsuites";
+
+  *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+  *stream << "<" << kTestsuites;
+
+  int total_tests = 0;
+  for (auto test_suite : test_suites) {
+    total_tests += test_suite->total_test_count();
+  }
+  OutputXmlAttribute(stream, kTestsuites, "tests",
+                     StreamableToString(total_tests));
+  OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
+  *stream << ">\n";
+
+  for (auto test_suite : test_suites) {
+    PrintXmlTestSuite(stream, *test_suite);
   }
   *stream << "</" << kTestsuites << ">\n";
 }
@@ -4893,55 +5897,515 @@ std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
   return attributes.GetString();
 }
 
-// End XmlUnitTestResultPrinter
+void XmlUnitTestResultPrinter::OutputXmlTestProperties(
+    std::ostream* stream, const TestResult& result) {
+  const std::string kProperties = "properties";
+  const std::string kProperty = "property";
 
-#if GTEST_CAN_STREAM_RESULTS_
+  if (result.test_property_count() <= 0) {
+    return;
+  }
 
-// Checks if str contains '=', '&', '%' or '\n' characters. If yes,
-// replaces them by "%xx" where xx is their hexadecimal value. For
-// example, replaces "=" with "%3D".  This algorithm is O(strlen(str))
-// in both time and space -- important as the input str may contain an
-// arbitrarily long test failure message and stack trace.
-string StreamingListener::UrlEncode(const char* str) {
-  string result;
-  result.reserve(strlen(str) + 1);
-  for (char ch = *str; ch != '\0'; ch = *++str) {
-    switch (ch) {
-      case '%':
-      case '=':
-      case '&':
-      case '\n':
-        result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
-        break;
-      default:
-        result.push_back(ch);
-        break;
-    }
+  *stream << "<" << kProperties << ">\n";
+  for (int i = 0; i < result.test_property_count(); ++i) {
+    const TestProperty& property = result.GetTestProperty(i);
+    *stream << "<" << kProperty;
+    *stream << " name=\"" << EscapeXmlAttribute(property.key()) << "\"";
+    *stream << " value=\"" << EscapeXmlAttribute(property.value()) << "\"";
+    *stream << "/>\n";
   }
-  return result;
+  *stream << "</" << kProperties << ">\n";
 }
 
-void StreamingListener::SocketWriter::MakeConnection() {
-  GTEST_CHECK_(sockfd_ == -1)
-      << "MakeConnection() can't be called when there is already a connection.";
+// End XmlUnitTestResultPrinter
 
-  addrinfo hints;
-  memset(&hints, 0, sizeof(hints));
-  hints.ai_family = AF_UNSPEC;    // To allow both IPv4 and IPv6 addresses.
-  hints.ai_socktype = SOCK_STREAM;
-  addrinfo* servinfo = NULL;
+// This class generates an JSON output file.
+class JsonUnitTestResultPrinter : public EmptyTestEventListener {
+ public:
+  explicit JsonUnitTestResultPrinter(const char* output_file);
 
-  // Use the getaddrinfo() to get a linked list of IP addresses for
-  // the given host name.
-  const int error_num = getaddrinfo(
-      host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);
-  if (error_num != 0) {
-    GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
-                        << gai_strerror(error_num);
-  }
+  void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+
+  // Prints an JSON summary of all unit tests.
+  static void PrintJsonTestList(::std::ostream* stream,
+                                const std::vector<TestSuite*>& test_suites);
+
+ private:
+  // Returns an JSON-escaped copy of the input string str.
+  static std::string EscapeJson(const std::string& str);
+
+  //// Verifies that the given attribute belongs to the given element and
+  //// streams the attribute as JSON.
+  static void OutputJsonKey(std::ostream* stream,
+                            const std::string& element_name,
+                            const std::string& name,
+                            const std::string& value,
+                            const std::string& indent,
+                            bool comma = true);
+  static void OutputJsonKey(std::ostream* stream,
+                            const std::string& element_name,
+                            const std::string& name,
+                            int value,
+                            const std::string& indent,
+                            bool comma = true);
+
+  // Streams a test suite JSON stanza containing the given test result.
+  //
+  // Requires: result.Failed()
+  static void OutputJsonTestSuiteForTestResult(::std::ostream* stream,
+                                               const TestResult& result);
+
+  // Streams a JSON representation of a TestResult object.
+  static void OutputJsonTestResult(::std::ostream* stream,
+                                   const TestResult& result);
+
+  // Streams a JSON representation of a TestInfo object.
+  static void OutputJsonTestInfo(::std::ostream* stream,
+                                 const char* test_suite_name,
+                                 const TestInfo& test_info);
+
+  // Prints a JSON representation of a TestSuite object
+  static void PrintJsonTestSuite(::std::ostream* stream,
+                                 const TestSuite& test_suite);
+
+  // Prints a JSON summary of unit_test to output stream out.
+  static void PrintJsonUnitTest(::std::ostream* stream,
+                                const UnitTest& unit_test);
+
+  // Produces a string representing the test properties in a result as
+  // a JSON dictionary.
+  static std::string TestPropertiesAsJson(const TestResult& result,
+                                          const std::string& indent);
+
+  // The output file.
+  const std::string output_file_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(JsonUnitTestResultPrinter);
+};
+
+// Creates a new JsonUnitTestResultPrinter.
+JsonUnitTestResultPrinter::JsonUnitTestResultPrinter(const char* output_file)
+    : output_file_(output_file) {
+  if (output_file_.empty()) {
+    GTEST_LOG_(FATAL) << "JSON output file may not be null";
+  }
+}
+
+void JsonUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+                                                  int /*iteration*/) {
+  FILE* jsonout = OpenFileForWriting(output_file_);
+  std::stringstream stream;
+  PrintJsonUnitTest(&stream, unit_test);
+  fprintf(jsonout, "%s", StringStreamToString(&stream).c_str());
+  fclose(jsonout);
+}
+
+// Returns an JSON-escaped copy of the input string str.
+std::string JsonUnitTestResultPrinter::EscapeJson(const std::string& str) {
+  Message m;
+
+  for (size_t i = 0; i < str.size(); ++i) {
+    const char ch = str[i];
+    switch (ch) {
+      case '\\':
+      case '"':
+      case '/':
+        m << '\\' << ch;
+        break;
+      case '\b':
+        m << "\\b";
+        break;
+      case '\t':
+        m << "\\t";
+        break;
+      case '\n':
+        m << "\\n";
+        break;
+      case '\f':
+        m << "\\f";
+        break;
+      case '\r':
+        m << "\\r";
+        break;
+      default:
+        if (ch < ' ') {
+          m << "\\u00" << String::FormatByte(static_cast<unsigned char>(ch));
+        } else {
+          m << ch;
+        }
+        break;
+    }
+  }
+
+  return m.GetString();
+}
+
+// The following routines generate an JSON representation of a UnitTest
+// object.
+
+// Formats the given time in milliseconds as seconds.
+static std::string FormatTimeInMillisAsDuration(TimeInMillis ms) {
+  ::std::stringstream ss;
+  ss << (static_cast<double>(ms) * 1e-3) << "s";
+  return ss.str();
+}
+
+// Converts the given epoch time in milliseconds to a date string in the
+// RFC3339 format, without the timezone information.
+static std::string FormatEpochTimeInMillisAsRFC3339(TimeInMillis ms) {
+  struct tm time_struct;
+  if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
+    return "";
+  // YYYY-MM-DDThh:mm:ss
+  return StreamableToString(time_struct.tm_year + 1900) + "-" +
+      String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
+      String::FormatIntWidth2(time_struct.tm_mday) + "T" +
+      String::FormatIntWidth2(time_struct.tm_hour) + ":" +
+      String::FormatIntWidth2(time_struct.tm_min) + ":" +
+      String::FormatIntWidth2(time_struct.tm_sec) + "Z";
+}
+
+static inline std::string Indent(size_t width) {
+  return std::string(width, ' ');
+}
+
+void JsonUnitTestResultPrinter::OutputJsonKey(
+    std::ostream* stream,
+    const std::string& element_name,
+    const std::string& name,
+    const std::string& value,
+    const std::string& indent,
+    bool comma) {
+  const std::vector<std::string>& allowed_names =
+      GetReservedOutputAttributesForElement(element_name);
+
+  GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+                   allowed_names.end())
+      << "Key \"" << name << "\" is not allowed for value \"" << element_name
+      << "\".";
+
+  *stream << indent << "\"" << name << "\": \"" << EscapeJson(value) << "\"";
+  if (comma)
+    *stream << ",\n";
+}
+
+void JsonUnitTestResultPrinter::OutputJsonKey(
+    std::ostream* stream,
+    const std::string& element_name,
+    const std::string& name,
+    int value,
+    const std::string& indent,
+    bool comma) {
+  const std::vector<std::string>& allowed_names =
+      GetReservedOutputAttributesForElement(element_name);
+
+  GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+                   allowed_names.end())
+      << "Key \"" << name << "\" is not allowed for value \"" << element_name
+      << "\".";
+
+  *stream << indent << "\"" << name << "\": " << StreamableToString(value);
+  if (comma)
+    *stream << ",\n";
+}
+
+// Streams a test suite JSON stanza containing the given test result.
+void JsonUnitTestResultPrinter::OutputJsonTestSuiteForTestResult(
+    ::std::ostream* stream, const TestResult& result) {
+  // Output the boilerplate for a new test suite.
+  *stream << Indent(4) << "{\n";
+  OutputJsonKey(stream, "testsuite", "name", "NonTestSuiteFailure", Indent(6));
+  OutputJsonKey(stream, "testsuite", "tests", 1, Indent(6));
+  if (!GTEST_FLAG(list_tests)) {
+    OutputJsonKey(stream, "testsuite", "failures", 1, Indent(6));
+    OutputJsonKey(stream, "testsuite", "disabled", 0, Indent(6));
+    OutputJsonKey(stream, "testsuite", "skipped", 0, Indent(6));
+    OutputJsonKey(stream, "testsuite", "errors", 0, Indent(6));
+    OutputJsonKey(stream, "testsuite", "time",
+                  FormatTimeInMillisAsDuration(result.elapsed_time()),
+                  Indent(6));
+    OutputJsonKey(stream, "testsuite", "timestamp",
+                  FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+                  Indent(6));
+  }
+  *stream << Indent(6) << "\"testsuite\": [\n";
+
+  // Output the boilerplate for a new test case.
+  *stream << Indent(8) << "{\n";
+  OutputJsonKey(stream, "testcase", "name", "", Indent(10));
+  OutputJsonKey(stream, "testcase", "status", "RUN", Indent(10));
+  OutputJsonKey(stream, "testcase", "result", "COMPLETED", Indent(10));
+  OutputJsonKey(stream, "testcase", "timestamp",
+                FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+                Indent(10));
+  OutputJsonKey(stream, "testcase", "time",
+                FormatTimeInMillisAsDuration(result.elapsed_time()),
+                Indent(10));
+  OutputJsonKey(stream, "testcase", "classname", "", Indent(10), false);
+  *stream << TestPropertiesAsJson(result, Indent(10));
+
+  // Output the actual test result.
+  OutputJsonTestResult(stream, result);
+
+  // Finish the test suite.
+  *stream << "\n" << Indent(6) << "]\n" << Indent(4) << "}";
+}
+
+// Prints a JSON representation of a TestInfo object.
+void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream* stream,
+                                                   const char* test_suite_name,
+                                                   const TestInfo& test_info) {
+  const TestResult& result = *test_info.result();
+  const std::string kTestsuite = "testcase";
+  const std::string kIndent = Indent(10);
+
+  *stream << Indent(8) << "{\n";
+  OutputJsonKey(stream, kTestsuite, "name", test_info.name(), kIndent);
+
+  if (test_info.value_param() != nullptr) {
+    OutputJsonKey(stream, kTestsuite, "value_param", test_info.value_param(),
+                  kIndent);
+  }
+  if (test_info.type_param() != nullptr) {
+    OutputJsonKey(stream, kTestsuite, "type_param", test_info.type_param(),
+                  kIndent);
+  }
+  if (GTEST_FLAG(list_tests)) {
+    OutputJsonKey(stream, kTestsuite, "file", test_info.file(), kIndent);
+    OutputJsonKey(stream, kTestsuite, "line", test_info.line(), kIndent, false);
+    *stream << "\n" << Indent(8) << "}";
+    return;
+  }
+
+  OutputJsonKey(stream, kTestsuite, "status",
+                test_info.should_run() ? "RUN" : "NOTRUN", kIndent);
+  OutputJsonKey(stream, kTestsuite, "result",
+                test_info.should_run()
+                    ? (result.Skipped() ? "SKIPPED" : "COMPLETED")
+                    : "SUPPRESSED",
+                kIndent);
+  OutputJsonKey(stream, kTestsuite, "timestamp",
+                FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+                kIndent);
+  OutputJsonKey(stream, kTestsuite, "time",
+                FormatTimeInMillisAsDuration(result.elapsed_time()), kIndent);
+  OutputJsonKey(stream, kTestsuite, "classname", test_suite_name, kIndent,
+                false);
+  *stream << TestPropertiesAsJson(result, kIndent);
+
+  OutputJsonTestResult(stream, result);
+}
+
+void JsonUnitTestResultPrinter::OutputJsonTestResult(::std::ostream* stream,
+                                                     const TestResult& result) {
+  const std::string kIndent = Indent(10);
+
+  int failures = 0;
+  for (int i = 0; i < result.total_part_count(); ++i) {
+    const TestPartResult& part = result.GetTestPartResult(i);
+    if (part.failed()) {
+      *stream << ",\n";
+      if (++failures == 1) {
+        *stream << kIndent << "\"" << "failures" << "\": [\n";
+      }
+      const std::string location =
+          internal::FormatCompilerIndependentFileLocation(part.file_name(),
+                                                          part.line_number());
+      const std::string message = EscapeJson(location + "\n" + part.message());
+      *stream << kIndent << "  {\n"
+              << kIndent << "    \"failure\": \"" << message << "\",\n"
+              << kIndent << "    \"type\": \"\"\n"
+              << kIndent << "  }";
+    }
+  }
+
+  if (failures > 0)
+    *stream << "\n" << kIndent << "]";
+  *stream << "\n" << Indent(8) << "}";
+}
+
+// Prints an JSON representation of a TestSuite object
+void JsonUnitTestResultPrinter::PrintJsonTestSuite(
+    std::ostream* stream, const TestSuite& test_suite) {
+  const std::string kTestsuite = "testsuite";
+  const std::string kIndent = Indent(6);
+
+  *stream << Indent(4) << "{\n";
+  OutputJsonKey(stream, kTestsuite, "name", test_suite.name(), kIndent);
+  OutputJsonKey(stream, kTestsuite, "tests", test_suite.reportable_test_count(),
+                kIndent);
+  if (!GTEST_FLAG(list_tests)) {
+    OutputJsonKey(stream, kTestsuite, "failures",
+                  test_suite.failed_test_count(), kIndent);
+    OutputJsonKey(stream, kTestsuite, "disabled",
+                  test_suite.reportable_disabled_test_count(), kIndent);
+    OutputJsonKey(stream, kTestsuite, "errors", 0, kIndent);
+    OutputJsonKey(
+        stream, kTestsuite, "timestamp",
+        FormatEpochTimeInMillisAsRFC3339(test_suite.start_timestamp()),
+        kIndent);
+    OutputJsonKey(stream, kTestsuite, "time",
+                  FormatTimeInMillisAsDuration(test_suite.elapsed_time()),
+                  kIndent, false);
+    *stream << TestPropertiesAsJson(test_suite.ad_hoc_test_result(), kIndent)
+            << ",\n";
+  }
+
+  *stream << kIndent << "\"" << kTestsuite << "\": [\n";
+
+  bool comma = false;
+  for (int i = 0; i < test_suite.total_test_count(); ++i) {
+    if (test_suite.GetTestInfo(i)->is_reportable()) {
+      if (comma) {
+        *stream << ",\n";
+      } else {
+        comma = true;
+      }
+      OutputJsonTestInfo(stream, test_suite.name(), *test_suite.GetTestInfo(i));
+    }
+  }
+  *stream << "\n" << kIndent << "]\n" << Indent(4) << "}";
+}
+
+// Prints a JSON summary of unit_test to output stream out.
+void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream* stream,
+                                                  const UnitTest& unit_test) {
+  const std::string kTestsuites = "testsuites";
+  const std::string kIndent = Indent(2);
+  *stream << "{\n";
+
+  OutputJsonKey(stream, kTestsuites, "tests", unit_test.reportable_test_count(),
+                kIndent);
+  OutputJsonKey(stream, kTestsuites, "failures", unit_test.failed_test_count(),
+                kIndent);
+  OutputJsonKey(stream, kTestsuites, "disabled",
+                unit_test.reportable_disabled_test_count(), kIndent);
+  OutputJsonKey(stream, kTestsuites, "errors", 0, kIndent);
+  if (GTEST_FLAG(shuffle)) {
+    OutputJsonKey(stream, kTestsuites, "random_seed", unit_test.random_seed(),
+                  kIndent);
+  }
+  OutputJsonKey(stream, kTestsuites, "timestamp",
+                FormatEpochTimeInMillisAsRFC3339(unit_test.start_timestamp()),
+                kIndent);
+  OutputJsonKey(stream, kTestsuites, "time",
+                FormatTimeInMillisAsDuration(unit_test.elapsed_time()), kIndent,
+                false);
+
+  *stream << TestPropertiesAsJson(unit_test.ad_hoc_test_result(), kIndent)
+          << ",\n";
+
+  OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent);
+  *stream << kIndent << "\"" << kTestsuites << "\": [\n";
+
+  bool comma = false;
+  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+    if (unit_test.GetTestSuite(i)->reportable_test_count() > 0) {
+      if (comma) {
+        *stream << ",\n";
+      } else {
+        comma = true;
+      }
+      PrintJsonTestSuite(stream, *unit_test.GetTestSuite(i));
+    }
+  }
+
+  // If there was a test failure outside of one of the test suites (like in a
+  // test environment) include that in the output.
+  if (unit_test.ad_hoc_test_result().Failed()) {
+    OutputJsonTestSuiteForTestResult(stream, unit_test.ad_hoc_test_result());
+  }
+
+  *stream << "\n" << kIndent << "]\n" << "}\n";
+}
+
+void JsonUnitTestResultPrinter::PrintJsonTestList(
+    std::ostream* stream, const std::vector<TestSuite*>& test_suites) {
+  const std::string kTestsuites = "testsuites";
+  const std::string kIndent = Indent(2);
+  *stream << "{\n";
+  int total_tests = 0;
+  for (auto test_suite : test_suites) {
+    total_tests += test_suite->total_test_count();
+  }
+  OutputJsonKey(stream, kTestsuites, "tests", total_tests, kIndent);
+
+  OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent);
+  *stream << kIndent << "\"" << kTestsuites << "\": [\n";
+
+  for (size_t i = 0; i < test_suites.size(); ++i) {
+    if (i != 0) {
+      *stream << ",\n";
+    }
+    PrintJsonTestSuite(stream, *test_suites[i]);
+  }
+
+  *stream << "\n"
+          << kIndent << "]\n"
+          << "}\n";
+}
+// Produces a string representing the test properties in a result as
+// a JSON dictionary.
+std::string JsonUnitTestResultPrinter::TestPropertiesAsJson(
+    const TestResult& result, const std::string& indent) {
+  Message attributes;
+  for (int i = 0; i < result.test_property_count(); ++i) {
+    const TestProperty& property = result.GetTestProperty(i);
+    attributes << ",\n" << indent << "\"" << property.key() << "\": "
+               << "\"" << EscapeJson(property.value()) << "\"";
+  }
+  return attributes.GetString();
+}
+
+// End JsonUnitTestResultPrinter
+
+#if GTEST_CAN_STREAM_RESULTS_
+
+// Checks if str contains '=', '&', '%' or '\n' characters. If yes,
+// replaces them by "%xx" where xx is their hexadecimal value. For
+// example, replaces "=" with "%3D".  This algorithm is O(strlen(str))
+// in both time and space -- important as the input str may contain an
+// arbitrarily long test failure message and stack trace.
+std::string StreamingListener::UrlEncode(const char* str) {
+  std::string result;
+  result.reserve(strlen(str) + 1);
+  for (char ch = *str; ch != '\0'; ch = *++str) {
+    switch (ch) {
+      case '%':
+      case '=':
+      case '&':
+      case '\n':
+        result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
+        break;
+      default:
+        result.push_back(ch);
+        break;
+    }
+  }
+  return result;
+}
+
+void StreamingListener::SocketWriter::MakeConnection() {
+  GTEST_CHECK_(sockfd_ == -1)
+      << "MakeConnection() can't be called when there is already a connection.";
+
+  addrinfo hints;
+  memset(&hints, 0, sizeof(hints));
+  hints.ai_family = AF_UNSPEC;    // To allow both IPv4 and IPv6 addresses.
+  hints.ai_socktype = SOCK_STREAM;
+  addrinfo* servinfo = nullptr;
+
+  // Use the getaddrinfo() to get a linked list of IP addresses for
+  // the given host name.
+  const int error_num = getaddrinfo(
+      host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);
+  if (error_num != 0) {
+    GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
+                        << gai_strerror(error_num);
+  }
 
   // Loop through all the results and connect to the first we can.
-  for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL;
+  for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != nullptr;
        cur_addr = cur_addr->ai_next) {
     sockfd_ = socket(
         cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol);
@@ -4965,58 +6429,82 @@ void StreamingListener::SocketWriter::MakeConnection() {
 // End of class Streaming Listener
 #endif  // GTEST_CAN_STREAM_RESULTS__
 
-// Class ScopedTrace
+// class OsStackTraceGetter
 
-// Pushes the given source file location and message onto a per-thread
-// trace stack maintained by Google Test.
-ScopedTrace::ScopedTrace(const char* file, int line, const Message& message)
-    GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
-  TraceInfo trace;
-  trace.file = file;
-  trace.line = line;
-  trace.message = message.GetString();
+const char* const OsStackTraceGetterInterface::kElidedFramesMarker =
+    "... " GTEST_NAME_ " internal frames ...";
 
-  UnitTest::GetInstance()->PushGTestTrace(trace);
-}
+std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count)
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+#if GTEST_HAS_ABSL
+  std::string result;
 
-// Pops the info pushed by the c'tor.
-ScopedTrace::~ScopedTrace()
-    GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
-  UnitTest::GetInstance()->PopGTestTrace();
-}
+  if (max_depth <= 0) {
+    return result;
+  }
 
+  max_depth = std::min(max_depth, kMaxStackTraceDepth);
 
-// class OsStackTraceGetter
+  std::vector<void*> raw_stack(max_depth);
+  // Skips the frames requested by the caller, plus this function.
+  const int raw_stack_size =
+      absl::GetStackTrace(&raw_stack[0], max_depth, skip_count + 1);
 
-// Returns the current OS stack trace as an std::string.  Parameters:
-//
-//   max_depth  - the maximum number of stack frames to be included
-//                in the trace.
-//   skip_count - the number of top frames to be skipped; doesn't count
-//                against max_depth.
-//
-string OsStackTraceGetter::CurrentStackTrace(int /* max_depth */,
-                                             int /* skip_count */)
-    GTEST_LOCK_EXCLUDED_(mutex_) {
+  void* caller_frame = nullptr;
+  {
+    MutexLock lock(&mutex_);
+    caller_frame = caller_frame_;
+  }
+
+  for (int i = 0; i < raw_stack_size; ++i) {
+    if (raw_stack[i] == caller_frame &&
+        !GTEST_FLAG(show_internal_stack_frames)) {
+      // Add a marker to the trace and stop adding frames.
+      absl::StrAppend(&result, kElidedFramesMarker, "\n");
+      break;
+    }
+
+    char tmp[1024];
+    const char* symbol = "(unknown)";
+    if (absl::Symbolize(raw_stack[i], tmp, sizeof(tmp))) {
+      symbol = tmp;
+    }
+
+    char line[1024];
+    snprintf(line, sizeof(line), "  %p: %s\n", raw_stack[i], symbol);
+    result += line;
+  }
+
+  return result;
+
+#else  // !GTEST_HAS_ABSL
+  static_cast<void>(max_depth);
+  static_cast<void>(skip_count);
   return "";
+#endif  // GTEST_HAS_ABSL
 }
 
-void OsStackTraceGetter::UponLeavingGTest()
-    GTEST_LOCK_EXCLUDED_(mutex_) {
-}
+void OsStackTraceGetter::UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_) {
+#if GTEST_HAS_ABSL
+  void* caller_frame = nullptr;
+  if (absl::GetStackTrace(&caller_frame, 1, 3) <= 0) {
+    caller_frame = nullptr;
+  }
 
-const char* const
-OsStackTraceGetter::kElidedFramesMarker =
-    "... " GTEST_NAME_ " internal frames ...";
+  MutexLock lock(&mutex_);
+  caller_frame_ = caller_frame;
+#endif  // GTEST_HAS_ABSL
+}
 
 // A helper class that creates the premature-exit file in its
 // constructor and deletes the file in its destructor.
 class ScopedPrematureExitFile {
  public:
   explicit ScopedPrematureExitFile(const char* premature_exit_filepath)
-      : premature_exit_filepath_(premature_exit_filepath) {
+      : premature_exit_filepath_(premature_exit_filepath ?
+                                 premature_exit_filepath : "") {
     // If a path to the premature-exit file is specified...
-    if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') {
+    if (!premature_exit_filepath_.empty()) {
       // create the file with a single "0" character in it.  I/O
       // errors are ignored as there's nothing better we can do and we
       // don't want to fail the test because of this.
@@ -5027,13 +6515,20 @@ class ScopedPrematureExitFile {
   }
 
   ~ScopedPrematureExitFile() {
-    if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') {
-      remove(premature_exit_filepath_);
+#if !defined GTEST_OS_ESP8266
+    if (!premature_exit_filepath_.empty()) {
+      int retval = remove(premature_exit_filepath_.c_str());
+      if (retval) {
+        GTEST_LOG_(ERROR) << "Failed to remove premature exit filepath \""
+                          << premature_exit_filepath_ << "\" with error "
+                          << retval;
+      }
     }
+#endif
   }
 
  private:
-  const char* const premature_exit_filepath_;
+  const std::string premature_exit_filepath_;
 
   GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile);
 };
@@ -5044,9 +6539,8 @@ class ScopedPrematureExitFile {
 
 TestEventListeners::TestEventListeners()
     : repeater_(new internal::TestEventRepeater()),
-      default_result_printer_(NULL),
-      default_xml_generator_(NULL) {
-}
+      default_result_printer_(nullptr),
+      default_xml_generator_(nullptr) {}
 
 TestEventListeners::~TestEventListeners() { delete repeater_; }
 
@@ -5063,9 +6557,9 @@ void TestEventListeners::Append(TestEventListener* listener) {
 // NULL if the listener is not found in the list.
 TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
   if (listener == default_result_printer_)
-    default_result_printer_ = NULL;
+    default_result_printer_ = nullptr;
   else if (listener == default_xml_generator_)
-    default_xml_generator_ = NULL;
+    default_xml_generator_ = nullptr;
   return repeater_->Release(listener);
 }
 
@@ -5084,8 +6578,7 @@ void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
     // list.
     delete Release(default_result_printer_);
     default_result_printer_ = listener;
-    if (listener != NULL)
-      Append(listener);
+    if (listener != nullptr) Append(listener);
   }
 }
 
@@ -5100,8 +6593,7 @@ void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
     // list.
     delete Release(default_xml_generator_);
     default_xml_generator_ = listener;
-    if (listener != NULL)
-      Append(listener);
+    if (listener != nullptr) Append(listener);
   }
 }
 
@@ -5125,52 +6617,66 @@ void TestEventListeners::SuppressEventForwarding() {
 // call this before main() starts, from which point on the return
 // value will never change.
 UnitTest* UnitTest::GetInstance() {
-  // When compiled with MSVC 7.1 in optimized mode, destroying the
-  // UnitTest object upon exiting the program messes up the exit code,
-  // causing successful tests to appear failed.  We have to use a
-  // different implementation in this case to bypass the compiler bug.
-  // This implementation makes the compiler happy, at the cost of
-  // leaking the UnitTest object.
-
   // CodeGear C++Builder insists on a public destructor for the
   // default implementation.  Use this implementation to keep good OO
   // design with private destructor.
 
-#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
+#if defined(__BORLANDC__)
   static UnitTest* const instance = new UnitTest;
   return instance;
 #else
   static UnitTest instance;
   return &instance;
-#endif  // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
+#endif  // defined(__BORLANDC__)
 }
 
-// Gets the number of successful test cases.
-int UnitTest::successful_test_case_count() const {
-  return impl()->successful_test_case_count();
+// Gets the number of successful test suites.
+int UnitTest::successful_test_suite_count() const {
+  return impl()->successful_test_suite_count();
 }
 
-// Gets the number of failed test cases.
-int UnitTest::failed_test_case_count() const {
-  return impl()->failed_test_case_count();
+// Gets the number of failed test suites.
+int UnitTest::failed_test_suite_count() const {
+  return impl()->failed_test_suite_count();
 }
 
-// Gets the number of all test cases.
-int UnitTest::total_test_case_count() const {
-  return impl()->total_test_case_count();
+// Gets the number of all test suites.
+int UnitTest::total_test_suite_count() const {
+  return impl()->total_test_suite_count();
 }
 
-// Gets the number of all test cases that contain at least one test
+// Gets the number of all test suites that contain at least one test
 // that should run.
+int UnitTest::test_suite_to_run_count() const {
+  return impl()->test_suite_to_run_count();
+}
+
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+int UnitTest::successful_test_case_count() const {
+  return impl()->successful_test_suite_count();
+}
+int UnitTest::failed_test_case_count() const {
+  return impl()->failed_test_suite_count();
+}
+int UnitTest::total_test_case_count() const {
+  return impl()->total_test_suite_count();
+}
 int UnitTest::test_case_to_run_count() const {
-  return impl()->test_case_to_run_count();
+  return impl()->test_suite_to_run_count();
 }
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 // Gets the number of successful tests.
 int UnitTest::successful_test_count() const {
   return impl()->successful_test_count();
 }
 
+// Gets the number of skipped tests.
+int UnitTest::skipped_test_count() const {
+  return impl()->skipped_test_count();
+}
+
 // Gets the number of failed tests.
 int UnitTest::failed_test_count() const { return impl()->failed_test_count(); }
 
@@ -5206,29 +6712,37 @@ internal::TimeInMillis UnitTest::elapsed_time() const {
   return impl()->elapsed_time();
 }
 
-// Returns true iff the unit test passed (i.e. all test cases passed).
+// Returns true if and only if the unit test passed (i.e. all test suites
+// passed).
 bool UnitTest::Passed() const { return impl()->Passed(); }
 
-// Returns true iff the unit test failed (i.e. some test case failed
-// or something outside of all tests failed).
+// Returns true if and only if the unit test failed (i.e. some test suite
+// failed or something outside of all tests failed).
 bool UnitTest::Failed() const { return impl()->Failed(); }
 
-// Gets the i-th test case among all the test cases. i can range from 0 to
-// total_test_case_count() - 1. If i is not in that range, returns NULL.
+// Gets the i-th test suite among all the test suites. i can range from 0 to
+// total_test_suite_count() - 1. If i is not in that range, returns NULL.
+const TestSuite* UnitTest::GetTestSuite(int i) const {
+  return impl()->GetTestSuite(i);
+}
+
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 const TestCase* UnitTest::GetTestCase(int i) const {
   return impl()->GetTestCase(i);
 }
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 // Returns the TestResult containing information on test failures and
-// properties logged outside of individual test cases.
+// properties logged outside of individual test suites.
 const TestResult& UnitTest::ad_hoc_test_result() const {
   return *impl()->ad_hoc_test_result();
 }
 
-// Gets the i-th test case among all the test cases. i can range from 0 to
-// total_test_case_count() - 1. If i is not in that range, returns NULL.
-TestCase* UnitTest::GetMutableTestCase(int i) {
-  return impl()->GetMutableTestCase(i);
+// Gets the i-th test suite among all the test suites. i can range from 0 to
+// total_test_suite_count() - 1. If i is not in that range, returns NULL.
+TestSuite* UnitTest::GetMutableTestSuite(int i) {
+  return impl()->GetMutableSuiteCase(i);
 }
 
 // Returns the list of event listeners that can be used to track events
@@ -5248,8 +6762,8 @@ TestEventListeners& UnitTest::listeners() {
 // We don't protect this under mutex_, as we only support calling it
 // from the main thread.
 Environment* UnitTest::AddEnvironment(Environment* env) {
-  if (env == NULL) {
-    return NULL;
+  if (env == nullptr) {
+    return nullptr;
   }
 
   impl_->environments().push_back(env);
@@ -5273,42 +6787,45 @@ void UnitTest::AddTestPartResult(
   if (impl_->gtest_trace_stack().size() > 0) {
     msg << "\n" << GTEST_NAME_ << " trace:";
 
-    for (int i = static_cast<int>(impl_->gtest_trace_stack().size());
-         i > 0; --i) {
+    for (size_t i = impl_->gtest_trace_stack().size(); i > 0; --i) {
       const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
       msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
           << " " << trace.message;
     }
   }
 
-  if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) {
+  if (os_stack_trace.c_str() != nullptr && !os_stack_trace.empty()) {
     msg << internal::kStackTraceMarker << os_stack_trace;
   }
 
-  const TestPartResult result =
-    TestPartResult(result_type, file_name, line_number,
-                   msg.GetString().c_str());
+  const TestPartResult result = TestPartResult(
+      result_type, file_name, line_number, msg.GetString().c_str());
   impl_->GetTestPartResultReporterForCurrentThread()->
       ReportTestPartResult(result);
 
-  if (result_type != TestPartResult::kSuccess) {
+  if (result_type != TestPartResult::kSuccess &&
+      result_type != TestPartResult::kSkip) {
     // gtest_break_on_failure takes precedence over
     // gtest_throw_on_failure.  This allows a user to set the latter
     // in the code (perhaps in order to use Google Test assertions
     // with another testing framework) and specify the former on the
     // command line for debugging.
     if (GTEST_FLAG(break_on_failure)) {
-#if GTEST_OS_WINDOWS
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
       // Using DebugBreak on Windows allows gtest to still break into a debugger
       // when a failure happens and both the --gtest_break_on_failure and
       // the --gtest_catch_exceptions flags are specified.
       DebugBreak();
+#elif (!defined(__native_client__)) &&            \
+    ((defined(__clang__) || defined(__GNUC__)) && \
+     (defined(__x86_64__) || defined(__i386__)))
+      // with clang/gcc we can achieve the same effect on x86 by invoking int3
+      asm("int3");
 #else
-      // Dereference NULL through a volatile pointer to prevent the compiler
+      // Dereference nullptr through a volatile pointer to prevent the compiler
       // from removing. We use this rather than abort() or __builtin_trap() for
-      // portability: Symbian doesn't implement abort() well, and some debuggers
-      // don't correctly trap abort().
-      *static_cast<volatile int*>(NULL) = 1;
+      // portability: some debuggers don't correctly trap abort().
+      *static_cast<volatile int*>(nullptr) = 1;
 #endif  // GTEST_OS_WINDOWS
     } else if (GTEST_FLAG(throw_on_failure)) {
 #if GTEST_HAS_EXCEPTIONS
@@ -5323,8 +6840,8 @@ void UnitTest::AddTestPartResult(
 }
 
 // Adds a TestProperty to the current TestResult object when invoked from
-// inside a test, to current TestCase's ad_hoc_test_result_ when invoked
-// from SetUpTestCase or TearDownTestCase, or to the global property set
+// inside a test, to current TestSuite's ad_hoc_test_result_ when invoked
+// from SetUpTestSuite or TearDownTestSuite, or to the global property set
 // when invoked elsewhere.  If the result already contains a property with
 // the same key, the value will be updated.
 void UnitTest::RecordProperty(const std::string& key,
@@ -5363,20 +6880,21 @@ int UnitTest::Run() {
   // that understands the premature-exit-file protocol to report the
   // test as having failed.
   const internal::ScopedPrematureExitFile premature_exit_file(
-      in_death_test_child_process ?
-      NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));
+      in_death_test_child_process
+          ? nullptr
+          : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));
 
   // Captures the value of GTEST_FLAG(catch_exceptions).  This value will be
   // used for the duration of the program.
   impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));
 
-#if GTEST_HAS_SEH
+#if GTEST_OS_WINDOWS
   // Either the user wants Google Test to catch exceptions thrown by the
   // tests or this is executing in the context of death test child
   // process. In either case the user does not want to see pop-up dialogs
   // about crashes - they are expected.
   if (impl()->catch_exceptions() || in_death_test_child_process) {
-# if !GTEST_OS_WINDOWS_MOBILE
+# if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
     // SetErrorMode doesn't exist on CE.
     SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
                  SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
@@ -5389,25 +6907,29 @@ int UnitTest::Run() {
     _set_error_mode(_OUT_TO_STDERR);
 # endif
 
-# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
+# if defined(_MSC_VER) && !GTEST_OS_WINDOWS_MOBILE
     // In the debug version, Visual Studio pops up a separate dialog
     // offering a choice to debug the aborted program. We need to suppress
     // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
     // executed. Google Test will notify the user of any unexpected
     // failure via stderr.
-    //
-    // VC++ doesn't define _set_abort_behavior() prior to the version 8.0.
-    // Users of prior VC versions shall suffer the agony and pain of
-    // clicking through the countless debug dialogs.
-    // TODO(vladl@google.com): find a way to suppress the abort dialog() in the
-    // debug mode when compiled with VC 7.1 or lower.
     if (!GTEST_FLAG(break_on_failure))
       _set_abort_behavior(
           0x0,                                    // Clear the following flags:
           _WRITE_ABORT_MSG | _CALL_REPORTFAULT);  // pop-up window, core dump.
+
+    // In debug mode, the Windows CRT can crash with an assertion over invalid
+    // input (e.g. passing an invalid file descriptor).  The default handling
+    // for these assertions is to pop up a dialog and wait for user input.
+    // Instead ask the CRT to dump such assertions to stderr non-interactively.
+    if (!IsDebuggerPresent()) {
+      (void)_CrtSetReportMode(_CRT_ASSERT,
+                              _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+      (void)_CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
+    }
 # endif
   }
-#endif  // GTEST_HAS_SEH
+#endif  // GTEST_OS_WINDOWS
 
   return internal::HandleExceptionsInMethodIfSupported(
       impl(),
@@ -5421,13 +6943,22 @@ const char* UnitTest::original_working_dir() const {
   return impl_->original_working_dir_.c_str();
 }
 
-// Returns the TestCase object for the test that's currently running,
+// Returns the TestSuite object for the test that's currently running,
 // or NULL if no test is running.
+const TestSuite* UnitTest::current_test_suite() const
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+  internal::MutexLock lock(&mutex_);
+  return impl_->current_test_suite();
+}
+
+// Legacy API is still available but deprecated
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 const TestCase* UnitTest::current_test_case() const
     GTEST_LOCK_EXCLUDED_(mutex_) {
   internal::MutexLock lock(&mutex_);
-  return impl_->current_test_case();
+  return impl_->current_test_suite();
 }
+#endif
 
 // Returns the TestInfo object for the test that's currently running,
 // or NULL if no test is running.
@@ -5440,15 +6971,12 @@ const TestInfo* UnitTest::current_test_info() const
 // Returns the random seed used at the start of the current test run.
 int UnitTest::random_seed() const { return impl_->random_seed(); }
 
-#if GTEST_HAS_PARAM_TEST
-// Returns ParameterizedTestCaseRegistry object used to keep track of
+// Returns ParameterizedTestSuiteRegistry object used to keep track of
 // value-parameterized tests and instantiate and register them.
-internal::ParameterizedTestCaseRegistry&
-    UnitTest::parameterized_test_registry()
-        GTEST_LOCK_EXCLUDED_(mutex_) {
+internal::ParameterizedTestSuiteRegistry&
+UnitTest::parameterized_test_registry() GTEST_LOCK_EXCLUDED_(mutex_) {
   return impl_->parameterized_test_registry();
 }
-#endif  // GTEST_HAS_PARAM_TEST
 
 // Creates an empty UnitTest.
 UnitTest::UnitTest() {
@@ -5479,33 +7007,23 @@ namespace internal {
 
 UnitTestImpl::UnitTestImpl(UnitTest* parent)
     : parent_(parent),
-#ifdef _MSC_VER
-# pragma warning(push)                    // Saves the current warning state.
-# pragma warning(disable:4355)            // Temporarily disables warning 4355
-                                         // (using this in initializer).
-      default_global_test_part_result_reporter_(this),
-      default_per_thread_test_part_result_reporter_(this),
-# pragma warning(pop)                     // Restores the warning state again.
-#else
-      default_global_test_part_result_reporter_(this),
+      GTEST_DISABLE_MSC_WARNINGS_PUSH_(4355 /* using this in initializer */)
+          default_global_test_part_result_reporter_(this),
       default_per_thread_test_part_result_reporter_(this),
-#endif  // _MSC_VER
-      global_test_part_result_repoter_(
+      GTEST_DISABLE_MSC_WARNINGS_POP_() global_test_part_result_repoter_(
           &default_global_test_part_result_reporter_),
       per_thread_test_part_result_reporter_(
           &default_per_thread_test_part_result_reporter_),
-#if GTEST_HAS_PARAM_TEST
       parameterized_test_registry_(),
       parameterized_tests_registered_(false),
-#endif  // GTEST_HAS_PARAM_TEST
-      last_death_test_case_(-1),
-      current_test_case_(NULL),
-      current_test_info_(NULL),
+      last_death_test_suite_(-1),
+      current_test_suite_(nullptr),
+      current_test_info_(nullptr),
       ad_hoc_test_result_(),
-      os_stack_trace_getter_(NULL),
+      os_stack_trace_getter_(nullptr),
       post_flag_parse_init_performed_(false),
       random_seed_(0),  // Will be overridden by the flag before first use.
-      random_(0),  // Will be reseeded before first use.
+      random_(0),       // Will be reseeded before first use.
       start_timestamp_(0),
       elapsed_time_(0),
 #if GTEST_HAS_DEATH_TEST
@@ -5517,8 +7035,8 @@ UnitTestImpl::UnitTestImpl(UnitTest* parent)
 }
 
 UnitTestImpl::~UnitTestImpl() {
-  // Deletes every TestCase.
-  ForEach(test_cases_, internal::Delete<TestCase>);
+  // Deletes every TestSuite.
+  ForEach(test_suites_, internal::Delete<TestSuite>);
 
   // Deletes every Environment.
   ForEach(environments_, internal::Delete<Environment>);
@@ -5527,20 +7045,20 @@ UnitTestImpl::~UnitTestImpl() {
 }
 
 // Adds a TestProperty to the current TestResult object when invoked in a
-// context of a test, to current test case's ad_hoc_test_result when invoke
-// from SetUpTestCase/TearDownTestCase, or to the global property set
+// context of a test, to current test suite's ad_hoc_test_result when invoke
+// from SetUpTestSuite/TearDownTestSuite, or to the global property set
 // otherwise.  If the result already contains a property with the same key,
 // the value will be updated.
 void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
   std::string xml_element;
   TestResult* test_result;  // TestResult appropriate for property recording.
 
-  if (current_test_info_ != NULL) {
+  if (current_test_info_ != nullptr) {
     xml_element = "testcase";
     test_result = &(current_test_info_->result_);
-  } else if (current_test_case_ != NULL) {
+  } else if (current_test_suite_ != nullptr) {
     xml_element = "testsuite";
-    test_result = &(current_test_case_->ad_hoc_test_result_);
+    test_result = &(current_test_suite_->ad_hoc_test_result_);
   } else {
     xml_element = "testsuites";
     test_result = &ad_hoc_test_result_;
@@ -5552,7 +7070,7 @@ void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
 // Disables event forwarding if the control is currently in a death test
 // subprocess. Must not be called before InitGoogleTest.
 void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
-  if (internal_run_death_test_flag_.get() != NULL)
+  if (internal_run_death_test_flag_.get() != nullptr)
     listeners()->SuppressEventForwarding();
 }
 #endif  // GTEST_HAS_DEATH_TEST
@@ -5564,10 +7082,12 @@ void UnitTestImpl::ConfigureXmlOutput() {
   if (output_format == "xml") {
     listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
         UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
+  } else if (output_format == "json") {
+    listeners()->SetDefaultXmlGenerator(new JsonUnitTestResultPrinter(
+        UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
   } else if (output_format != "") {
-    printf("WARNING: unrecognized output format \"%s\" ignored.\n",
-           output_format.c_str());
-    fflush(stdout);
+    GTEST_LOG_(WARNING) << "WARNING: unrecognized output format \""
+                        << output_format << "\" ignored.";
   }
 }
 
@@ -5582,9 +7102,8 @@ void UnitTestImpl::ConfigureStreamingOutput() {
       listeners()->Append(new StreamingListener(target.substr(0, pos),
                                                 target.substr(pos+1)));
     } else {
-      printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
-             target.c_str());
-      fflush(stdout);
+      GTEST_LOG_(WARNING) << "unrecognized streaming target \"" << target
+                          << "\" ignored.";
     }
   }
 }
@@ -5600,6 +7119,11 @@ void UnitTestImpl::PostFlagParsingInit() {
   if (!post_flag_parse_init_performed_) {
     post_flag_parse_init_performed_ = true;
 
+#if defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_)
+    // Register to send notifications about key process state changes.
+    listeners()->Append(new GTEST_CUSTOM_TEST_EVENT_LISTENER_());
+#endif  // defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_)
+
 #if GTEST_HAS_DEATH_TEST
     InitDeathTestSubprocessControlInfo();
     SuppressTestEventsIfInSubprocess();
@@ -5614,81 +7138,91 @@ void UnitTestImpl::PostFlagParsingInit() {
     // to shut down the default XML output before invoking RUN_ALL_TESTS.
     ConfigureXmlOutput();
 
+    if (GTEST_FLAG(brief)) {
+      listeners()->SetDefaultResultPrinter(new BriefUnitTestResultPrinter);
+    }
+
 #if GTEST_CAN_STREAM_RESULTS_
     // Configures listeners for streaming test results to the specified server.
     ConfigureStreamingOutput();
 #endif  // GTEST_CAN_STREAM_RESULTS_
+
+#if GTEST_HAS_ABSL
+    if (GTEST_FLAG(install_failure_signal_handler)) {
+      absl::FailureSignalHandlerOptions options;
+      absl::InstallFailureSignalHandler(options);
+    }
+#endif  // GTEST_HAS_ABSL
   }
 }
 
-// A predicate that checks the name of a TestCase against a known
+// A predicate that checks the name of a TestSuite against a known
 // value.
 //
 // This is used for implementation of the UnitTest class only.  We put
 // it in the anonymous namespace to prevent polluting the outer
 // namespace.
 //
-// TestCaseNameIs is copyable.
-class TestCaseNameIs {
+// TestSuiteNameIs is copyable.
+class TestSuiteNameIs {
  public:
   // Constructor.
-  explicit TestCaseNameIs(const std::string& name)
-      : name_(name) {}
+  explicit TestSuiteNameIs(const std::string& name) : name_(name) {}
 
-  // Returns true iff the name of test_case matches name_.
-  bool operator()(const TestCase* test_case) const {
-    return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0;
+  // Returns true if and only if the name of test_suite matches name_.
+  bool operator()(const TestSuite* test_suite) const {
+    return test_suite != nullptr &&
+           strcmp(test_suite->name(), name_.c_str()) == 0;
   }
 
  private:
   std::string name_;
 };
 
-// Finds and returns a TestCase with the given name.  If one doesn't
+// Finds and returns a TestSuite with the given name.  If one doesn't
 // exist, creates one and returns it.  It's the CALLER'S
 // RESPONSIBILITY to ensure that this function is only called WHEN THE
 // TESTS ARE NOT SHUFFLED.
 //
 // Arguments:
 //
-//   test_case_name: name of the test case
-//   type_param:     the name of the test case's type parameter, or NULL if
-//                   this is not a typed or a type-parameterized test case.
-//   set_up_tc:      pointer to the function that sets up the test case
-//   tear_down_tc:   pointer to the function that tears down the test case
-TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
-                                    const char* type_param,
-                                    Test::SetUpTestCaseFunc set_up_tc,
-                                    Test::TearDownTestCaseFunc tear_down_tc) {
-  // Can we find a TestCase with the given name?
-  const std::vector<TestCase*>::const_iterator test_case =
-      std::find_if(test_cases_.begin(), test_cases_.end(),
-                   TestCaseNameIs(test_case_name));
-
-  if (test_case != test_cases_.end())
-    return *test_case;
+//   test_suite_name: name of the test suite
+//   type_param:      the name of the test suite's type parameter, or NULL if
+//                    this is not a typed or a type-parameterized test suite.
+//   set_up_tc:       pointer to the function that sets up the test suite
+//   tear_down_tc:    pointer to the function that tears down the test suite
+TestSuite* UnitTestImpl::GetTestSuite(
+    const char* test_suite_name, const char* type_param,
+    internal::SetUpTestSuiteFunc set_up_tc,
+    internal::TearDownTestSuiteFunc tear_down_tc) {
+  // Can we find a TestSuite with the given name?
+  const auto test_suite =
+      std::find_if(test_suites_.rbegin(), test_suites_.rend(),
+                   TestSuiteNameIs(test_suite_name));
+
+  if (test_suite != test_suites_.rend()) return *test_suite;
 
   // No.  Let's create one.
-  TestCase* const new_test_case =
-      new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);
-
-  // Is this a death test case?
-  if (internal::UnitTestOptions::MatchesFilter(test_case_name,
-                                               kDeathTestCaseFilter)) {
-    // Yes.  Inserts the test case after the last death test case
-    // defined so far.  This only works when the test cases haven't
+  auto* const new_test_suite =
+      new TestSuite(test_suite_name, type_param, set_up_tc, tear_down_tc);
+
+  // Is this a death test suite?
+  if (internal::UnitTestOptions::MatchesFilter(test_suite_name,
+                                               kDeathTestSuiteFilter)) {
+    // Yes.  Inserts the test suite after the last death test suite
+    // defined so far.  This only works when the test suites haven't
     // been shuffled.  Otherwise we may end up running a death test
     // after a non-death test.
-    ++last_death_test_case_;
-    test_cases_.insert(test_cases_.begin() + last_death_test_case_,
-                       new_test_case);
+    ++last_death_test_suite_;
+    test_suites_.insert(test_suites_.begin() + last_death_test_suite_,
+                        new_test_suite);
   } else {
     // No.  Appends to the end of the list.
-    test_cases_.push_back(new_test_case);
+    test_suites_.push_back(new_test_suite);
   }
 
-  test_case_indices_.push_back(static_cast<int>(test_case_indices_.size()));
-  return new_test_case;
+  test_suite_indices_.push_back(static_cast<int>(test_suite_indices_.size()));
+  return new_test_suite;
 }
 
 // Helpers for setting up / tearing down the given environment.  They
@@ -5706,13 +7240,9 @@ static void TearDownEnvironment(Environment* env) { env->TearDown(); }
 // All other functions called from RunAllTests() may safely assume that
 // parameterized tests are ready to be counted and run.
 bool UnitTestImpl::RunAllTests() {
-  // Makes sure InitGoogleTest() was called.
-  if (!GTestIsInitialized()) {
-    printf("%s",
-           "\nThis test program did NOT call ::testing::InitGoogleTest "
-           "before calling RUN_ALL_TESTS().  Please fix it.\n");
-    return false;
-  }
+  // True if and only if Google Test is initialized before RUN_ALL_TESTS() is
+  // called.
+  const bool gtest_is_initialized_before_run_all_tests = GTestIsInitialized();
 
   // Do not run any test if the --help flag was specified.
   if (g_help_flag)
@@ -5727,12 +7257,18 @@ bool UnitTestImpl::RunAllTests() {
   // protocol.
   internal::WriteToShardStatusFileIfNeeded();
 
-  // True iff we are in a subprocess for running a thread-safe-style
+  // True if and only if we are in a subprocess for running a thread-safe-style
   // death test.
   bool in_subprocess_for_death_test = false;
 
 #if GTEST_HAS_DEATH_TEST
-  in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
+  in_subprocess_for_death_test =
+      (internal_run_death_test_flag_.get() != nullptr);
+# if defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_)
+  if (in_subprocess_for_death_test) {
+    GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_();
+  }
+# endif  // defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_)
 #endif  // GTEST_HAS_DEATH_TEST
 
   const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
@@ -5754,7 +7290,7 @@ bool UnitTestImpl::RunAllTests() {
   random_seed_ = GTEST_FLAG(shuffle) ?
       GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;
 
-  // True iff at least one test has failed.
+  // True if and only if at least one test has failed.
   bool failed = false;
 
   TestEventListener* repeater = listeners()->repeater();
@@ -5766,17 +7302,17 @@ bool UnitTestImpl::RunAllTests() {
   // when we are inside the subprocess of a death test.
   const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
   // Repeats forever if the repeat count is negative.
-  const bool forever = repeat < 0;
-  for (int i = 0; forever || i != repeat; i++) {
+  const bool gtest_repeat_forever = repeat < 0;
+  for (int i = 0; gtest_repeat_forever || i != repeat; i++) {
     // We want to preserve failures generated by ad-hoc test
     // assertions executed before RUN_ALL_TESTS().
     ClearNonAdHocTestResult();
 
-    const TimeInMillis start = GetTimeInMillis();
+    Timer timer;
 
-    // Shuffles test cases and tests if requested.
+    // Shuffles test suites and tests if requested.
     if (has_tests_to_run && GTEST_FLAG(shuffle)) {
-      random()->Reseed(random_seed_);
+      random()->Reseed(static_cast<uint32_t>(random_seed_));
       // This should be done before calling OnTestIterationStart(),
       // such that a test event listener can see the actual test order
       // in the event.
@@ -5786,19 +7322,48 @@ bool UnitTestImpl::RunAllTests() {
     // Tells the unit test event listeners that the tests are about to start.
     repeater->OnTestIterationStart(*parent_, i);
 
-    // Runs each test case if there is at least one test to run.
+    // Runs each test suite if there is at least one test to run.
     if (has_tests_to_run) {
       // Sets up all environments beforehand.
       repeater->OnEnvironmentsSetUpStart(*parent_);
       ForEach(environments_, SetUpEnvironment);
       repeater->OnEnvironmentsSetUpEnd(*parent_);
 
-      // Runs the tests only if there was no fatal failure during global
-      // set-up.
-      if (!Test::HasFatalFailure()) {
-        for (int test_index = 0; test_index < total_test_case_count();
+      // Runs the tests only if there was no fatal failure or skip triggered
+      // during global set-up.
+      if (Test::IsSkipped()) {
+        // Emit diagnostics when global set-up calls skip, as it will not be
+        // emitted by default.
+        TestResult& test_result =
+            *internal::GetUnitTestImpl()->current_test_result();
+        for (int j = 0; j < test_result.total_part_count(); ++j) {
+          const TestPartResult& test_part_result =
+              test_result.GetTestPartResult(j);
+          if (test_part_result.type() == TestPartResult::kSkip) {
+            const std::string& result = test_part_result.message();
+            printf("%s\n", result.c_str());
+          }
+        }
+        fflush(stdout);
+      } else if (!Test::HasFatalFailure()) {
+        for (int test_index = 0; test_index < total_test_suite_count();
              test_index++) {
-          GetMutableTestCase(test_index)->Run();
+          GetMutableSuiteCase(test_index)->Run();
+          if (GTEST_FLAG(fail_fast) &&
+              GetMutableSuiteCase(test_index)->Failed()) {
+            for (int j = test_index + 1; j < total_test_suite_count(); j++) {
+              GetMutableSuiteCase(j)->Skip();
+            }
+            break;
+          }
+        }
+      } else if (Test::HasFatalFailure()) {
+        // If there was a fatal failure during the global setup then we know we
+        // aren't going to run any tests. Explicitly mark all of the tests as
+        // skipped to make this obvious in the output.
+        for (int test_index = 0; test_index < total_test_suite_count();
+             test_index++) {
+          GetMutableSuiteCase(test_index)->Skip();
         }
       }
 
@@ -5809,7 +7374,7 @@ bool UnitTestImpl::RunAllTests() {
       repeater->OnEnvironmentsTearDownEnd(*parent_);
     }
 
-    elapsed_time_ = GetTimeInMillis() - start;
+    elapsed_time_ = timer.Elapsed();
 
     // Tells the unit test event listener that the tests have just finished.
     repeater->OnTestIterationEnd(*parent_, i);
@@ -5835,6 +7400,20 @@ bool UnitTestImpl::RunAllTests() {
 
   repeater->OnTestProgramEnd(*parent_);
 
+  if (!gtest_is_initialized_before_run_all_tests) {
+    ColoredPrintf(
+        GTestColor::kRed,
+        "\nIMPORTANT NOTICE - DO NOT IGNORE:\n"
+        "This test program did NOT call " GTEST_INIT_GOOGLE_TEST_NAME_
+        "() before calling RUN_ALL_TESTS(). This is INVALID. Soon " GTEST_NAME_
+        " will start to enforce the valid usage. "
+        "Please fix it ASAP, or IT WILL START TO FAIL.\n");  // NOLINT
+#if GTEST_FOR_GOOGLE_
+    ColoredPrintf(GTestColor::kRed,
+                  "For more details, see http://wiki/Main/ValidGUnitMain.\n");
+#endif  // GTEST_FOR_GOOGLE_
+  }
+
   return !failed;
 }
 
@@ -5844,10 +7423,10 @@ bool UnitTestImpl::RunAllTests() {
 // be created, prints an error and exits.
 void WriteToShardStatusFileIfNeeded() {
   const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile);
-  if (test_shard_file != NULL) {
+  if (test_shard_file != nullptr) {
     FILE* const file = posix::FOpen(test_shard_file, "w");
-    if (file == NULL) {
-      ColoredPrintf(COLOR_RED,
+    if (file == nullptr) {
+      ColoredPrintf(GTestColor::kRed,
                     "Could not write to the test shard status file \"%s\" "
                     "specified by the %s environment variable.\n",
                     test_shard_file, kTestShardStatusFile);
@@ -5871,8 +7450,8 @@ bool ShouldShard(const char* total_shards_env,
     return false;
   }
 
-  const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
-  const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);
+  const int32_t total_shards = Int32FromEnvOrDie(total_shards_env, -1);
+  const int32_t shard_index = Int32FromEnvOrDie(shard_index_env, -1);
 
   if (total_shards == -1 && shard_index == -1) {
     return false;
@@ -5881,7 +7460,7 @@ bool ShouldShard(const char* total_shards_env,
       << "Invalid environment variables: you have "
       << kTestShardIndex << " = " << shard_index
       << ", but have left " << kTestTotalShards << " unset.\n";
-    ColoredPrintf(COLOR_RED, msg.GetString().c_str());
+    ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
     fflush(stdout);
     exit(EXIT_FAILURE);
   } else if (total_shards != -1 && shard_index == -1) {
@@ -5889,7 +7468,7 @@ bool ShouldShard(const char* total_shards_env,
       << "Invalid environment variables: you have "
       << kTestTotalShards << " = " << total_shards
       << ", but have left " << kTestShardIndex << " unset.\n";
-    ColoredPrintf(COLOR_RED, msg.GetString().c_str());
+    ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
     fflush(stdout);
     exit(EXIT_FAILURE);
   } else if (shard_index < 0 || shard_index >= total_shards) {
@@ -5898,7 +7477,7 @@ bool ShouldShard(const char* total_shards_env,
       << kTestShardIndex << " < " << kTestTotalShards
       << ", but you have " << kTestShardIndex << "=" << shard_index
       << ", " << kTestTotalShards << "=" << total_shards << ".\n";
-    ColoredPrintf(COLOR_RED, msg.GetString().c_str());
+    ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
     fflush(stdout);
     exit(EXIT_FAILURE);
   }
@@ -5909,13 +7488,13 @@ bool ShouldShard(const char* total_shards_env,
 // Parses the environment variable var as an Int32. If it is unset,
 // returns default_val. If it is not an Int32, prints an error
 // and aborts.
-Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
+int32_t Int32FromEnvOrDie(const char* var, int32_t default_val) {
   const char* str_val = posix::GetEnv(var);
-  if (str_val == NULL) {
+  if (str_val == nullptr) {
     return default_val;
   }
 
-  Int32 result;
+  int32_t result;
   if (!ParseInt32(Message() << "The value of environment variable " << var,
                   str_val, &result)) {
     exit(EXIT_FAILURE);
@@ -5924,8 +7503,8 @@ Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
 }
 
 // Given the total number of shards, the shard index, and the test id,
-// returns true iff the test should be run on this shard. The test id is
-// some arbitrary but unique non-negative integer assigned to each test
+// returns true if and only if the test should be run on this shard. The test id
+// is some arbitrary but unique non-negative integer assigned to each test
 // method. Assumes that 0 <= shard_index < total_shards.
 bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
   return (test_id % total_shards) == shard_index;
@@ -5933,15 +7512,15 @@ bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
 
 // Compares the name of each test with the user-specified filter to
 // decide whether the test should be run, then records the result in
-// each TestCase and TestInfo object.
+// each TestSuite and TestInfo object.
 // If shard_tests == true, further filters tests based on sharding
 // variables in the environment - see
-// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
-// Returns the number of tests that should run.
+// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md
+// . Returns the number of tests that should run.
 int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
-  const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
+  const int32_t total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
       Int32FromEnvOrDie(kTestTotalShards, -1) : -1;
-  const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ?
+  const int32_t shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ?
       Int32FromEnvOrDie(kTestShardIndex, -1) : -1;
 
   // num_runnable_tests are the number of tests that will
@@ -5950,42 +7529,40 @@ int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
   // this shard.
   int num_runnable_tests = 0;
   int num_selected_tests = 0;
-  for (size_t i = 0; i < test_cases_.size(); i++) {
-    TestCase* const test_case = test_cases_[i];
-    const std::string &test_case_name = test_case->name();
-    test_case->set_should_run(false);
+  for (auto* test_suite : test_suites_) {
+    const std::string& test_suite_name = test_suite->name();
+    test_suite->set_should_run(false);
 
-    for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
-      TestInfo* const test_info = test_case->test_info_list()[j];
+    for (size_t j = 0; j < test_suite->test_info_list().size(); j++) {
+      TestInfo* const test_info = test_suite->test_info_list()[j];
       const std::string test_name(test_info->name());
-      // A test is disabled if test case name or test name matches
+      // A test is disabled if test suite name or test name matches
       // kDisableTestFilter.
-      const bool is_disabled =
-          internal::UnitTestOptions::MatchesFilter(test_case_name,
-                                                   kDisableTestFilter) ||
-          internal::UnitTestOptions::MatchesFilter(test_name,
-                                                   kDisableTestFilter);
+      const bool is_disabled = internal::UnitTestOptions::MatchesFilter(
+                                   test_suite_name, kDisableTestFilter) ||
+                               internal::UnitTestOptions::MatchesFilter(
+                                   test_name, kDisableTestFilter);
       test_info->is_disabled_ = is_disabled;
 
-      const bool matches_filter =
-          internal::UnitTestOptions::FilterMatchesTest(test_case_name,
-                                                       test_name);
+      const bool matches_filter = internal::UnitTestOptions::FilterMatchesTest(
+          test_suite_name, test_name);
       test_info->matches_filter_ = matches_filter;
 
       const bool is_runnable =
           (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) &&
           matches_filter;
 
-      const bool is_selected = is_runnable &&
-          (shard_tests == IGNORE_SHARDING_PROTOCOL ||
-           ShouldRunTestOnShard(total_shards, shard_index,
-                                num_runnable_tests));
+      const bool is_in_another_shard =
+          shard_tests != IGNORE_SHARDING_PROTOCOL &&
+          !ShouldRunTestOnShard(total_shards, shard_index, num_runnable_tests);
+      test_info->is_in_another_shard_ = is_in_another_shard;
+      const bool is_selected = is_runnable && !is_in_another_shard;
 
       num_runnable_tests += is_runnable;
       num_selected_tests += is_selected;
 
       test_info->should_run_ = is_selected;
-      test_case->set_should_run(test_case->should_run() || is_selected);
+      test_suite->set_should_run(test_suite->should_run() || is_selected);
     }
   }
   return num_selected_tests;
@@ -5996,7 +7573,7 @@ int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
 // max_length characters, only prints the first max_length characters
 // and "...".
 static void PrintOnOneLine(const char* str, int max_length) {
-  if (str != NULL) {
+  if (str != nullptr) {
     for (int i = 0; *str != '\0'; ++str) {
       if (i >= max_length) {
         printf("...");
@@ -6018,27 +7595,25 @@ void UnitTestImpl::ListTestsMatchingFilter() {
   // Print at most this many characters for each type/value parameter.
   const int kMaxParamLength = 250;
 
-  for (size_t i = 0; i < test_cases_.size(); i++) {
-    const TestCase* const test_case = test_cases_[i];
-    bool printed_test_case_name = false;
+  for (auto* test_suite : test_suites_) {
+    bool printed_test_suite_name = false;
 
-    for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
-      const TestInfo* const test_info =
-          test_case->test_info_list()[j];
+    for (size_t j = 0; j < test_suite->test_info_list().size(); j++) {
+      const TestInfo* const test_info = test_suite->test_info_list()[j];
       if (test_info->matches_filter_) {
-        if (!printed_test_case_name) {
-          printed_test_case_name = true;
-          printf("%s.", test_case->name());
-          if (test_case->type_param() != NULL) {
+        if (!printed_test_suite_name) {
+          printed_test_suite_name = true;
+          printf("%s.", test_suite->name());
+          if (test_suite->type_param() != nullptr) {
             printf("  # %s = ", kTypeParamLabel);
             // We print the type parameter on a single line to make
             // the output easy to parse by a program.
-            PrintOnOneLine(test_case->type_param(), kMaxParamLength);
+            PrintOnOneLine(test_suite->type_param(), kMaxParamLength);
           }
           printf("\n");
         }
         printf("  %s", test_info->name());
-        if (test_info->value_param() != NULL) {
+        if (test_info->value_param() != nullptr) {
           printf("  # %s = ", kValueParamLabel);
           // We print the value parameter on a single line to make the
           // output easy to parse by a program.
@@ -6049,6 +7624,23 @@ void UnitTestImpl::ListTestsMatchingFilter() {
     }
   }
   fflush(stdout);
+  const std::string& output_format = UnitTestOptions::GetOutputFormat();
+  if (output_format == "xml" || output_format == "json") {
+    FILE* fileout = OpenFileForWriting(
+        UnitTestOptions::GetAbsolutePathToOutputFile().c_str());
+    std::stringstream stream;
+    if (output_format == "xml") {
+      XmlUnitTestResultPrinter(
+          UnitTestOptions::GetAbsolutePathToOutputFile().c_str())
+          .PrintXmlTestsList(&stream, test_suites_);
+    } else if (output_format == "json") {
+      JsonUnitTestResultPrinter(
+          UnitTestOptions::GetAbsolutePathToOutputFile().c_str())
+          .PrintJsonTestList(&stream, test_suites_);
+    }
+    fprintf(fileout, "%s", StringStreamToString(&stream).c_str());
+    fclose(fileout);
+  }
 }
 
 // Sets the OS stack trace getter.
@@ -6068,43 +7660,51 @@ void UnitTestImpl::set_os_stack_trace_getter(
 // otherwise, creates an OsStackTraceGetter, makes it the current
 // getter, and returns it.
 OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
-  if (os_stack_trace_getter_ == NULL) {
+  if (os_stack_trace_getter_ == nullptr) {
+#ifdef GTEST_OS_STACK_TRACE_GETTER_
+    os_stack_trace_getter_ = new GTEST_OS_STACK_TRACE_GETTER_;
+#else
     os_stack_trace_getter_ = new OsStackTraceGetter;
+#endif  // GTEST_OS_STACK_TRACE_GETTER_
   }
 
   return os_stack_trace_getter_;
 }
 
-// Returns the TestResult for the test that's currently running, or
-// the TestResult for the ad hoc test if no test is running.
+// Returns the most specific TestResult currently running.
 TestResult* UnitTestImpl::current_test_result() {
-  return current_test_info_ ?
-      &(current_test_info_->result_) : &ad_hoc_test_result_;
+  if (current_test_info_ != nullptr) {
+    return &current_test_info_->result_;
+  }
+  if (current_test_suite_ != nullptr) {
+    return &current_test_suite_->ad_hoc_test_result_;
+  }
+  return &ad_hoc_test_result_;
 }
 
-// Shuffles all test cases, and the tests within each test case,
+// Shuffles all test suites, and the tests within each test suite,
 // making sure that death tests are still run first.
 void UnitTestImpl::ShuffleTests() {
-  // Shuffles the death test cases.
-  ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_);
+  // Shuffles the death test suites.
+  ShuffleRange(random(), 0, last_death_test_suite_ + 1, &test_suite_indices_);
 
-  // Shuffles the non-death test cases.
-  ShuffleRange(random(), last_death_test_case_ + 1,
-               static_cast<int>(test_cases_.size()), &test_case_indices_);
+  // Shuffles the non-death test suites.
+  ShuffleRange(random(), last_death_test_suite_ + 1,
+               static_cast<int>(test_suites_.size()), &test_suite_indices_);
 
-  // Shuffles the tests inside each test case.
-  for (size_t i = 0; i < test_cases_.size(); i++) {
-    test_cases_[i]->ShuffleTests(random());
+  // Shuffles the tests inside each test suite.
+  for (auto& test_suite : test_suites_) {
+    test_suite->ShuffleTests(random());
   }
 }
 
-// Restores the test cases and tests to their order before the first shuffle.
+// Restores the test suites and tests to their order before the first shuffle.
 void UnitTestImpl::UnshuffleTests() {
-  for (size_t i = 0; i < test_cases_.size(); i++) {
-    // Unshuffles the tests in each test case.
-    test_cases_[i]->UnshuffleTests();
-    // Resets the index of each test case.
-    test_case_indices_[i] = static_cast<int>(i);
+  for (size_t i = 0; i < test_suites_.size(); i++) {
+    // Unshuffles the tests in each test suite.
+    test_suites_[i]->UnshuffleTests();
+    // Resets the index of each test suite.
+    test_suite_indices_[i] = static_cast<int>(i);
   }
 }
 
@@ -6160,16 +7760,15 @@ bool SkipPrefix(const char* prefix, const char** pstr) {
 // part can be omitted.
 //
 // Returns the value of the flag, or NULL if the parsing failed.
-const char* ParseFlagValue(const char* str,
-                           const char* flag,
-                           bool def_optional) {
+static const char* ParseFlagValue(const char* str, const char* flag,
+                                  bool def_optional) {
   // str and flag must not be NULL.
-  if (str == NULL || flag == NULL) return NULL;
+  if (str == nullptr || flag == nullptr) return nullptr;
 
   // The flag must start with "--" followed by GTEST_FLAG_PREFIX_.
   const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag;
   const size_t flag_len = flag_str.length();
-  if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;
+  if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr;
 
   // Skips the flag name.
   const char* flag_end = str + flag_len;
@@ -6182,7 +7781,7 @@ const char* ParseFlagValue(const char* str,
   // If def_optional is true and there are more characters after the
   // flag name, or if def_optional is false, there must be a '=' after
   // the flag name.
-  if (flag_end[0] != '=') return NULL;
+  if (flag_end[0] != '=') return nullptr;
 
   // Returns the string after "=".
   return flag_end + 1;
@@ -6198,46 +7797,45 @@ const char* ParseFlagValue(const char* str,
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
-bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
+static bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
   // Gets the value of the flag as a string.
   const char* const value_str = ParseFlagValue(str, flag, true);
 
   // Aborts if the parsing failed.
-  if (value_str == NULL) return false;
+  if (value_str == nullptr) return false;
 
   // Converts the string value to a bool.
   *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
   return true;
 }
 
-// Parses a string for an Int32 flag, in the form of
-// "--flag=value".
+// Parses a string for an int32_t flag, in the form of "--flag=value".
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
-bool ParseInt32Flag(const char* str, const char* flag, Int32* value) {
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
   // Gets the value of the flag as a string.
   const char* const value_str = ParseFlagValue(str, flag, false);
 
   // Aborts if the parsing failed.
-  if (value_str == NULL) return false;
+  if (value_str == nullptr) return false;
 
   // Sets *value to the value of the flag.
   return ParseInt32(Message() << "The value of flag --" << flag,
                     value_str, value);
 }
 
-// Parses a string for a string flag, in the form of
-// "--flag=value".
+// Parses a string for a string flag, in the form of "--flag=value".
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
-bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
+template <typename String>
+static bool ParseStringFlag(const char* str, const char* flag, String* value) {
   // Gets the value of the flag as a string.
   const char* const value_str = ParseFlagValue(str, flag, false);
 
   // Aborts if the parsing failed.
-  if (value_str == NULL) return false;
+  if (value_str == nullptr) return false;
 
   // Sets *value to the value of the flag.
   *value = value_str;
@@ -6268,10 +7866,8 @@ static bool HasGoogleTestFlagPrefix(const char* str) {
 //   @Y    changes the color to yellow.
 //   @D    changes to the default terminal text color.
 //
-// TODO(wan@google.com): Write tests for this once we add stdout
-// capturing to Google Test.
 static void PrintColorEncoded(const char* str) {
-  GTestColor color = COLOR_DEFAULT;  // The current color.
+  GTestColor color = GTestColor::kDefault;  // The current color.
 
   // Conceptually, we split the string into segments divided by escape
   // sequences.  Then we print one segment at a time.  At the end of
@@ -6279,7 +7875,7 @@ static void PrintColorEncoded(const char* str) {
   // next segment.
   for (;;) {
     const char* p = strchr(str, '@');
-    if (p == NULL) {
+    if (p == nullptr) {
       ColoredPrintf(color, "%s", str);
       return;
     }
@@ -6291,13 +7887,13 @@ static void PrintColorEncoded(const char* str) {
     if (ch == '@') {
       ColoredPrintf(color, "@");
     } else if (ch == 'D') {
-      color = COLOR_DEFAULT;
+      color = GTestColor::kDefault;
     } else if (ch == 'R') {
-      color = COLOR_RED;
+      color = GTestColor::kRed;
     } else if (ch == 'G') {
-      color = COLOR_GREEN;
+      color = GTestColor::kGreen;
     } else if (ch == 'Y') {
-      color = COLOR_YELLOW;
+      color = GTestColor::kYellow;
     } else {
       --str;
     }
@@ -6305,68 +7901,147 @@ static void PrintColorEncoded(const char* str) {
 }
 
 static const char kColorEncodedHelpMessage[] =
-"This program contains tests written using " GTEST_NAME_ ". You can use the\n"
-"following command line flags to control its behavior:\n"
-"\n"
-"Test Selection:\n"
-"  @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n"
-"      List the names of all tests instead of running them. The name of\n"
-"      TEST(Foo, Bar) is \"Foo.Bar\".\n"
-"  @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS"
+    "This program contains tests written using " GTEST_NAME_
+    ". You can use the\n"
+    "following command line flags to control its behavior:\n"
+    "\n"
+    "Test Selection:\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "list_tests@D\n"
+    "      List the names of all tests instead of running them. The name of\n"
+    "      TEST(Foo, Bar) is \"Foo.Bar\".\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "filter=@YPOSITIVE_PATTERNS"
     "[@G-@YNEGATIVE_PATTERNS]@D\n"
-"      Run only the tests whose name matches one of the positive patterns but\n"
-"      none of the negative patterns. '?' matches any single character; '*'\n"
-"      matches any substring; ':' separates two patterns.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n"
-"      Run all disabled tests too.\n"
-"\n"
-"Test Execution:\n"
-"  @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n"
-"      Run the tests repeatedly; use a negative count to repeat forever.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n"
-"      Randomize tests' orders on every iteration.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n"
-"      Random number seed to use for shuffling test orders (between 1 and\n"
-"      99999, or 0 to use a seed based on the current time).\n"
-"\n"
-"Test Output:\n"
-"  @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
-"      Enable/disable colored output. The default is @Gauto@D.\n"
-"  -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n"
-"      Don't print the elapsed time of each test.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G"
-    GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n"
-"      Generate an XML report in the given directory or with the given file\n"
-"      name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
-#if GTEST_CAN_STREAM_RESULTS_
-"  @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
-"      Stream test results to the given server.\n"
-#endif  // GTEST_CAN_STREAM_RESULTS_
-"\n"
-"Assertion Behavior:\n"
-#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
-"  @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
-"      Set the default death test style.\n"
-#endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
-"  @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
-"      Turn assertion failures into debugger break-points.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
-"      Turn assertion failures into C++ exceptions.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
-"      Do not report exceptions as test failures. Instead, allow them\n"
-"      to crash the program or throw a pop-up (on Windows).\n"
-"\n"
-"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set "
+    "      Run only the tests whose name matches one of the positive patterns "
+    "but\n"
+    "      none of the negative patterns. '?' matches any single character; "
+    "'*'\n"
+    "      matches any substring; ':' separates two patterns.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "also_run_disabled_tests@D\n"
+    "      Run all disabled tests too.\n"
+    "\n"
+    "Test Execution:\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "repeat=@Y[COUNT]@D\n"
+    "      Run the tests repeatedly; use a negative count to repeat forever.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "shuffle@D\n"
+    "      Randomize tests' orders on every iteration.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "random_seed=@Y[NUMBER]@D\n"
+    "      Random number seed to use for shuffling test orders (between 1 and\n"
+    "      99999, or 0 to use a seed based on the current time).\n"
+    "\n"
+    "Test Output:\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
+    "      Enable/disable colored output. The default is @Gauto@D.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "brief=1@D\n"
+    "      Only print test failures.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "print_time=0@D\n"
+    "      Don't print the elapsed time of each test.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "output=@Y(@Gjson@Y|@Gxml@Y)[@G:@YDIRECTORY_PATH@G" GTEST_PATH_SEP_
+    "@Y|@G:@YFILE_PATH]@D\n"
+    "      Generate a JSON or XML report in the given directory or with the "
+    "given\n"
+    "      file name. @YFILE_PATH@D defaults to @Gtest_detail.xml@D.\n"
+# if GTEST_CAN_STREAM_RESULTS_
+    "  @G--" GTEST_FLAG_PREFIX_
+    "stream_result_to=@YHOST@G:@YPORT@D\n"
+    "      Stream test results to the given server.\n"
+# endif  // GTEST_CAN_STREAM_RESULTS_
+    "\n"
+    "Assertion Behavior:\n"
+# if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+    "  @G--" GTEST_FLAG_PREFIX_
+    "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
+    "      Set the default death test style.\n"
+# endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+    "  @G--" GTEST_FLAG_PREFIX_
+    "break_on_failure@D\n"
+    "      Turn assertion failures into debugger break-points.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "throw_on_failure@D\n"
+    "      Turn assertion failures into C++ exceptions for use by an external\n"
+    "      test framework.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "catch_exceptions=0@D\n"
+    "      Do not report exceptions as test failures. Instead, allow them\n"
+    "      to crash the program or throw a pop-up (on Windows).\n"
+    "\n"
+    "Except for @G--" GTEST_FLAG_PREFIX_
+    "list_tests@D, you can alternatively set "
     "the corresponding\n"
-"environment variable of a flag (all letters in upper-case). For example, to\n"
-"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_
+    "environment variable of a flag (all letters in upper-case). For example, "
+    "to\n"
+    "disable colored text output, you can either specify "
+    "@G--" GTEST_FLAG_PREFIX_
     "color=no@D or set\n"
-"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n"
-"\n"
-"For more information, please read the " GTEST_NAME_ " documentation at\n"
-"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n"
-"(not one in your own code or tests), please report it to\n"
-"@G<" GTEST_DEV_EMAIL_ ">@D.\n";
+    "the @G" GTEST_FLAG_PREFIX_UPPER_
+    "COLOR@D environment variable to @Gno@D.\n"
+    "\n"
+    "For more information, please read the " GTEST_NAME_
+    " documentation at\n"
+    "@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_
+    "\n"
+    "(not one in your own code or tests), please report it to\n"
+    "@G<" GTEST_DEV_EMAIL_ ">@D.\n";
+
+static bool ParseGoogleTestFlag(const char* const arg) {
+  return ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
+                       &GTEST_FLAG(also_run_disabled_tests)) ||
+         ParseBoolFlag(arg, kBreakOnFailureFlag,
+                       &GTEST_FLAG(break_on_failure)) ||
+         ParseBoolFlag(arg, kCatchExceptionsFlag,
+                       &GTEST_FLAG(catch_exceptions)) ||
+         ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
+         ParseStringFlag(arg, kDeathTestStyleFlag,
+                         &GTEST_FLAG(death_test_style)) ||
+         ParseBoolFlag(arg, kDeathTestUseFork,
+                       &GTEST_FLAG(death_test_use_fork)) ||
+         ParseBoolFlag(arg, kFailFast, &GTEST_FLAG(fail_fast)) ||
+         ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
+         ParseStringFlag(arg, kInternalRunDeathTestFlag,
+                         &GTEST_FLAG(internal_run_death_test)) ||
+         ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
+         ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
+         ParseBoolFlag(arg, kBriefFlag, &GTEST_FLAG(brief)) ||
+         ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
+         ParseBoolFlag(arg, kPrintUTF8Flag, &GTEST_FLAG(print_utf8)) ||
+         ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
+         ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
+         ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
+         ParseInt32Flag(arg, kStackTraceDepthFlag,
+                        &GTEST_FLAG(stack_trace_depth)) ||
+         ParseStringFlag(arg, kStreamResultToFlag,
+                         &GTEST_FLAG(stream_result_to)) ||
+         ParseBoolFlag(arg, kThrowOnFailureFlag, &GTEST_FLAG(throw_on_failure));
+}
+
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+static void LoadFlagsFromFile(const std::string& path) {
+  FILE* flagfile = posix::FOpen(path.c_str(), "r");
+  if (!flagfile) {
+    GTEST_LOG_(FATAL) << "Unable to open file \"" << GTEST_FLAG(flagfile)
+                      << "\"";
+  }
+  std::string contents(ReadEntireFile(flagfile));
+  posix::FClose(flagfile);
+  std::vector<std::string> lines;
+  SplitString(contents, '\n', &lines);
+  for (size_t i = 0; i < lines.size(); ++i) {
+    if (lines[i].empty())
+      continue;
+    if (!ParseGoogleTestFlag(lines[i].c_str()))
+      g_help_flag = true;
+  }
+}
+#endif  // GTEST_USE_OWN_FLAGFILE_FLAG_
 
 // Parses the command line for Google Test flags, without initializing
 // other parts of Google Test.  The type parameter CharType can be
@@ -6381,35 +8056,24 @@ void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
     using internal::ParseInt32Flag;
     using internal::ParseStringFlag;
 
-    // Do we see a Google Test flag?
-    if (ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
-                      &GTEST_FLAG(also_run_disabled_tests)) ||
-        ParseBoolFlag(arg, kBreakOnFailureFlag,
-                      &GTEST_FLAG(break_on_failure)) ||
-        ParseBoolFlag(arg, kCatchExceptionsFlag,
-                      &GTEST_FLAG(catch_exceptions)) ||
-        ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
-        ParseStringFlag(arg, kDeathTestStyleFlag,
-                        &GTEST_FLAG(death_test_style)) ||
-        ParseBoolFlag(arg, kDeathTestUseFork,
-                      &GTEST_FLAG(death_test_use_fork)) ||
-        ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
-        ParseStringFlag(arg, kInternalRunDeathTestFlag,
-                        &GTEST_FLAG(internal_run_death_test)) ||
-        ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
-        ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
-        ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
-        ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
-        ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
-        ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
-        ParseInt32Flag(arg, kStackTraceDepthFlag,
-                       &GTEST_FLAG(stack_trace_depth)) ||
-        ParseStringFlag(arg, kStreamResultToFlag,
-                        &GTEST_FLAG(stream_result_to)) ||
-        ParseBoolFlag(arg, kThrowOnFailureFlag,
-                      &GTEST_FLAG(throw_on_failure))
-        ) {
-      // Yes.  Shift the remainder of the argv list left by one.  Note
+    bool remove_flag = false;
+    if (ParseGoogleTestFlag(arg)) {
+      remove_flag = true;
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+    } else if (ParseStringFlag(arg, kFlagfileFlag, &GTEST_FLAG(flagfile))) {
+      LoadFlagsFromFile(GTEST_FLAG(flagfile));
+      remove_flag = true;
+#endif  // GTEST_USE_OWN_FLAGFILE_FLAG_
+    } else if (arg_string == "--help" || arg_string == "-h" ||
+               arg_string == "-?" || arg_string == "/?" ||
+               HasGoogleTestFlagPrefix(arg)) {
+      // Both help flag and unrecognized Google Test flags (excluding
+      // internal ones) trigger help display.
+      g_help_flag = true;
+    }
+
+    if (remove_flag) {
+      // Shift the remainder of the argv list left by one.  Note
       // that argv has (*argc + 1) elements, the last one always being
       // NULL.  The following loop moves the trailing NULL element as
       // well.
@@ -6423,12 +8087,6 @@ void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
       // We also need to decrement the iterator as we just removed
       // an element.
       i--;
-    } else if (arg_string == "--help" || arg_string == "-h" ||
-               arg_string == "-?" || arg_string == "/?" ||
-               HasGoogleTestFlagPrefix(arg)) {
-      // Both help flag and unrecognized Google Test flags (excluding
-      // internal ones) trigger help display.
-      g_help_flag = true;
     }
   }
 
@@ -6444,6 +8102,17 @@ void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
 // other parts of Google Test.
 void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
   ParseGoogleTestFlagsOnlyImpl(argc, argv);
+
+  // Fix the value of *_NSGetArgc() on macOS, but if and only if
+  // *_NSGetArgv() == argv
+  // Only applicable to char** version of argv
+#if GTEST_OS_MAC
+#ifndef GTEST_OS_IOS
+  if (*_NSGetArgv() == argv) {
+    *_NSGetArgc() = *argc;
+  }
+#endif
+#endif
 }
 void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
   ParseGoogleTestFlagsOnlyImpl(argc, argv);
@@ -6455,23 +8124,19 @@ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
 // wchar_t.
 template <typename CharType>
 void InitGoogleTestImpl(int* argc, CharType** argv) {
-  g_init_gtest_count++;
-
   // We don't want to run the initialization code twice.
-  if (g_init_gtest_count != 1) return;
+  if (GTestIsInitialized()) return;
 
   if (*argc <= 0) return;
 
-  internal::g_executable_path = internal::StreamableToString(argv[0]);
-
-#if GTEST_HAS_DEATH_TEST
-
   g_argvs.clear();
   for (int i = 0; i != *argc; i++) {
     g_argvs.push_back(StreamableToString(argv[i]));
   }
 
-#endif  // GTEST_HAS_DEATH_TEST
+#if GTEST_HAS_ABSL
+  absl::InitializeSymbolizer(g_argvs[0].c_str());
+#endif  // GTEST_HAS_ABSL
 
   ParseGoogleTestFlagsOnly(argc, argv);
   GetUnitTestImpl()->PostFlagParsingInit();
@@ -6489,13 +8154,89 @@ void InitGoogleTestImpl(int* argc, CharType** argv) {
 //
 // Calling the function for the second time has no user-visible effect.
 void InitGoogleTest(int* argc, char** argv) {
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
+#else  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
   internal::InitGoogleTestImpl(argc, argv);
+#endif  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
 }
 
 // This overloaded version can be used in Windows programs compiled in
 // UNICODE mode.
 void InitGoogleTest(int* argc, wchar_t** argv) {
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
+#else  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
   internal::InitGoogleTestImpl(argc, argv);
+#endif  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+}
+
+// This overloaded version can be used on Arduino/embedded platforms where
+// there is no argc/argv.
+void InitGoogleTest() {
+  // Since Arduino doesn't have a command line, fake out the argc/argv arguments
+  int argc = 1;
+  const auto arg0 = "dummy";
+  char* argv0 = const_cast<char*>(arg0);
+  char** argv = &argv0;
+
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(&argc, argv);
+#else  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  internal::InitGoogleTestImpl(&argc, argv);
+#endif  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+}
+
+std::string TempDir() {
+#if defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_)
+  return GTEST_CUSTOM_TEMPDIR_FUNCTION_();
+#elif GTEST_OS_WINDOWS_MOBILE
+  return "\\temp\\";
+#elif GTEST_OS_WINDOWS
+  const char* temp_dir = internal::posix::GetEnv("TEMP");
+  if (temp_dir == nullptr || temp_dir[0] == '\0') {
+    return "\\temp\\";
+  } else if (temp_dir[strlen(temp_dir) - 1] == '\\') {
+    return temp_dir;
+  } else {
+    return std::string(temp_dir) + "\\";
+  }
+#elif GTEST_OS_LINUX_ANDROID
+  const char* temp_dir = internal::posix::GetEnv("TEST_TMPDIR");
+  if (temp_dir == nullptr || temp_dir[0] == '\0') {
+    return "/data/local/tmp/";
+  } else {
+    return temp_dir;
+  }
+#elif GTEST_OS_LINUX
+  const char* temp_dir = internal::posix::GetEnv("TEST_TMPDIR");
+  if (temp_dir == nullptr || temp_dir[0] == '\0') {
+    return "/tmp/";
+  } else {
+    return temp_dir;
+  }
+#else
+  return "/tmp/";
+#endif  // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Class ScopedTrace
+
+// Pushes the given source file location and message onto a per-thread
+// trace stack maintained by Google Test.
+void ScopedTrace::PushTrace(const char* file, int line, std::string message) {
+  internal::TraceInfo trace;
+  trace.file = file;
+  trace.line = line;
+  trace.message.swap(message);
+
+  UnitTest::GetInstance()->PushGTestTrace(trace);
+}
+
+// Pops the info pushed by the c'tor.
+ScopedTrace::~ScopedTrace()
+    GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
+  UnitTest::GetInstance()->PopGTestTrace();
 }
 
 }  // namespace testing
@@ -6527,12 +8268,15 @@ void InitGoogleTest(int* argc, wchar_t** argv) {
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev)
+
 //
 // This file implements death tests.
 
 
+#include <functional>
+#include <utility>
+
+
 #if GTEST_HAS_DEATH_TEST
 
 # if GTEST_OS_MAC
@@ -6560,23 +8304,33 @@ void InitGoogleTest(int* argc, wchar_t** argv) {
 #  include <spawn.h>
 # endif  // GTEST_OS_QNX
 
-#endif  // GTEST_HAS_DEATH_TEST
+# if GTEST_OS_FUCHSIA
+#  include <lib/fdio/fd.h>
+#  include <lib/fdio/io.h>
+#  include <lib/fdio/spawn.h>
+#  include <lib/zx/channel.h>
+#  include <lib/zx/port.h>
+#  include <lib/zx/process.h>
+#  include <lib/zx/socket.h>
+#  include <zircon/processargs.h>
+#  include <zircon/syscalls.h>
+#  include <zircon/syscalls/policy.h>
+#  include <zircon/syscalls/port.h>
+# endif  // GTEST_OS_FUCHSIA
 
+#endif  // GTEST_HAS_DEATH_TEST
 
-// Indicates that this translation unit is part of Google Test's
-// implementation.  It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error.  This trick is to
-// prevent a user from accidentally including gtest-internal-inl.h in
-// his code.
-#define GTEST_IMPLEMENTATION_ 1
-#undef GTEST_IMPLEMENTATION_
 
 namespace testing {
 
 // Constants.
 
 // The default death test style.
-static const char kDefaultDeathTestStyle[] = "fast";
+//
+// This is defined in internal/gtest-port.h as "fast", but can be overridden by
+// a definition in internal/custom/gtest-port.h. The recommended value, which is
+// used internally at Google, is "threadsafe".
+static const char kDefaultDeathTestStyle[] = GTEST_DEFAULT_DEATH_TEST_STYLE;
 
 GTEST_DEFINE_string_(
     death_test_style,
@@ -6605,8 +8359,8 @@ GTEST_DEFINE_string_(
     "Indicates the file, line number, temporal index of "
     "the single death test to run, and a file descriptor to "
     "which a success code may be sent, all separated by "
-    "the '|' characters.  This flag is specified if and only if the current "
-    "process is a sub-process launched for running a thread-safe "
+    "the '|' characters.  This flag is specified if and only if the "
+    "current process is a sub-process launched for running a thread-safe "
     "death test.  FOR INTERNAL USE ONLY.");
 }  // namespace internal
 
@@ -6616,7 +8370,9 @@ namespace internal {
 
 // Valid only for fast death tests. Indicates the code is running in the
 // child process of a fast style death test.
+# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
 static bool g_in_fast_death_test_child = false;
+# endif
 
 // Returns a Boolean value indicating whether the caller is currently
 // executing in the context of the death test child process.  Tools such as
@@ -6624,10 +8380,10 @@ static bool g_in_fast_death_test_child = false;
 // tests.  IMPORTANT: This is an internal utility.  Using it may break the
 // implementation of death tests.  User code MUST NOT use it.
 bool InDeathTestChild() {
-# if GTEST_OS_WINDOWS
+# if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
 
-  // On Windows, death tests are thread-safe regardless of the value of the
-  // death_test_style flag.
+  // On Windows and Fuchsia, death tests are thread-safe regardless of the value
+  // of the death_test_style flag.
   return !GTEST_FLAG(internal_run_death_test).empty();
 
 # else
@@ -6647,7 +8403,7 @@ ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) {
 
 // ExitedWithCode function-call operator.
 bool ExitedWithCode::operator()(int exit_status) const {
-# if GTEST_OS_WINDOWS
+# if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
 
   return exit_status == exit_code_;
 
@@ -6655,19 +8411,27 @@ bool ExitedWithCode::operator()(int exit_status) const {
 
   return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_;
 
-# endif  // GTEST_OS_WINDOWS
+# endif  // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
 }
 
-# if !GTEST_OS_WINDOWS
+# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
 // KilledBySignal constructor.
 KilledBySignal::KilledBySignal(int signum) : signum_(signum) {
 }
 
 // KilledBySignal function-call operator.
 bool KilledBySignal::operator()(int exit_status) const {
+#  if defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_)
+  {
+    bool result;
+    if (GTEST_KILLED_BY_SIGNAL_OVERRIDE_(signum_, exit_status, &result)) {
+      return result;
+    }
+  }
+#  endif  // defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_)
   return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_;
 }
-# endif  // !GTEST_OS_WINDOWS
+# endif  // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
 
 namespace internal {
 
@@ -6678,7 +8442,7 @@ namespace internal {
 static std::string ExitSummary(int exit_code) {
   Message m;
 
-# if GTEST_OS_WINDOWS
+# if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
 
   m << "Exited with exit status " << exit_code;
 
@@ -6694,7 +8458,7 @@ static std::string ExitSummary(int exit_code) {
     m << " (core dumped)";
   }
 #  endif
-# endif  // GTEST_OS_WINDOWS
+# endif  // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
 
   return m.GetString();
 }
@@ -6705,7 +8469,7 @@ bool ExitedUnsuccessfully(int exit_status) {
   return !ExitedWithCode(0)(exit_status);
 }
 
-# if !GTEST_OS_WINDOWS
+# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
 // Generates a textual failure message when a death test finds more than
 // one thread running, or cannot determine the number of threads, prior
 // to executing the given statement.  It is the responsibility of the
@@ -6714,13 +8478,19 @@ static std::string DeathTestThreadWarning(size_t thread_count) {
   Message msg;
   msg << "Death tests use fork(), which is unsafe particularly"
       << " in a threaded context. For this test, " << GTEST_NAME_ << " ";
-  if (thread_count == 0)
+  if (thread_count == 0) {
     msg << "couldn't detect the number of threads.";
-  else
+  } else {
     msg << "detected " << thread_count << " threads.";
+  }
+  msg << " See "
+         "https://github.com/google/googletest/blob/master/docs/"
+         "advanced.md#death-tests-and-threads"
+      << " for more explanation and suggested solutions, especially if"
+      << " this is the last message you see before your test times out.";
   return msg.GetString();
 }
-# endif  // !GTEST_OS_WINDOWS
+# endif  // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
 
 // Flag characters for reporting a death test that did not die.
 static const char kDeathTestLived = 'L';
@@ -6728,6 +8498,13 @@ static const char kDeathTestReturned = 'R';
 static const char kDeathTestThrew = 'T';
 static const char kDeathTestInternalError = 'I';
 
+#if GTEST_OS_FUCHSIA
+
+// File descriptor used for the pipe in the child process.
+static const int kFuchsiaReadPipeFd = 3;
+
+#endif
+
 // An enumeration describing all of the possible ways that a death test can
 // conclude.  DIED means that the process died while executing the test
 // code; LIVED means that process lived beyond the end of the test code;
@@ -6735,8 +8512,6 @@ static const char kDeathTestInternalError = 'I';
 // statement, which is not allowed; THREW means that the test statement
 // returned control by throwing an exception.  IN_PROGRESS means the test
 // has not yet concluded.
-// TODO(vladl@google.com): Unify names and possibly values for
-// AbortReason, DeathTestOutcome, and flag characters above.
 enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };
 
 // Routine for aborting the program which is safe to call from an
@@ -6744,13 +8519,13 @@ enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };
 // message is propagated back to the parent process.  Otherwise, the
 // message is simply printed to stderr.  In either case, the program
 // then exits with status 1.
-void DeathTestAbort(const std::string& message) {
+static void DeathTestAbort(const std::string& message) {
   // On a POSIX system, this function may be called from a threadsafe-style
   // death test child process, which operates on a very small stack.  Use
   // the heap for any additional non-minuscule memory requirements.
   const InternalRunDeathTestFlag* const flag =
       GetUnitTestImpl()->internal_run_death_test_flag();
-  if (flag != NULL) {
+  if (flag != nullptr) {
     FILE* parent = posix::FDOpen(flag->write_fd(), "w");
     fputc(kDeathTestInternalError, parent);
     fprintf(parent, "%s", message.c_str());
@@ -6830,7 +8605,7 @@ static void FailFromInternalError(int fd) {
 // for the current test.
 DeathTest::DeathTest() {
   TestInfo* const info = GetUnitTestImpl()->current_test_info();
-  if (info == NULL) {
+  if (info == nullptr) {
     DeathTestAbort("Cannot run a death test outside of a TEST or "
                    "TEST_F construct");
   }
@@ -6838,10 +8613,11 @@ DeathTest::DeathTest() {
 
 // Creates and returns a death test by dispatching to the current
 // death test factory.
-bool DeathTest::Create(const char* statement, const RE* regex,
-                       const char* file, int line, DeathTest** test) {
+bool DeathTest::Create(const char* statement,
+                       Matcher<const std::string&> matcher, const char* file,
+                       int line, DeathTest** test) {
   return GetUnitTestImpl()->death_test_factory()->Create(
-      statement, regex, file, line, test);
+      statement, std::move(matcher), file, line, test);
 }
 
 const char* DeathTest::LastMessage() {
@@ -6857,9 +8633,9 @@ std::string DeathTest::last_death_test_message_;
 // Provides cross platform implementation for some death functionality.
 class DeathTestImpl : public DeathTest {
  protected:
-  DeathTestImpl(const char* a_statement, const RE* a_regex)
+  DeathTestImpl(const char* a_statement, Matcher<const std::string&> matcher)
       : statement_(a_statement),
-        regex_(a_regex),
+        matcher_(std::move(matcher)),
         spawned_(false),
         status_(-1),
         outcome_(IN_PROGRESS),
@@ -6867,13 +8643,12 @@ class DeathTestImpl : public DeathTest {
         write_fd_(-1) {}
 
   // read_fd_ is expected to be closed and cleared by a derived class.
-  ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
+  ~DeathTestImpl() override { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
 
-  void Abort(AbortReason reason);
-  virtual bool Passed(bool status_ok);
+  void Abort(AbortReason reason) override;
+  bool Passed(bool status_ok) override;
 
   const char* statement() const { return statement_; }
-  const RE* regex() const { return regex_; }
   bool spawned() const { return spawned_; }
   void set_spawned(bool is_spawned) { spawned_ = is_spawned; }
   int status() const { return status_; }
@@ -6891,13 +8666,15 @@ class DeathTestImpl : public DeathTest {
   // case of unexpected codes.
   void ReadAndInterpretStatusByte();
 
+  // Returns stderr output from the child process.
+  virtual std::string GetErrorLogs();
+
  private:
   // The textual content of the code this object is testing.  This class
   // doesn't own this string and should not attempt to delete it.
   const char* const statement_;
-  // The regular expression which test output must match.  DeathTestImpl
-  // doesn't own this object and should not attempt to delete it.
-  const RE* const regex_;
+  // A matcher that's expected to match the stderr output by the child process.
+  Matcher<const std::string&> matcher_;
   // True if the death test child process has been successfully spawned.
   bool spawned_;
   // The exit status of the child process.
@@ -6959,6 +8736,10 @@ void DeathTestImpl::ReadAndInterpretStatusByte() {
   set_read_fd(-1);
 }
 
+std::string DeathTestImpl::GetErrorLogs() {
+  return GetCapturedStderr();
+}
+
 // Signals that the death test code which should have exited, didn't.
 // Should be called only in a death test child process.
 // Writes a status byte to the child's status file descriptor, then
@@ -7012,22 +8793,21 @@ static ::std::string FormatDeathTestOutput(const ::std::string& output) {
 //             in the format specified by wait(2). On Windows, this is the
 //             value supplied to the ExitProcess() API or a numeric code
 //             of the exception that terminated the program.
-//   regex:    A regular expression object to be applied to
-//             the test's captured standard error output; the death test
-//             fails if it does not match.
+//   matcher_: A matcher that's expected to match the stderr output by the child
+//             process.
 //
 // Argument:
 //   status_ok: true if exit_status is acceptable in the context of
 //              this particular death test, which fails if it is false
 //
-// Returns true iff all of the above conditions are met.  Otherwise, the
-// first failing condition, in the order given above, is the one that is
+// Returns true if and only if all of the above conditions are met.  Otherwise,
+// the first failing condition, in the order given above, is the one that is
 // reported. Also sets the last death test message string.
 bool DeathTestImpl::Passed(bool status_ok) {
   if (!spawned())
     return false;
 
-  const std::string error_message = GetCapturedStderr();
+  const std::string error_message = GetErrorLogs();
 
   bool success = false;
   Message buffer;
@@ -7048,13 +8828,15 @@ bool DeathTestImpl::Passed(bool status_ok) {
       break;
     case DIED:
       if (status_ok) {
-        const bool matched = RE::PartialMatch(error_message.c_str(), *regex());
-        if (matched) {
+        if (matcher_.Matches(error_message)) {
           success = true;
         } else {
+          std::ostringstream stream;
+          matcher_.DescribeTo(&stream);
           buffer << "    Result: died but not with expected error.\n"
-                 << "  Expected: " << regex()->pattern() << "\n"
-                 << "Actual msg:\n" << FormatDeathTestOutput(error_message);
+                 << "  Expected: " << stream.str() << "\n"
+                 << "Actual msg:\n"
+                 << FormatDeathTestOutput(error_message);
         }
       } else {
         buffer << "    Result: died but not with expected exit code:\n"
@@ -7103,11 +8885,11 @@ bool DeathTestImpl::Passed(bool status_ok) {
 //
 class WindowsDeathTest : public DeathTestImpl {
  public:
-  WindowsDeathTest(const char* a_statement,
-                   const RE* a_regex,
-                   const char* file,
-                   int line)
-      : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {}
+  WindowsDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+                   const char* file, int line)
+      : DeathTestImpl(a_statement, std::move(matcher)),
+        file_(file),
+        line_(line) {}
 
   // All of these virtual functions are inherited from DeathTest.
   virtual int Wait();
@@ -7184,7 +8966,7 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
   const TestInfo* const info = impl->current_test_info();
   const int death_test_index = info->result()->death_test_count();
 
-  if (flag != NULL) {
+  if (flag != nullptr) {
     // ParseInternalRunDeathTestFlag() has performed all the necessary
     // processing.
     set_write_fd(flag->write_fd());
@@ -7193,8 +8975,8 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
 
   // WindowsDeathTest uses an anonymous pipe to communicate results of
   // a death test.
-  SECURITY_ATTRIBUTES handles_are_inheritable = {
-    sizeof(SECURITY_ATTRIBUTES), NULL, TRUE };
+  SECURITY_ATTRIBUTES handles_are_inheritable = {sizeof(SECURITY_ATTRIBUTES),
+                                                 nullptr, TRUE};
   HANDLE read_handle, write_handle;
   GTEST_DEATH_TEST_CHECK_(
       ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable,
@@ -7205,13 +8987,13 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
   write_handle_.Reset(write_handle);
   event_handle_.Reset(::CreateEvent(
       &handles_are_inheritable,
-      TRUE,    // The event will automatically reset to non-signaled state.
-      FALSE,   // The initial state is non-signalled.
-      NULL));  // The even is unnamed.
-  GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL);
-  const std::string filter_flag =
-      std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
-      info->test_case_name() + "." + info->name();
+      TRUE,       // The event will automatically reset to non-signaled state.
+      FALSE,      // The initial state is non-signalled.
+      nullptr));  // The even is unnamed.
+  GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != nullptr);
+  const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+                                  kFilterFlag + "=" + info->test_suite_name() +
+                                  "." + info->name();
   const std::string internal_flag =
       std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag +
       "=" + file_ + "|" + StreamableToString(line_) + "|" +
@@ -7224,10 +9006,9 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
       "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));
 
   char executable_path[_MAX_PATH + 1];  // NOLINT
-  GTEST_DEATH_TEST_CHECK_(
-      _MAX_PATH + 1 != ::GetModuleFileNameA(NULL,
-                                            executable_path,
-                                            _MAX_PATH));
+  GTEST_DEATH_TEST_CHECK_(_MAX_PATH + 1 != ::GetModuleFileNameA(nullptr,
+                                                                executable_path,
+                                                                _MAX_PATH));
 
   std::string command_line =
       std::string(::GetCommandLineA()) + " " + filter_flag + " \"" +
@@ -7248,33 +9029,288 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
   startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);
 
   PROCESS_INFORMATION process_info;
-  GTEST_DEATH_TEST_CHECK_(::CreateProcessA(
-      executable_path,
-      const_cast<char*>(command_line.c_str()),
-      NULL,   // Retuned process handle is not inheritable.
-      NULL,   // Retuned thread handle is not inheritable.
-      TRUE,   // Child inherits all inheritable handles (for write_handle_).
-      0x0,    // Default creation flags.
-      NULL,   // Inherit the parent's environment.
-      UnitTest::GetInstance()->original_working_dir(),
-      &startup_info,
-      &process_info) != FALSE);
+  GTEST_DEATH_TEST_CHECK_(
+      ::CreateProcessA(
+          executable_path, const_cast<char*>(command_line.c_str()),
+          nullptr,  // Retuned process handle is not inheritable.
+          nullptr,  // Retuned thread handle is not inheritable.
+          TRUE,  // Child inherits all inheritable handles (for write_handle_).
+          0x0,   // Default creation flags.
+          nullptr,  // Inherit the parent's environment.
+          UnitTest::GetInstance()->original_working_dir(), &startup_info,
+          &process_info) != FALSE);
   child_handle_.Reset(process_info.hProcess);
   ::CloseHandle(process_info.hThread);
   set_spawned(true);
   return OVERSEE_TEST;
 }
-# else  // We are not on Windows.
+
+# elif GTEST_OS_FUCHSIA
+
+class FuchsiaDeathTest : public DeathTestImpl {
+ public:
+  FuchsiaDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+                   const char* file, int line)
+      : DeathTestImpl(a_statement, std::move(matcher)),
+        file_(file),
+        line_(line) {}
+
+  // All of these virtual functions are inherited from DeathTest.
+  int Wait() override;
+  TestRole AssumeRole() override;
+  std::string GetErrorLogs() override;
+
+ private:
+  // The name of the file in which the death test is located.
+  const char* const file_;
+  // The line number on which the death test is located.
+  const int line_;
+  // The stderr data captured by the child process.
+  std::string captured_stderr_;
+
+  zx::process child_process_;
+  zx::channel exception_channel_;
+  zx::socket stderr_socket_;
+};
+
+// Utility class for accumulating command-line arguments.
+class Arguments {
+ public:
+  Arguments() { args_.push_back(nullptr); }
+
+  ~Arguments() {
+    for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
+         ++i) {
+      free(*i);
+    }
+  }
+  void AddArgument(const char* argument) {
+    args_.insert(args_.end() - 1, posix::StrDup(argument));
+  }
+
+  template <typename Str>
+  void AddArguments(const ::std::vector<Str>& arguments) {
+    for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
+         i != arguments.end();
+         ++i) {
+      args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
+    }
+  }
+  char* const* Argv() {
+    return &args_[0];
+  }
+
+  int size() {
+    return static_cast<int>(args_.size()) - 1;
+  }
+
+ private:
+  std::vector<char*> args_;
+};
+
+// Waits for the child in a death test to exit, returning its exit
+// status, or 0 if no child process exists.  As a side effect, sets the
+// outcome data member.
+int FuchsiaDeathTest::Wait() {
+  const int kProcessKey = 0;
+  const int kSocketKey = 1;
+  const int kExceptionKey = 2;
+
+  if (!spawned())
+    return 0;
+
+  // Create a port to wait for socket/task/exception events.
+  zx_status_t status_zx;
+  zx::port port;
+  status_zx = zx::port::create(0, &port);
+  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+  // Register to wait for the child process to terminate.
+  status_zx = child_process_.wait_async(
+      port, kProcessKey, ZX_PROCESS_TERMINATED, 0);
+  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+  // Register to wait for the socket to be readable or closed.
+  status_zx = stderr_socket_.wait_async(
+      port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
+  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+  // Register to wait for an exception.
+  status_zx = exception_channel_.wait_async(
+      port, kExceptionKey, ZX_CHANNEL_READABLE, 0);
+  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+  bool process_terminated = false;
+  bool socket_closed = false;
+  do {
+    zx_port_packet_t packet = {};
+    status_zx = port.wait(zx::time::infinite(), &packet);
+    GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+    if (packet.key == kExceptionKey) {
+      // Process encountered an exception. Kill it directly rather than
+      // letting other handlers process the event. We will get a kProcessKey
+      // event when the process actually terminates.
+      status_zx = child_process_.kill();
+      GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+    } else if (packet.key == kProcessKey) {
+      // Process terminated.
+      GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
+      GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_PROCESS_TERMINATED);
+      process_terminated = true;
+    } else if (packet.key == kSocketKey) {
+      GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
+      if (packet.signal.observed & ZX_SOCKET_READABLE) {
+        // Read data from the socket.
+        constexpr size_t kBufferSize = 1024;
+        do {
+          size_t old_length = captured_stderr_.length();
+          size_t bytes_read = 0;
+          captured_stderr_.resize(old_length + kBufferSize);
+          status_zx = stderr_socket_.read(
+              0, &captured_stderr_.front() + old_length, kBufferSize,
+              &bytes_read);
+          captured_stderr_.resize(old_length + bytes_read);
+        } while (status_zx == ZX_OK);
+        if (status_zx == ZX_ERR_PEER_CLOSED) {
+          socket_closed = true;
+        } else {
+          GTEST_DEATH_TEST_CHECK_(status_zx == ZX_ERR_SHOULD_WAIT);
+          status_zx = stderr_socket_.wait_async(
+              port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
+          GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+        }
+      } else {
+        GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_SOCKET_PEER_CLOSED);
+        socket_closed = true;
+      }
+    }
+  } while (!process_terminated && !socket_closed);
+
+  ReadAndInterpretStatusByte();
+
+  zx_info_process_t buffer;
+  status_zx = child_process_.get_info(ZX_INFO_PROCESS, &buffer, sizeof(buffer),
+                                      nullptr, nullptr);
+  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+  GTEST_DEATH_TEST_CHECK_(buffer.flags & ZX_INFO_PROCESS_FLAG_EXITED);
+  set_status(static_cast<int>(buffer.return_code));
+  return status();
+}
+
+// The AssumeRole process for a Fuchsia death test.  It creates a child
+// process with the same executable as the current process to run the
+// death test.  The child process is given the --gtest_filter and
+// --gtest_internal_run_death_test flags such that it knows to run the
+// current death test only.
+DeathTest::TestRole FuchsiaDeathTest::AssumeRole() {
+  const UnitTestImpl* const impl = GetUnitTestImpl();
+  const InternalRunDeathTestFlag* const flag =
+      impl->internal_run_death_test_flag();
+  const TestInfo* const info = impl->current_test_info();
+  const int death_test_index = info->result()->death_test_count();
+
+  if (flag != nullptr) {
+    // ParseInternalRunDeathTestFlag() has performed all the necessary
+    // processing.
+    set_write_fd(kFuchsiaReadPipeFd);
+    return EXECUTE_TEST;
+  }
+
+  // Flush the log buffers since the log streams are shared with the child.
+  FlushInfoLog();
+
+  // Build the child process command line.
+  const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+                                  kFilterFlag + "=" + info->test_suite_name() +
+                                  "." + info->name();
+  const std::string internal_flag =
+      std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "="
+      + file_ + "|"
+      + StreamableToString(line_) + "|"
+      + StreamableToString(death_test_index);
+  Arguments args;
+  args.AddArguments(GetInjectableArgvs());
+  args.AddArgument(filter_flag.c_str());
+  args.AddArgument(internal_flag.c_str());
+
+  // Build the pipe for communication with the child.
+  zx_status_t status;
+  zx_handle_t child_pipe_handle;
+  int child_pipe_fd;
+  status = fdio_pipe_half(&child_pipe_fd, &child_pipe_handle);
+  GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+  set_read_fd(child_pipe_fd);
+
+  // Set the pipe handle for the child.
+  fdio_spawn_action_t spawn_actions[2] = {};
+  fdio_spawn_action_t* add_handle_action = &spawn_actions[0];
+  add_handle_action->action = FDIO_SPAWN_ACTION_ADD_HANDLE;
+  add_handle_action->h.id = PA_HND(PA_FD, kFuchsiaReadPipeFd);
+  add_handle_action->h.handle = child_pipe_handle;
+
+  // Create a socket pair will be used to receive the child process' stderr.
+  zx::socket stderr_producer_socket;
+  status =
+      zx::socket::create(0, &stderr_producer_socket, &stderr_socket_);
+  GTEST_DEATH_TEST_CHECK_(status >= 0);
+  int stderr_producer_fd = -1;
+  status =
+      fdio_fd_create(stderr_producer_socket.release(), &stderr_producer_fd);
+  GTEST_DEATH_TEST_CHECK_(status >= 0);
+
+  // Make the stderr socket nonblocking.
+  GTEST_DEATH_TEST_CHECK_(fcntl(stderr_producer_fd, F_SETFL, 0) == 0);
+
+  fdio_spawn_action_t* add_stderr_action = &spawn_actions[1];
+  add_stderr_action->action = FDIO_SPAWN_ACTION_CLONE_FD;
+  add_stderr_action->fd.local_fd = stderr_producer_fd;
+  add_stderr_action->fd.target_fd = STDERR_FILENO;
+
+  // Create a child job.
+  zx_handle_t child_job = ZX_HANDLE_INVALID;
+  status = zx_job_create(zx_job_default(), 0, & child_job);
+  GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+  zx_policy_basic_t policy;
+  policy.condition = ZX_POL_NEW_ANY;
+  policy.policy = ZX_POL_ACTION_ALLOW;
+  status = zx_job_set_policy(
+      child_job, ZX_JOB_POL_RELATIVE, ZX_JOB_POL_BASIC, &policy, 1);
+  GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+
+  // Create an exception channel attached to the |child_job|, to allow
+  // us to suppress the system default exception handler from firing.
+  status =
+      zx_task_create_exception_channel(
+          child_job, 0, exception_channel_.reset_and_get_address());
+  GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+
+  // Spawn the child process.
+  status = fdio_spawn_etc(
+      child_job, FDIO_SPAWN_CLONE_ALL, args.Argv()[0], args.Argv(), nullptr,
+      2, spawn_actions, child_process_.reset_and_get_address(), nullptr);
+  GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+
+  set_spawned(true);
+  return OVERSEE_TEST;
+}
+
+std::string FuchsiaDeathTest::GetErrorLogs() {
+  return captured_stderr_;
+}
+
+#else  // We are neither on Windows, nor on Fuchsia.
 
 // ForkingDeathTest provides implementations for most of the abstract
 // methods of the DeathTest interface.  Only the AssumeRole method is
 // left undefined.
 class ForkingDeathTest : public DeathTestImpl {
  public:
-  ForkingDeathTest(const char* statement, const RE* regex);
+  ForkingDeathTest(const char* statement, Matcher<const std::string&> matcher);
 
   // All of these virtual functions are inherited from DeathTest.
-  virtual int Wait();
+  int Wait() override;
 
  protected:
   void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }
@@ -7285,9 +9321,9 @@ class ForkingDeathTest : public DeathTestImpl {
 };
 
 // Constructs a ForkingDeathTest.
-ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex)
-    : DeathTestImpl(a_statement, a_regex),
-      child_pid_(-1) {}
+ForkingDeathTest::ForkingDeathTest(const char* a_statement,
+                                   Matcher<const std::string&> matcher)
+    : DeathTestImpl(a_statement, std::move(matcher)), child_pid_(-1) {}
 
 // Waits for the child in a death test to exit, returning its exit
 // status, or 0 if no child process exists.  As a side effect, sets the
@@ -7308,9 +9344,9 @@ int ForkingDeathTest::Wait() {
 // in the child process.
 class NoExecDeathTest : public ForkingDeathTest {
  public:
-  NoExecDeathTest(const char* a_statement, const RE* a_regex) :
-      ForkingDeathTest(a_statement, a_regex) { }
-  virtual TestRole AssumeRole();
+  NoExecDeathTest(const char* a_statement, Matcher<const std::string&> matcher)
+      : ForkingDeathTest(a_statement, std::move(matcher)) {}
+  TestRole AssumeRole() override;
 };
 
 // The AssumeRole process for a fork-and-run death test.  It implements a
@@ -7363,14 +9399,21 @@ DeathTest::TestRole NoExecDeathTest::AssumeRole() {
 // only this specific death test to be run.
 class ExecDeathTest : public ForkingDeathTest {
  public:
-  ExecDeathTest(const char* a_statement, const RE* a_regex,
-                const char* file, int line) :
-      ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { }
-  virtual TestRole AssumeRole();
+  ExecDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+                const char* file, int line)
+      : ForkingDeathTest(a_statement, std::move(matcher)),
+        file_(file),
+        line_(line) {}
+  TestRole AssumeRole() override;
+
  private:
-  static ::std::vector<testing::internal::string>
-  GetArgvsForDeathTestChildProcess() {
-    ::std::vector<testing::internal::string> args = GetInjectableArgvs();
+  static ::std::vector<std::string> GetArgvsForDeathTestChildProcess() {
+    ::std::vector<std::string> args = GetInjectableArgvs();
+#  if defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
+    ::std::vector<std::string> extra_args =
+        GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_();
+    args.insert(args.end(), extra_args.begin(), extra_args.end());
+#  endif  // defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
     return args;
   }
   // The name of the file in which the death test is located.
@@ -7382,9 +9425,7 @@ class ExecDeathTest : public ForkingDeathTest {
 // Utility class for accumulating command-line arguments.
 class Arguments {
  public:
-  Arguments() {
-    args_.push_back(NULL);
-  }
+  Arguments() { args_.push_back(nullptr); }
 
   ~Arguments() {
     for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
@@ -7419,21 +9460,9 @@ struct ExecDeathTestArgs {
   int close_fd;       // File descriptor to close; the read end of a pipe
 };
 
-#  if GTEST_OS_MAC
-inline char** GetEnviron() {
-  // When Google Test is built as a framework on MacOS X, the environ variable
-  // is unavailable. Apple's documentation (man environ) recommends using
-  // _NSGetEnviron() instead.
-  return *_NSGetEnviron();
-}
-#  else
-// Some POSIX platforms expect you to declare environ. extern "C" makes
-// it reside in the global namespace.
+#  if GTEST_OS_QNX
 extern "C" char** environ;
-inline char** GetEnviron() { return environ; }
-#  endif  // GTEST_OS_MAC
-
-#  if !GTEST_OS_QNX
+#  else  // GTEST_OS_QNX
 // The main function for a threadsafe-style death test child process.
 // This function is called in a clone()-ed process and thus must avoid
 // any potentially unsafe operations like malloc or libc functions.
@@ -7453,19 +9482,20 @@ static int ExecDeathTestChildMain(void* child_arg) {
     return EXIT_FAILURE;
   }
 
-  // We can safely call execve() as it's a direct system call.  We
+  // We can safely call execv() as it's almost a direct system call. We
   // cannot use execvp() as it's a libc function and thus potentially
-  // unsafe.  Since execve() doesn't search the PATH, the user must
+  // unsafe.  Since execv() doesn't search the PATH, the user must
   // invoke the test program via a valid path that contains at least
   // one path separator.
-  execve(args->argv[0], args->argv, GetEnviron());
-  DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " +
+  execv(args->argv[0], args->argv);
+  DeathTestAbort(std::string("execv(") + args->argv[0] + ", ...) in " +
                  original_dir + " failed: " +
                  GetLastErrnoDescription());
   return EXIT_FAILURE;
 }
-#  endif  // !GTEST_OS_QNX
+#  endif  // GTEST_OS_QNX
 
+#  if GTEST_HAS_CLONE
 // Two utility routines that together determine the direction the stack
 // grows.
 // This could be accomplished more elegantly by a single recursive
@@ -7475,18 +9505,31 @@ static int ExecDeathTestChildMain(void* child_arg) {
 // GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
 // StackLowerThanAddress into StackGrowsDown, which then doesn't give
 // correct answer.
-void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_;
-void StackLowerThanAddress(const void* ptr, bool* result) {
-  int dummy;
-  *result = (&dummy < ptr);
-}
-
-bool StackGrowsDown() {
-  int dummy;
+static void StackLowerThanAddress(const void* ptr,
+                                  bool* result) GTEST_NO_INLINE_;
+// Make sure sanitizers do not tamper with the stack here.
+// Ideally, we want to use `__builtin_frame_address` instead of a local variable
+// address with sanitizer disabled, but it does not work when the
+// compiler optimizes the stack frame out, which happens on PowerPC targets.
+// HWAddressSanitizer add a random tag to the MSB of the local variable address,
+// making comparison result unpredictable.
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+static void StackLowerThanAddress(const void* ptr, bool* result) {
+  int dummy = 0;
+  *result = std::less<const void*>()(&dummy, ptr);
+}
+
+// Make sure AddressSanitizer does not tamper with the stack here.
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+static bool StackGrowsDown() {
+  int dummy = 0;
   bool result;
   StackLowerThanAddress(&dummy, &result);
   return result;
 }
+#  endif  // GTEST_HAS_CLONE
 
 // Spawns a child process with the same executable as the current process in
 // a thread-safe manner and instructs it to run the death test.  The
@@ -7524,7 +9567,7 @@ static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
                                         fd_flags | FD_CLOEXEC));
   struct inheritance inherit = {0};
   // spawn is a system call.
-  child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron());
+  child_pid = spawn(args.argv[0], 0, nullptr, &inherit, args.argv, environ);
   // Restores the current working directory.
   GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
   GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));
@@ -7548,9 +9591,9 @@ static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
 
   if (!use_fork) {
     static const bool stack_grows_down = StackGrowsDown();
-    const size_t stack_size = getpagesize();
+    const auto stack_size = static_cast<size_t>(getpagesize() * 2);
     // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
-    void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
+    void* const stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
                              MAP_ANON | MAP_PRIVATE, -1, 0);
     GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);
 
@@ -7564,8 +9607,9 @@ static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
     void* const stack_top =
         static_cast<char*>(stack) +
             (stack_grows_down ? stack_size - kMaxStackAlignment : 0);
-    GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment &&
-        reinterpret_cast<intptr_t>(stack_top) % kMaxStackAlignment == 0);
+    GTEST_DEATH_TEST_CHECK_(
+        static_cast<size_t>(stack_size) > kMaxStackAlignment &&
+        reinterpret_cast<uintptr_t>(stack_top) % kMaxStackAlignment == 0);
 
     child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);
 
@@ -7582,7 +9626,7 @@ static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
 #  endif  // GTEST_OS_QNX
 #  if GTEST_OS_LINUX
   GTEST_DEATH_TEST_CHECK_SYSCALL_(
-      sigaction(SIGPROF, &saved_sigprof_action, NULL));
+      sigaction(SIGPROF, &saved_sigprof_action, nullptr));
 #  endif  // GTEST_OS_LINUX
 
   GTEST_DEATH_TEST_CHECK_(child_pid != -1);
@@ -7600,7 +9644,7 @@ DeathTest::TestRole ExecDeathTest::AssumeRole() {
   const TestInfo* const info = impl->current_test_info();
   const int death_test_index = info->result()->death_test_count();
 
-  if (flag != NULL) {
+  if (flag != nullptr) {
     set_write_fd(flag->write_fd());
     return EXECUTE_TEST;
   }
@@ -7611,9 +9655,9 @@ DeathTest::TestRole ExecDeathTest::AssumeRole() {
   // it be closed when the child process does an exec:
   GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);
 
-  const std::string filter_flag =
-      std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "="
-      + info->test_case_name() + "." + info->name();
+  const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+                                  kFilterFlag + "=" + info->test_suite_name() +
+                                  "." + info->name();
   const std::string internal_flag =
       std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "="
       + file_ + "|" + StreamableToString(line_) + "|"
@@ -7646,7 +9690,8 @@ DeathTest::TestRole ExecDeathTest::AssumeRole() {
 // by the "test" argument to its address.  If the test should be
 // skipped, sets that pointer to NULL.  Returns true, unless the
 // flag is set to an invalid value.
-bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
+bool DefaultDeathTestFactory::Create(const char* statement,
+                                     Matcher<const std::string&> matcher,
                                      const char* file, int line,
                                      DeathTest** test) {
   UnitTestImpl* const impl = GetUnitTestImpl();
@@ -7655,7 +9700,7 @@ bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
   const int death_test_index = impl->current_test_info()
       ->increment_death_test_count();
 
-  if (flag != NULL) {
+  if (flag != nullptr) {
     if (death_test_index > flag->index()) {
       DeathTest::set_last_death_test_message(
           "Death test count (" + StreamableToString(death_test_index)
@@ -7666,7 +9711,7 @@ bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
 
     if (!(flag->file() == file && flag->line() == line &&
           flag->index() == death_test_index)) {
-      *test = NULL;
+      *test = nullptr;
       return true;
     }
   }
@@ -7675,54 +9720,41 @@ bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
 
   if (GTEST_FLAG(death_test_style) == "threadsafe" ||
       GTEST_FLAG(death_test_style) == "fast") {
-    *test = new WindowsDeathTest(statement, regex, file, line);
-  }
-
-# else
-
-  if (GTEST_FLAG(death_test_style) == "threadsafe") {
-    *test = new ExecDeathTest(statement, regex, file, line);
-  } else if (GTEST_FLAG(death_test_style) == "fast") {
-    *test = new NoExecDeathTest(statement, regex);
+    *test = new WindowsDeathTest(statement, std::move(matcher), file, line);
   }
 
-# endif  // GTEST_OS_WINDOWS
+# elif GTEST_OS_FUCHSIA
 
-  else {  // NOLINT - this is more readable than unbalanced brackets inside #if.
-    DeathTest::set_last_death_test_message(
-        "Unknown death test style \"" + GTEST_FLAG(death_test_style)
-        + "\" encountered");
-    return false;
+  if (GTEST_FLAG(death_test_style) == "threadsafe" ||
+      GTEST_FLAG(death_test_style) == "fast") {
+    *test = new FuchsiaDeathTest(statement, std::move(matcher), file, line);
   }
 
-  return true;
-}
+# else
 
-// Splits a given string on a given delimiter, populating a given
-// vector with the fields.  GTEST_HAS_DEATH_TEST implies that we have
-// ::std::string, so we can use it here.
-static void SplitString(const ::std::string& str, char delimiter,
-                        ::std::vector< ::std::string>* dest) {
-  ::std::vector< ::std::string> parsed;
-  ::std::string::size_type pos = 0;
-  while (::testing::internal::AlwaysTrue()) {
-    const ::std::string::size_type colon = str.find(delimiter, pos);
-    if (colon == ::std::string::npos) {
-      parsed.push_back(str.substr(pos));
-      break;
-    } else {
-      parsed.push_back(str.substr(pos, colon - pos));
-      pos = colon + 1;
-    }
+  if (GTEST_FLAG(death_test_style) == "threadsafe") {
+    *test = new ExecDeathTest(statement, std::move(matcher), file, line);
+  } else if (GTEST_FLAG(death_test_style) == "fast") {
+    *test = new NoExecDeathTest(statement, std::move(matcher));
   }
-  dest->swap(parsed);
+
+# endif  // GTEST_OS_WINDOWS
+
+  else {  // NOLINT - this is more readable than unbalanced brackets inside #if.
+    DeathTest::set_last_death_test_message(
+        "Unknown death test style \"" + GTEST_FLAG(death_test_style)
+        + "\" encountered");
+    return false;
+  }
+
+  return true;
 }
 
 # if GTEST_OS_WINDOWS
 // Recreates the pipe and event handles from the provided parameters,
 // signals the event, and returns a file descriptor wrapped around the pipe
 // handle. This function is called in the child process only.
-int GetStatusFileDescriptor(unsigned int parent_process_id,
+static int GetStatusFileDescriptor(unsigned int parent_process_id,
                             size_t write_handle_as_size_t,
                             size_t event_handle_as_size_t) {
   AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE,
@@ -7733,15 +9765,13 @@ int GetStatusFileDescriptor(unsigned int parent_process_id,
                    StreamableToString(parent_process_id));
   }
 
-  // TODO(vladl@google.com): Replace the following check with a
-  // compile-time assertion when available.
   GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t));
 
   const HANDLE write_handle =
       reinterpret_cast<HANDLE>(write_handle_as_size_t);
   HANDLE dup_write_handle;
 
-  // The newly initialized handle is accessible only in in the parent
+  // The newly initialized handle is accessible only in the parent
   // process. To obtain one accessible within the child, we need to use
   // DuplicateHandle.
   if (!::DuplicateHandle(parent_process_handle.Get(), write_handle,
@@ -7790,7 +9820,7 @@ int GetStatusFileDescriptor(unsigned int parent_process_id,
 // initialized from the GTEST_FLAG(internal_run_death_test) flag if
 // the flag is specified; otherwise returns NULL.
 InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
-  if (GTEST_FLAG(internal_run_death_test) == "") return NULL;
+  if (GTEST_FLAG(internal_run_death_test) == "") return nullptr;
 
   // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we
   // can use it here.
@@ -7818,6 +9848,16 @@ InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
   write_fd = GetStatusFileDescriptor(parent_process_id,
                                      write_handle_as_size_t,
                                      event_handle_as_size_t);
+
+# elif GTEST_OS_FUCHSIA
+
+  if (fields.size() != 3
+      || !ParseNaturalNumber(fields[1], &line)
+      || !ParseNaturalNumber(fields[2], &index)) {
+    DeathTestAbort("Bad --gtest_internal_run_death_test flag: "
+        + GTEST_FLAG(internal_run_death_test));
+  }
+
 # else
 
   if (fields.size() != 4
@@ -7866,8 +9906,6 @@ InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: keith.ray@gmail.com (Keith Ray)
 
 
 #include <stdlib.h>
@@ -7877,14 +9915,12 @@ InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
 #elif GTEST_OS_WINDOWS
 # include <direct.h>
 # include <io.h>
-#elif GTEST_OS_SYMBIAN
-// Symbian OpenC has PATH_MAX in sys/syslimits.h
-# include <sys/syslimits.h>
 #else
 # include <limits.h>
 # include <climits>  // Some Linux distributions define PATH_MAX here.
 #endif  // GTEST_OS_WINDOWS_MOBILE
 
+
 #if GTEST_OS_WINDOWS
 # define GTEST_PATH_MAX_ _MAX_PATH
 #elif defined(PATH_MAX)
@@ -7895,7 +9931,6 @@ InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
 # define GTEST_PATH_MAX_ _POSIX_PATH_MAX
 #endif  // GTEST_OS_WINDOWS
 
-
 namespace testing {
 namespace internal {
 
@@ -7906,7 +9941,6 @@ namespace internal {
 // of them.
 const char kPathSeparator = '\\';
 const char kAlternatePathSeparator = '/';
-//const char kPathSeparatorString[] = "\\";
 const char kAlternatePathSeparatorString[] = "/";
 # if GTEST_OS_WINDOWS_MOBILE
 // Windows CE doesn't have a current directory. You should not use
@@ -7920,7 +9954,6 @@ const char kCurrentDirectoryString[] = ".\\";
 # endif  // GTEST_OS_WINDOWS_MOBILE
 #else
 const char kPathSeparator = '/';
-//const char kPathSeparatorString[] = "/";
 const char kCurrentDirectoryString[] = "./";
 #endif  // GTEST_OS_WINDOWS
 
@@ -7935,16 +9968,25 @@ static bool IsPathSeparator(char c) {
 
 // Returns the current working directory, or "" if unsuccessful.
 FilePath FilePath::GetCurrentDir() {
-#if GTEST_OS_WINDOWS_MOBILE
-  // Windows CE doesn't have a current directory, so we just return
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE ||         \
+    GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_ESP32 || \
+    GTEST_OS_XTENSA
+  // These platforms do not have a current directory, so we just return
   // something reasonable.
   return FilePath(kCurrentDirectoryString);
 #elif GTEST_OS_WINDOWS
   char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
-  return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd);
+  return FilePath(_getcwd(cwd, sizeof(cwd)) == nullptr ? "" : cwd);
 #else
   char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
-  return FilePath(getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd);
+  char* result = getcwd(cwd, sizeof(cwd));
+# if GTEST_OS_NACL
+  // getcwd will likely fail in NaCl due to the sandbox, so return something
+  // reasonable. The user may have provided a shim implementation for getcwd,
+  // however, so fallback only when failure is detected.
+  return FilePath(result == nullptr ? kCurrentDirectoryString : cwd);
+# endif  // GTEST_OS_NACL
+  return FilePath(result == nullptr ? "" : cwd);
 #endif  // GTEST_OS_WINDOWS_MOBILE
 }
 
@@ -7969,8 +10011,8 @@ const char* FilePath::FindLastPathSeparator() const {
 #if GTEST_HAS_ALT_PATH_SEP_
   const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator);
   // Comparing two pointers of which only one is NULL is undefined.
-  if (last_alt_sep != NULL &&
-      (last_sep == NULL || last_alt_sep > last_sep)) {
+  if (last_alt_sep != nullptr &&
+      (last_sep == nullptr || last_alt_sep > last_sep)) {
     return last_alt_sep;
   }
 #endif
@@ -7998,7 +10040,7 @@ FilePath FilePath::RemoveFileName() const {
   const char* const last_sep = FindLastPathSeparator();
   std::string dir;
   if (last_sep) {
-    dir = std::string(c_str(), last_sep + 1 - c_str());
+    dir = std::string(c_str(), static_cast<size_t>(last_sep + 1 - c_str()));
   } else {
     dir = kCurrentDirectoryString;
   }
@@ -8044,7 +10086,7 @@ bool FilePath::FileOrDirectoryExists() const {
   delete [] unicode;
   return attributes != kInvalidFileAttributes;
 #else
-  posix::StatStruct file_stat;
+  posix::StatStruct file_stat{};
   return posix::Stat(pathname_.c_str(), &file_stat) == 0;
 #endif  // GTEST_OS_WINDOWS_MOBILE
 }
@@ -8071,7 +10113,7 @@ bool FilePath::DirectoryExists() const {
     result = true;
   }
 #else
-  posix::StatStruct file_stat;
+  posix::StatStruct file_stat{};
   result = posix::Stat(path.c_str(), &file_stat) == 0 &&
       posix::IsDir(file_stat);
 #endif  // GTEST_OS_WINDOWS_MOBILE
@@ -8083,9 +10125,6 @@ bool FilePath::DirectoryExists() const {
 // root directory per disk drive.)
 bool FilePath::IsRootDirectory() const {
 #if GTEST_OS_WINDOWS
-  // TODO(wan@google.com): on Windows a network share like
-  // \\server\share can be a root directory, although it cannot be the
-  // current directory.  Handle this properly.
   return pathname_.length() == 3 && IsAbsolutePath();
 #else
   return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]);
@@ -8157,10 +10196,13 @@ bool FilePath::CreateFolder() const {
 #if GTEST_OS_WINDOWS_MOBILE
   FilePath removed_sep(this->RemoveTrailingPathSeparator());
   LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str());
-  int result = CreateDirectory(unicode, NULL) ? 0 : -1;
+  int result = CreateDirectory(unicode, nullptr) ? 0 : -1;
   delete [] unicode;
 #elif GTEST_OS_WINDOWS
   int result = _mkdir(pathname_.c_str());
+#elif GTEST_OS_ESP8266 || GTEST_OS_XTENSA
+  // do nothing
+  int result = 0;
 #else
   int result = mkdir(pathname_.c_str(), 0777);
 #endif  // GTEST_OS_WINDOWS_MOBILE
@@ -8183,40 +10225,25 @@ FilePath FilePath::RemoveTrailingPathSeparator() const {
 // Removes any redundant separators that might be in the pathname.
 // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
 // redundancies that might be in a pathname involving "." or "..".
-// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share).
 void FilePath::Normalize() {
-  if (pathname_.c_str() == NULL) {
-    pathname_ = "";
-    return;
-  }
-  const char* src = pathname_.c_str();
-  char* const dest = new char[pathname_.length() + 1];
-  char* dest_ptr = dest;
-  memset(dest_ptr, 0, pathname_.length() + 1);
+  auto out = pathname_.begin();
 
-  while (*src != '\0') {
-    *dest_ptr = *src;
-    if (!IsPathSeparator(*src)) {
-      src++;
+  for (const char character : pathname_) {
+    if (!IsPathSeparator(character)) {
+      *(out++) = character;
+    } else if (out == pathname_.begin() || *std::prev(out) != kPathSeparator) {
+      *(out++) = kPathSeparator;
     } else {
-#if GTEST_HAS_ALT_PATH_SEP_
-      if (*dest_ptr == kAlternatePathSeparator) {
-        *dest_ptr = kPathSeparator;
-      }
-#endif
-      while (IsPathSeparator(*src))
-        src++;
+      continue;
     }
-    dest_ptr++;
   }
-  *dest_ptr = '\0';
-  pathname_ = dest;
-  delete[] dest;
+
+  pathname_.erase(out, pathname_.end());
 }
 
 }  // namespace internal
 }  // namespace testing
-// Copyright 2008, Google Inc.
+// Copyright 2007, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -8244,23 +10271,122 @@ void FilePath::Normalize() {
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file implements just enough of the matcher interface to allow
+// EXPECT_DEATH and friends to accept a matcher argument.
+
+
+#include <string>
+
+namespace testing {
+
+// Constructs a matcher that matches a const std::string& whose value is
+// equal to s.
+Matcher<const std::string&>::Matcher(const std::string& s) { *this = Eq(s); }
+
+// Constructs a matcher that matches a const std::string& whose value is
+// equal to s.
+Matcher<const std::string&>::Matcher(const char* s) {
+  *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a std::string whose value is equal to
+// s.
+Matcher<std::string>::Matcher(const std::string& s) { *this = Eq(s); }
+
+// Constructs a matcher that matches a std::string whose value is equal to
+// s.
+Matcher<std::string>::Matcher(const char* s) { *this = Eq(std::string(s)); }
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// Constructs a matcher that matches a const StringView& whose value is
+// equal to s.
+Matcher<const internal::StringView&>::Matcher(const std::string& s) {
+  *this = Eq(s);
+}
+
+// Constructs a matcher that matches a const StringView& whose value is
+// equal to s.
+Matcher<const internal::StringView&>::Matcher(const char* s) {
+  *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a const StringView& whose value is
+// equal to s.
+Matcher<const internal::StringView&>::Matcher(internal::StringView s) {
+  *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a StringView whose value is equal to
+// s.
+Matcher<internal::StringView>::Matcher(const std::string& s) { *this = Eq(s); }
+
+// Constructs a matcher that matches a StringView whose value is equal to
+// s.
+Matcher<internal::StringView>::Matcher(const char* s) {
+  *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a StringView whose value is equal to
+// s.
+Matcher<internal::StringView>::Matcher(internal::StringView s) {
+  *this = Eq(std::string(s));
+}
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
+
+}  // namespace testing
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
 //
-// Author: wan@google.com (Zhanyong Wan)
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 
 
 #include <limits.h>
-#include <stdlib.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+#include <cstdint>
+#include <fstream>
+#include <memory>
 
-#if GTEST_OS_WINDOWS_MOBILE
-# include <windows.h>  // For TerminateProcess()
-#elif GTEST_OS_WINDOWS
+#if GTEST_OS_WINDOWS
+# include <windows.h>
 # include <io.h>
 # include <sys/stat.h>
+# include <map>  // Used in ThreadLocal.
+# ifdef _MSC_VER
+#  include <crtdbg.h>
+# endif  // _MSC_VER
 #else
 # include <unistd.h>
-#endif  // GTEST_OS_WINDOWS_MOBILE
+#endif  // GTEST_OS_WINDOWS
 
 #if GTEST_OS_MAC
 # include <mach/mach_init.h>
@@ -8268,19 +10394,30 @@ void FilePath::Normalize() {
 # include <mach/vm_map.h>
 #endif  // GTEST_OS_MAC
 
+#if GTEST_OS_DRAGONFLY || GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD || \
+    GTEST_OS_NETBSD || GTEST_OS_OPENBSD
+# include <sys/sysctl.h>
+# if GTEST_OS_DRAGONFLY || GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD
+#  include <sys/user.h>
+# endif
+#endif
+
 #if GTEST_OS_QNX
 # include <devctl.h>
+# include <fcntl.h>
 # include <sys/procfs.h>
 #endif  // GTEST_OS_QNX
 
+#if GTEST_OS_AIX
+# include <procinfo.h>
+# include <sys/types.h>
+#endif  // GTEST_OS_AIX
+
+#if GTEST_OS_FUCHSIA
+# include <zircon/process.h>
+# include <zircon/syscalls.h>
+#endif  // GTEST_OS_FUCHSIA
 
-// Indicates that this translation unit is part of Google Test's
-// implementation.  It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error.  This trick is to
-// prevent a user from accidentally including gtest-internal-inl.h in
-// his code.
-#define GTEST_IMPLEMENTATION_ 1
-#undef GTEST_IMPLEMENTATION_
 
 namespace testing {
 namespace internal {
@@ -8294,10 +10431,31 @@ const int kStdOutFileno = STDOUT_FILENO;
 const int kStdErrFileno = STDERR_FILENO;
 #endif  // _MSC_VER
 
-#if GTEST_OS_MAC
+#if GTEST_OS_LINUX
+
+namespace {
+template <typename T>
+T ReadProcFileField(const std::string& filename, int field) {
+  std::string dummy;
+  std::ifstream file(filename.c_str());
+  while (field-- > 0) {
+    file >> dummy;
+  }
+  T output = 0;
+  file >> output;
+  return output;
+}
+}  // namespace
+
+// Returns the number of active threads, or 0 when there is an error.
+size_t GetThreadCount() {
+  const std::string filename =
+      (Message() << "/proc/" << getpid() << "/stat").GetString();
+  return ReadProcFileField<size_t>(filename, 19);
+}
+
+#elif GTEST_OS_MAC
 
-// Returns the number of threads running in the process, or 0 to indicate that
-// we cannot detect it.
 size_t GetThreadCount() {
   const task_t task = mach_task_self();
   mach_msg_type_number_t thread_count;
@@ -8315,35 +10473,569 @@ size_t GetThreadCount() {
   }
 }
 
-#elif GTEST_OS_QNX
+#elif GTEST_OS_DRAGONFLY || GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD || \
+      GTEST_OS_NETBSD
+
+#if GTEST_OS_NETBSD
+#undef KERN_PROC
+#define KERN_PROC KERN_PROC2
+#define kinfo_proc kinfo_proc2
+#endif
+
+#if GTEST_OS_DRAGONFLY
+#define KP_NLWP(kp) (kp.kp_nthreads)
+#elif GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD
+#define KP_NLWP(kp) (kp.ki_numthreads)
+#elif GTEST_OS_NETBSD
+#define KP_NLWP(kp) (kp.p_nlwps)
+#endif
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+  int mib[] = {
+    CTL_KERN,
+    KERN_PROC,
+    KERN_PROC_PID,
+    getpid(),
+#if GTEST_OS_NETBSD
+    sizeof(struct kinfo_proc),
+    1,
+#endif
+  };
+  u_int miblen = sizeof(mib) / sizeof(mib[0]);
+  struct kinfo_proc info;
+  size_t size = sizeof(info);
+  if (sysctl(mib, miblen, &info, &size, NULL, 0)) {
+    return 0;
+  }
+  return static_cast<size_t>(KP_NLWP(info));
+}
+#elif GTEST_OS_OPENBSD
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+  int mib[] = {
+    CTL_KERN,
+    KERN_PROC,
+    KERN_PROC_PID | KERN_PROC_SHOW_THREADS,
+    getpid(),
+    sizeof(struct kinfo_proc),
+    0,
+  };
+  u_int miblen = sizeof(mib) / sizeof(mib[0]);
+
+  // get number of structs
+  size_t size;
+  if (sysctl(mib, miblen, NULL, &size, NULL, 0)) {
+    return 0;
+  }
+
+  mib[5] = static_cast<int>(size / static_cast<size_t>(mib[4]));
+
+  // populate array of structs
+  struct kinfo_proc info[mib[5]];
+  if (sysctl(mib, miblen, &info, &size, NULL, 0)) {
+    return 0;
+  }
+
+  // exclude empty members
+  size_t nthreads = 0;
+  for (size_t i = 0; i < size / static_cast<size_t>(mib[4]); i++) {
+    if (info[i].p_tid != -1)
+      nthreads++;
+  }
+  return nthreads;
+}
+
+#elif GTEST_OS_QNX
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+  const int fd = open("/proc/self/as", O_RDONLY);
+  if (fd < 0) {
+    return 0;
+  }
+  procfs_info process_info;
+  const int status =
+      devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), nullptr);
+  close(fd);
+  if (status == EOK) {
+    return static_cast<size_t>(process_info.num_threads);
+  } else {
+    return 0;
+  }
+}
+
+#elif GTEST_OS_AIX
+
+size_t GetThreadCount() {
+  struct procentry64 entry;
+  pid_t pid = getpid();
+  int status = getprocs64(&entry, sizeof(entry), nullptr, 0, &pid, 1);
+  if (status == 1) {
+    return entry.pi_thcount;
+  } else {
+    return 0;
+  }
+}
+
+#elif GTEST_OS_FUCHSIA
+
+size_t GetThreadCount() {
+  int dummy_buffer;
+  size_t avail;
+  zx_status_t status = zx_object_get_info(
+      zx_process_self(),
+      ZX_INFO_PROCESS_THREADS,
+      &dummy_buffer,
+      0,
+      nullptr,
+      &avail);
+  if (status == ZX_OK) {
+    return avail;
+  } else {
+    return 0;
+  }
+}
+
+#else
+
+size_t GetThreadCount() {
+  // There's no portable way to detect the number of threads, so we just
+  // return 0 to indicate that we cannot detect it.
+  return 0;
+}
+
+#endif  // GTEST_OS_LINUX
+
+#if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
+
+void SleepMilliseconds(int n) {
+  ::Sleep(static_cast<DWORD>(n));
+}
+
+AutoHandle::AutoHandle()
+    : handle_(INVALID_HANDLE_VALUE) {}
+
+AutoHandle::AutoHandle(Handle handle)
+    : handle_(handle) {}
+
+AutoHandle::~AutoHandle() {
+  Reset();
+}
+
+AutoHandle::Handle AutoHandle::Get() const {
+  return handle_;
+}
+
+void AutoHandle::Reset() {
+  Reset(INVALID_HANDLE_VALUE);
+}
+
+void AutoHandle::Reset(HANDLE handle) {
+  // Resetting with the same handle we already own is invalid.
+  if (handle_ != handle) {
+    if (IsCloseable()) {
+      ::CloseHandle(handle_);
+    }
+    handle_ = handle;
+  } else {
+    GTEST_CHECK_(!IsCloseable())
+        << "Resetting a valid handle to itself is likely a programmer error "
+            "and thus not allowed.";
+  }
+}
+
+bool AutoHandle::IsCloseable() const {
+  // Different Windows APIs may use either of these values to represent an
+  // invalid handle.
+  return handle_ != nullptr && handle_ != INVALID_HANDLE_VALUE;
+}
+
+Notification::Notification()
+    : event_(::CreateEvent(nullptr,     // Default security attributes.
+                           TRUE,        // Do not reset automatically.
+                           FALSE,       // Initially unset.
+                           nullptr)) {  // Anonymous event.
+  GTEST_CHECK_(event_.Get() != nullptr);
+}
+
+void Notification::Notify() {
+  GTEST_CHECK_(::SetEvent(event_.Get()) != FALSE);
+}
+
+void Notification::WaitForNotification() {
+  GTEST_CHECK_(
+      ::WaitForSingleObject(event_.Get(), INFINITE) == WAIT_OBJECT_0);
+}
+
+Mutex::Mutex()
+    : owner_thread_id_(0),
+      type_(kDynamic),
+      critical_section_init_phase_(0),
+      critical_section_(new CRITICAL_SECTION) {
+  ::InitializeCriticalSection(critical_section_);
+}
+
+Mutex::~Mutex() {
+  // Static mutexes are leaked intentionally. It is not thread-safe to try
+  // to clean them up.
+  if (type_ == kDynamic) {
+    ::DeleteCriticalSection(critical_section_);
+    delete critical_section_;
+    critical_section_ = nullptr;
+  }
+}
+
+void Mutex::Lock() {
+  ThreadSafeLazyInit();
+  ::EnterCriticalSection(critical_section_);
+  owner_thread_id_ = ::GetCurrentThreadId();
+}
+
+void Mutex::Unlock() {
+  ThreadSafeLazyInit();
+  // We don't protect writing to owner_thread_id_ here, as it's the
+  // caller's responsibility to ensure that the current thread holds the
+  // mutex when this is called.
+  owner_thread_id_ = 0;
+  ::LeaveCriticalSection(critical_section_);
+}
+
+// Does nothing if the current thread holds the mutex. Otherwise, crashes
+// with high probability.
+void Mutex::AssertHeld() {
+  ThreadSafeLazyInit();
+  GTEST_CHECK_(owner_thread_id_ == ::GetCurrentThreadId())
+      << "The current thread is not holding the mutex @" << this;
+}
+
+namespace {
+
+#ifdef _MSC_VER
+// Use the RAII idiom to flag mem allocs that are intentionally never
+// deallocated. The motivation is to silence the false positive mem leaks
+// that are reported by the debug version of MS's CRT which can only detect
+// if an alloc is missing a matching deallocation.
+// Example:
+//    MemoryIsNotDeallocated memory_is_not_deallocated;
+//    critical_section_ = new CRITICAL_SECTION;
+//
+class MemoryIsNotDeallocated
+{
+ public:
+  MemoryIsNotDeallocated() : old_crtdbg_flag_(0) {
+    old_crtdbg_flag_ = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG);
+    // Set heap allocation block type to _IGNORE_BLOCK so that MS debug CRT
+    // doesn't report mem leak if there's no matching deallocation.
+    _CrtSetDbgFlag(old_crtdbg_flag_ & ~_CRTDBG_ALLOC_MEM_DF);
+  }
+
+  ~MemoryIsNotDeallocated() {
+    // Restore the original _CRTDBG_ALLOC_MEM_DF flag
+    _CrtSetDbgFlag(old_crtdbg_flag_);
+  }
+
+ private:
+  int old_crtdbg_flag_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(MemoryIsNotDeallocated);
+};
+#endif  // _MSC_VER
+
+}  // namespace
+
+// Initializes owner_thread_id_ and critical_section_ in static mutexes.
+void Mutex::ThreadSafeLazyInit() {
+  // Dynamic mutexes are initialized in the constructor.
+  if (type_ == kStatic) {
+    switch (
+        ::InterlockedCompareExchange(&critical_section_init_phase_, 1L, 0L)) {
+      case 0:
+        // If critical_section_init_phase_ was 0 before the exchange, we
+        // are the first to test it and need to perform the initialization.
+        owner_thread_id_ = 0;
+        {
+          // Use RAII to flag that following mem alloc is never deallocated.
+#ifdef _MSC_VER
+          MemoryIsNotDeallocated memory_is_not_deallocated;
+#endif  // _MSC_VER
+          critical_section_ = new CRITICAL_SECTION;
+        }
+        ::InitializeCriticalSection(critical_section_);
+        // Updates the critical_section_init_phase_ to 2 to signal
+        // initialization complete.
+        GTEST_CHECK_(::InterlockedCompareExchange(
+                          &critical_section_init_phase_, 2L, 1L) ==
+                      1L);
+        break;
+      case 1:
+        // Somebody else is already initializing the mutex; spin until they
+        // are done.
+        while (::InterlockedCompareExchange(&critical_section_init_phase_,
+                                            2L,
+                                            2L) != 2L) {
+          // Possibly yields the rest of the thread's time slice to other
+          // threads.
+          ::Sleep(0);
+        }
+        break;
+
+      case 2:
+        break;  // The mutex is already initialized and ready for use.
+
+      default:
+        GTEST_CHECK_(false)
+            << "Unexpected value of critical_section_init_phase_ "
+            << "while initializing a static mutex.";
+    }
+  }
+}
+
+namespace {
+
+class ThreadWithParamSupport : public ThreadWithParamBase {
+ public:
+  static HANDLE CreateThread(Runnable* runnable,
+                             Notification* thread_can_start) {
+    ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start);
+    DWORD thread_id;
+    HANDLE thread_handle = ::CreateThread(
+        nullptr,  // Default security.
+        0,        // Default stack size.
+        &ThreadWithParamSupport::ThreadMain,
+        param,        // Parameter to ThreadMainStatic
+        0x0,          // Default creation flags.
+        &thread_id);  // Need a valid pointer for the call to work under Win98.
+    GTEST_CHECK_(thread_handle != nullptr)
+        << "CreateThread failed with error " << ::GetLastError() << ".";
+    if (thread_handle == nullptr) {
+      delete param;
+    }
+    return thread_handle;
+  }
+
+ private:
+  struct ThreadMainParam {
+    ThreadMainParam(Runnable* runnable, Notification* thread_can_start)
+        : runnable_(runnable),
+          thread_can_start_(thread_can_start) {
+    }
+    std::unique_ptr<Runnable> runnable_;
+    // Does not own.
+    Notification* thread_can_start_;
+  };
+
+  static DWORD WINAPI ThreadMain(void* ptr) {
+    // Transfers ownership.
+    std::unique_ptr<ThreadMainParam> param(static_cast<ThreadMainParam*>(ptr));
+    if (param->thread_can_start_ != nullptr)
+      param->thread_can_start_->WaitForNotification();
+    param->runnable_->Run();
+    return 0;
+  }
+
+  // Prohibit instantiation.
+  ThreadWithParamSupport();
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParamSupport);
+};
+
+}  // namespace
+
+ThreadWithParamBase::ThreadWithParamBase(Runnable *runnable,
+                                         Notification* thread_can_start)
+      : thread_(ThreadWithParamSupport::CreateThread(runnable,
+                                                     thread_can_start)) {
+}
+
+ThreadWithParamBase::~ThreadWithParamBase() {
+  Join();
+}
 
-// Returns the number of threads running in the process, or 0 to indicate that
-// we cannot detect it.
-size_t GetThreadCount() {
-  const int fd = open("/proc/self/as", O_RDONLY);
-  if (fd < 0) {
-    return 0;
+void ThreadWithParamBase::Join() {
+  GTEST_CHECK_(::WaitForSingleObject(thread_.Get(), INFINITE) == WAIT_OBJECT_0)
+      << "Failed to join the thread with error " << ::GetLastError() << ".";
+}
+
+// Maps a thread to a set of ThreadIdToThreadLocals that have values
+// instantiated on that thread and notifies them when the thread exits.  A
+// ThreadLocal instance is expected to persist until all threads it has
+// values on have terminated.
+class ThreadLocalRegistryImpl {
+ public:
+  // Registers thread_local_instance as having value on the current thread.
+  // Returns a value that can be used to identify the thread from other threads.
+  static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
+      const ThreadLocalBase* thread_local_instance) {
+#ifdef _MSC_VER
+    MemoryIsNotDeallocated memory_is_not_deallocated;
+#endif  // _MSC_VER
+    DWORD current_thread = ::GetCurrentThreadId();
+    MutexLock lock(&mutex_);
+    ThreadIdToThreadLocals* const thread_to_thread_locals =
+        GetThreadLocalsMapLocked();
+    ThreadIdToThreadLocals::iterator thread_local_pos =
+        thread_to_thread_locals->find(current_thread);
+    if (thread_local_pos == thread_to_thread_locals->end()) {
+      thread_local_pos = thread_to_thread_locals->insert(
+          std::make_pair(current_thread, ThreadLocalValues())).first;
+      StartWatcherThreadFor(current_thread);
+    }
+    ThreadLocalValues& thread_local_values = thread_local_pos->second;
+    ThreadLocalValues::iterator value_pos =
+        thread_local_values.find(thread_local_instance);
+    if (value_pos == thread_local_values.end()) {
+      value_pos =
+          thread_local_values
+              .insert(std::make_pair(
+                  thread_local_instance,
+                  std::shared_ptr<ThreadLocalValueHolderBase>(
+                      thread_local_instance->NewValueForCurrentThread())))
+              .first;
+    }
+    return value_pos->second.get();
+  }
+
+  static void OnThreadLocalDestroyed(
+      const ThreadLocalBase* thread_local_instance) {
+    std::vector<std::shared_ptr<ThreadLocalValueHolderBase> > value_holders;
+    // Clean up the ThreadLocalValues data structure while holding the lock, but
+    // defer the destruction of the ThreadLocalValueHolderBases.
+    {
+      MutexLock lock(&mutex_);
+      ThreadIdToThreadLocals* const thread_to_thread_locals =
+          GetThreadLocalsMapLocked();
+      for (ThreadIdToThreadLocals::iterator it =
+          thread_to_thread_locals->begin();
+          it != thread_to_thread_locals->end();
+          ++it) {
+        ThreadLocalValues& thread_local_values = it->second;
+        ThreadLocalValues::iterator value_pos =
+            thread_local_values.find(thread_local_instance);
+        if (value_pos != thread_local_values.end()) {
+          value_holders.push_back(value_pos->second);
+          thread_local_values.erase(value_pos);
+          // This 'if' can only be successful at most once, so theoretically we
+          // could break out of the loop here, but we don't bother doing so.
+        }
+      }
+    }
+    // Outside the lock, let the destructor for 'value_holders' deallocate the
+    // ThreadLocalValueHolderBases.
+  }
+
+  static void OnThreadExit(DWORD thread_id) {
+    GTEST_CHECK_(thread_id != 0) << ::GetLastError();
+    std::vector<std::shared_ptr<ThreadLocalValueHolderBase> > value_holders;
+    // Clean up the ThreadIdToThreadLocals data structure while holding the
+    // lock, but defer the destruction of the ThreadLocalValueHolderBases.
+    {
+      MutexLock lock(&mutex_);
+      ThreadIdToThreadLocals* const thread_to_thread_locals =
+          GetThreadLocalsMapLocked();
+      ThreadIdToThreadLocals::iterator thread_local_pos =
+          thread_to_thread_locals->find(thread_id);
+      if (thread_local_pos != thread_to_thread_locals->end()) {
+        ThreadLocalValues& thread_local_values = thread_local_pos->second;
+        for (ThreadLocalValues::iterator value_pos =
+            thread_local_values.begin();
+            value_pos != thread_local_values.end();
+            ++value_pos) {
+          value_holders.push_back(value_pos->second);
+        }
+        thread_to_thread_locals->erase(thread_local_pos);
+      }
+    }
+    // Outside the lock, let the destructor for 'value_holders' deallocate the
+    // ThreadLocalValueHolderBases.
   }
-  procfs_info process_info;
-  const int status =
-      devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL);
-  close(fd);
-  if (status == EOK) {
-    return static_cast<size_t>(process_info.num_threads);
-  } else {
+
+ private:
+  // In a particular thread, maps a ThreadLocal object to its value.
+  typedef std::map<const ThreadLocalBase*,
+                   std::shared_ptr<ThreadLocalValueHolderBase> >
+      ThreadLocalValues;
+  // Stores all ThreadIdToThreadLocals having values in a thread, indexed by
+  // thread's ID.
+  typedef std::map<DWORD, ThreadLocalValues> ThreadIdToThreadLocals;
+
+  // Holds the thread id and thread handle that we pass from
+  // StartWatcherThreadFor to WatcherThreadFunc.
+  typedef std::pair<DWORD, HANDLE> ThreadIdAndHandle;
+
+  static void StartWatcherThreadFor(DWORD thread_id) {
+    // The returned handle will be kept in thread_map and closed by
+    // watcher_thread in WatcherThreadFunc.
+    HANDLE thread = ::OpenThread(SYNCHRONIZE | THREAD_QUERY_INFORMATION,
+                                 FALSE,
+                                 thread_id);
+    GTEST_CHECK_(thread != nullptr);
+    // We need to pass a valid thread ID pointer into CreateThread for it
+    // to work correctly under Win98.
+    DWORD watcher_thread_id;
+    HANDLE watcher_thread = ::CreateThread(
+        nullptr,  // Default security.
+        0,        // Default stack size
+        &ThreadLocalRegistryImpl::WatcherThreadFunc,
+        reinterpret_cast<LPVOID>(new ThreadIdAndHandle(thread_id, thread)),
+        CREATE_SUSPENDED, &watcher_thread_id);
+    GTEST_CHECK_(watcher_thread != nullptr);
+    // Give the watcher thread the same priority as ours to avoid being
+    // blocked by it.
+    ::SetThreadPriority(watcher_thread,
+                        ::GetThreadPriority(::GetCurrentThread()));
+    ::ResumeThread(watcher_thread);
+    ::CloseHandle(watcher_thread);
+  }
+
+  // Monitors exit from a given thread and notifies those
+  // ThreadIdToThreadLocals about thread termination.
+  static DWORD WINAPI WatcherThreadFunc(LPVOID param) {
+    const ThreadIdAndHandle* tah =
+        reinterpret_cast<const ThreadIdAndHandle*>(param);
+    GTEST_CHECK_(
+        ::WaitForSingleObject(tah->second, INFINITE) == WAIT_OBJECT_0);
+    OnThreadExit(tah->first);
+    ::CloseHandle(tah->second);
+    delete tah;
     return 0;
   }
-}
 
-#else
+  // Returns map of thread local instances.
+  static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() {
+    mutex_.AssertHeld();
+#ifdef _MSC_VER
+    MemoryIsNotDeallocated memory_is_not_deallocated;
+#endif  // _MSC_VER
+    static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals();
+    return map;
+  }
 
-size_t GetThreadCount() {
-  // There's no portable way to detect the number of threads, so we just
-  // return 0 to indicate that we cannot detect it.
-  return 0;
+  // Protects access to GetThreadLocalsMapLocked() and its return value.
+  static Mutex mutex_;
+  // Protects access to GetThreadMapLocked() and its return value.
+  static Mutex thread_map_mutex_;
+};
+
+Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex);  // NOLINT
+Mutex ThreadLocalRegistryImpl::thread_map_mutex_(Mutex::kStaticMutex);  // NOLINT
+
+ThreadLocalValueHolderBase* ThreadLocalRegistry::GetValueOnCurrentThread(
+      const ThreadLocalBase* thread_local_instance) {
+  return ThreadLocalRegistryImpl::GetValueOnCurrentThread(
+      thread_local_instance);
 }
 
-#endif  // GTEST_OS_MAC
+void ThreadLocalRegistry::OnThreadLocalDestroyed(
+      const ThreadLocalBase* thread_local_instance) {
+  ThreadLocalRegistryImpl::OnThreadLocalDestroyed(thread_local_instance);
+}
+
+#endif  // GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
 
 #if GTEST_USES_POSIX_RE
 
@@ -8361,7 +11053,7 @@ RE::~RE() {
   free(const_cast<char*>(pattern_));
 }
 
-// Returns true iff regular expression re matches the entire str.
+// Returns true if and only if regular expression re matches the entire str.
 bool RE::FullMatch(const char* str, const RE& re) {
   if (!re.is_valid_) return false;
 
@@ -8369,8 +11061,8 @@ bool RE::FullMatch(const char* str, const RE& re) {
   return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
 }
 
-// Returns true iff regular expression re matches a substring of str
-// (including str itself).
+// Returns true if and only if regular expression re matches a substring of
+// str (including str itself).
 bool RE::PartialMatch(const char* str, const RE& re) {
   if (!re.is_valid_) return false;
 
@@ -8410,14 +11102,14 @@ void RE::Init(const char* regex) {
 
 #elif GTEST_USES_SIMPLE_RE
 
-// Returns true iff ch appears anywhere in str (excluding the
+// Returns true if and only if ch appears anywhere in str (excluding the
 // terminating '\0' character).
 bool IsInSet(char ch, const char* str) {
-  return ch != '\0' && strchr(str, ch) != NULL;
+  return ch != '\0' && strchr(str, ch) != nullptr;
 }
 
-// Returns true iff ch belongs to the given classification.  Unlike
-// similar functions in <ctype.h>, these aren't affected by the
+// Returns true if and only if ch belongs to the given classification.
+// Unlike similar functions in <ctype.h>, these aren't affected by the
 // current locale.
 bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; }
 bool IsAsciiPunct(char ch) {
@@ -8430,13 +11122,13 @@ bool IsAsciiWordChar(char ch) {
       ('0' <= ch && ch <= '9') || ch == '_';
 }
 
-// Returns true iff "\\c" is a supported escape sequence.
+// Returns true if and only if "\\c" is a supported escape sequence.
 bool IsValidEscape(char c) {
   return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
 }
 
-// Returns true iff the given atom (specified by escaped and pattern)
-// matches ch.  The result is undefined if the atom is invalid.
+// Returns true if and only if the given atom (specified by escaped and
+// pattern) matches ch.  The result is undefined if the atom is invalid.
 bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
   if (escaped) {  // "\\p" where p is pattern_char.
     switch (pattern_char) {
@@ -8459,7 +11151,7 @@ bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
 }
 
 // Helper function used by ValidateRegex() to format error messages.
-std::string FormatRegexSyntaxError(const char* regex, int index) {
+static std::string FormatRegexSyntaxError(const char* regex, int index) {
   return (Message() << "Syntax error at index " << index
           << " in simple regular expression \"" << regex << "\": ").GetString();
 }
@@ -8467,17 +11159,14 @@ std::string FormatRegexSyntaxError(const char* regex, int index) {
 // Generates non-fatal failures and returns false if regex is invalid;
 // otherwise returns true.
 bool ValidateRegex(const char* regex) {
-  if (regex == NULL) {
-    // TODO(wan@google.com): fix the source file location in the
-    // assertion failures to match where the regex is used in user
-    // code.
+  if (regex == nullptr) {
     ADD_FAILURE() << "NULL is not a valid simple regular expression.";
     return false;
   }
 
   bool is_valid = true;
 
-  // True iff ?, *, or + can follow the previous atom.
+  // True if and only if ?, *, or + can follow the previous atom.
   bool prev_repeatable = false;
   for (int i = 0; regex[i]; i++) {
     if (regex[i] == '\\') {  // An escape sequence
@@ -8553,8 +11242,8 @@ bool MatchRepetitionAndRegexAtHead(
   return false;
 }
 
-// Returns true iff regex matches a prefix of str.  regex must be a
-// valid simple regular expression and not start with "^", or the
+// Returns true if and only if regex matches a prefix of str. regex must
+// be a valid simple regular expression and not start with "^", or the
 // result is undefined.
 bool MatchRegexAtHead(const char* regex, const char* str) {
   if (*regex == '\0')  // An empty regex matches a prefix of anything.
@@ -8584,8 +11273,8 @@ bool MatchRegexAtHead(const char* regex, const char* str) {
   }
 }
 
-// Returns true iff regex matches any substring of str.  regex must be
-// a valid simple regular expression, or the result is undefined.
+// Returns true if and only if regex matches any substring of str.  regex must
+// be a valid simple regular expression, or the result is undefined.
 //
 // The algorithm is recursive, but the recursion depth doesn't exceed
 // the regex length, so we won't need to worry about running out of
@@ -8593,8 +11282,7 @@ bool MatchRegexAtHead(const char* regex, const char* str) {
 // exponential with respect to the regex length + the string length,
 // but usually it's must faster (often close to linear).
 bool MatchRegexAnywhere(const char* regex, const char* str) {
-  if (regex == NULL || str == NULL)
-    return false;
+  if (regex == nullptr || str == nullptr) return false;
 
   if (*regex == '^')
     return MatchRegexAtHead(regex + 1, str);
@@ -8614,21 +11302,21 @@ RE::~RE() {
   free(const_cast<char*>(full_pattern_));
 }
 
-// Returns true iff regular expression re matches the entire str.
+// Returns true if and only if regular expression re matches the entire str.
 bool RE::FullMatch(const char* str, const RE& re) {
   return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
 }
 
-// Returns true iff regular expression re matches a substring of str
-// (including str itself).
+// Returns true if and only if regular expression re matches a substring of
+// str (including str itself).
 bool RE::PartialMatch(const char* str, const RE& re) {
   return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
 }
 
 // Initializes an RE from its string representation.
 void RE::Init(const char* regex) {
-  pattern_ = full_pattern_ = NULL;
-  if (regex != NULL) {
+  pattern_ = full_pattern_ = nullptr;
+  if (regex != nullptr) {
     pattern_ = posix::StrDup(regex);
   }
 
@@ -8666,7 +11354,7 @@ const char kUnknownFile[] = "unknown file";
 // Formats a source file path and a line number as they would appear
 // in an error message from the compiler used to compile this code.
 GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
-  const std::string file_name(file == NULL ? kUnknownFile : file);
+  const std::string file_name(file == nullptr ? kUnknownFile : file);
 
   if (line < 0) {
     return file_name + ":";
@@ -8685,7 +11373,7 @@ GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
 // to the file location it produces, unlike FormatFileLocation().
 GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
     const char* file, int line) {
-  const std::string file_name(file == NULL ? kUnknownFile : file);
+  const std::string file_name(file == nullptr ? kUnknownFile : file);
 
   if (line < 0)
     return file_name;
@@ -8693,7 +11381,6 @@ GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
     return file_name + ":" + StreamableToString(line);
 }
 
-
 GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
     : severity_(severity) {
   const char* const marker =
@@ -8712,12 +11399,10 @@ GTestLog::~GTestLog() {
     posix::Abort();
   }
 }
+
 // Disable Microsoft deprecation warnings for POSIX functions called from
 // this class (creat, dup, dup2, and close)
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable: 4996)
-#endif  // _MSC_VER
+GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
 
 #if GTEST_HAS_STREAM_REDIRECTION
 
@@ -8743,9 +11428,9 @@ class CapturedStream {
     filename_ = temp_file_path;
 # else
     // There's no guarantee that a test has write access to the current
-    // directory, so we create the temporary file in the /tmp directory
-    // instead. We use /tmp on most systems, and /sdcard on Android.
-    // That's because Android doesn't have /tmp.
+    // directory, so we create the temporary file in a temporary directory.
+    std::string name_template;
+
 #  if GTEST_OS_LINUX_ANDROID
     // Note: Android applications are expected to call the framework's
     // Context.getExternalStorageDirectory() method through JNI to get
@@ -8755,20 +11440,51 @@ class CapturedStream {
     // code as part of a regular standalone executable, which doesn't
     // run in a Dalvik process (e.g. when running it through 'adb shell').
     //
-    // The location /sdcard is directly accessible from native code
-    // and is the only location (unofficially) supported by the Android
-    // team. It's generally a symlink to the real SD Card mount point
-    // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or
-    // other OEM-customized locations. Never rely on these, and always
-    // use /sdcard.
-    char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX";
+    // The location /data/local/tmp is directly accessible from native code.
+    // '/sdcard' and other variants cannot be relied on, as they are not
+    // guaranteed to be mounted, or may have a delay in mounting.
+    name_template = "/data/local/tmp/";
+#  elif GTEST_OS_IOS
+    char user_temp_dir[PATH_MAX + 1];
+
+    // Documented alternative to NSTemporaryDirectory() (for obtaining creating
+    // a temporary directory) at
+    // https://developer.apple.com/library/archive/documentation/Security/Conceptual/SecureCodingGuide/Articles/RaceConditions.html#//apple_ref/doc/uid/TP40002585-SW10
+    //
+    // _CS_DARWIN_USER_TEMP_DIR (as well as _CS_DARWIN_USER_CACHE_DIR) is not
+    // documented in the confstr() man page at
+    // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/confstr.3.html#//apple_ref/doc/man/3/confstr
+    // but are still available, according to the WebKit patches at
+    // https://trac.webkit.org/changeset/262004/webkit
+    // https://trac.webkit.org/changeset/263705/webkit
+    //
+    // The confstr() implementation falls back to getenv("TMPDIR"). See
+    // https://opensource.apple.com/source/Libc/Libc-1439.100.3/gen/confstr.c.auto.html
+    ::confstr(_CS_DARWIN_USER_TEMP_DIR, user_temp_dir, sizeof(user_temp_dir));
+
+    name_template = user_temp_dir;
+    if (name_template.back() != GTEST_PATH_SEP_[0])
+      name_template.push_back(GTEST_PATH_SEP_[0]);
 #  else
-    char name_template[] = "/tmp/captured_stream.XXXXXX";
-#  endif  // GTEST_OS_LINUX_ANDROID
-    const int captured_fd = mkstemp(name_template);
-    filename_ = name_template;
+    name_template = "/tmp/";
+#  endif
+    name_template.append("gtest_captured_stream.XXXXXX");
+
+    // mkstemp() modifies the string bytes in place, and does not go beyond the
+    // string's length. This results in well-defined behavior in C++17.
+    //
+    // The const_cast is needed below C++17. The constraints on std::string
+    // implementations in C++11 and above make assumption behind the const_cast
+    // fairly safe.
+    const int captured_fd = ::mkstemp(const_cast<char*>(name_template.data()));
+    if (captured_fd == -1) {
+      GTEST_LOG_(WARNING)
+          << "Failed to create tmp file " << name_template
+          << " for test; does the test have access to the /tmp directory?";
+    }
+    filename_ = std::move(name_template);
 # endif  // GTEST_OS_WINDOWS
-    fflush(NULL);
+    fflush(nullptr);
     dup2(captured_fd, fd_);
     close(captured_fd);
   }
@@ -8780,25 +11496,23 @@ class CapturedStream {
   std::string GetCapturedString() {
     if (uncaptured_fd_ != -1) {
       // Restores the original stream.
-      fflush(NULL);
+      fflush(nullptr);
       dup2(uncaptured_fd_, fd_);
       close(uncaptured_fd_);
       uncaptured_fd_ = -1;
     }
 
     FILE* const file = posix::FOpen(filename_.c_str(), "r");
+    if (file == nullptr) {
+      GTEST_LOG_(FATAL) << "Failed to open tmp file " << filename_
+                        << " for capturing stream.";
+    }
     const std::string content = ReadEntireFile(file);
     posix::FClose(file);
     return content;
   }
 
  private:
-  // Reads the entire content of a file as an std::string.
-  static std::string ReadEntireFile(FILE* file);
-
-  // Returns the size (in bytes) of a file.
-  static size_t GetFileSize(FILE* file);
-
   const int fd_;  // A stream to capture.
   int uncaptured_fd_;
   // Name of the temporary file holding the stderr output.
@@ -8807,45 +11521,15 @@ class CapturedStream {
   GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
 };
 
-// Returns the size (in bytes) of a file.
-size_t CapturedStream::GetFileSize(FILE* file) {
-  fseek(file, 0, SEEK_END);
-  return static_cast<size_t>(ftell(file));
-}
-
-// Reads the entire content of a file as a string.
-std::string CapturedStream::ReadEntireFile(FILE* file) {
-  const size_t file_size = GetFileSize(file);
-  char* const buffer = new char[file_size];
-
-  size_t bytes_last_read = 0;  // # of bytes read in the last fread()
-  size_t bytes_read = 0;       // # of bytes read so far
-
-  fseek(file, 0, SEEK_SET);
-
-  // Keeps reading the file until we cannot read further or the
-  // pre-determined file size is reached.
-  do {
-    bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
-    bytes_read += bytes_last_read;
-  } while (bytes_last_read > 0 && bytes_read < file_size);
-
-  const std::string content(buffer, bytes_read);
-  delete[] buffer;
-
-  return content;
-}
-
-# ifdef _MSC_VER
-#  pragma warning(pop)
-# endif  // _MSC_VER
+GTEST_DISABLE_MSC_DEPRECATED_POP_()
 
-static CapturedStream* g_captured_stderr = NULL;
-static CapturedStream* g_captured_stdout = NULL;
+static CapturedStream* g_captured_stderr = nullptr;
+static CapturedStream* g_captured_stdout = nullptr;
 
 // Starts capturing an output stream (stdout/stderr).
-void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
-  if (*stream != NULL) {
+static void CaptureStream(int fd, const char* stream_name,
+                          CapturedStream** stream) {
+  if (*stream != nullptr) {
     GTEST_LOG_(FATAL) << "Only one " << stream_name
                       << " capturer can exist at a time.";
   }
@@ -8853,11 +11537,11 @@ void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
 }
 
 // Stops capturing the output stream and returns the captured string.
-std::string GetCapturedStream(CapturedStream** captured_stream) {
+static std::string GetCapturedStream(CapturedStream** captured_stream) {
   const std::string content = (*captured_stream)->GetCapturedString();
 
   delete *captured_stream;
-  *captured_stream = NULL;
+  *captured_stream = nullptr;
 
   return content;
 }
@@ -8884,25 +11568,61 @@ std::string GetCapturedStderr() {
 
 #endif  // GTEST_HAS_STREAM_REDIRECTION
 
-#if GTEST_HAS_DEATH_TEST
 
-// A copy of all command line arguments.  Set by InitGoogleTest().
-::std::vector<testing::internal::string> g_argvs;
 
-static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
-                                        NULL;  // Owned.
 
-void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
-  if (g_injected_test_argvs != argvs)
-    delete g_injected_test_argvs;
-  g_injected_test_argvs = argvs;
+
+size_t GetFileSize(FILE* file) {
+  fseek(file, 0, SEEK_END);
+  return static_cast<size_t>(ftell(file));
+}
+
+std::string ReadEntireFile(FILE* file) {
+  const size_t file_size = GetFileSize(file);
+  char* const buffer = new char[file_size];
+
+  size_t bytes_last_read = 0;  // # of bytes read in the last fread()
+  size_t bytes_read = 0;       // # of bytes read so far
+
+  fseek(file, 0, SEEK_SET);
+
+  // Keeps reading the file until we cannot read further or the
+  // pre-determined file size is reached.
+  do {
+    bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
+    bytes_read += bytes_last_read;
+  } while (bytes_last_read > 0 && bytes_read < file_size);
+
+  const std::string content(buffer, bytes_read);
+  delete[] buffer;
+
+  return content;
 }
 
-const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
-  if (g_injected_test_argvs != NULL) {
+#if GTEST_HAS_DEATH_TEST
+static const std::vector<std::string>* g_injected_test_argvs =
+    nullptr;  // Owned.
+
+std::vector<std::string> GetInjectableArgvs() {
+  if (g_injected_test_argvs != nullptr) {
     return *g_injected_test_argvs;
   }
-  return g_argvs;
+  return GetArgvs();
+}
+
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs) {
+  if (g_injected_test_argvs != new_argvs) delete g_injected_test_argvs;
+  g_injected_test_argvs = new_argvs;
+}
+
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs) {
+  SetInjectableArgvs(
+      new std::vector<std::string>(new_argvs.begin(), new_argvs.end()));
+}
+
+void ClearInjectableArgvs() {
+  delete g_injected_test_argvs;
+  g_injected_test_argvs = nullptr;
 }
 #endif  // GTEST_HAS_DEATH_TEST
 
@@ -8933,9 +11653,9 @@ static std::string FlagToEnvVar(const char* flag) {
 // Parses 'str' for a 32-bit signed integer.  If successful, writes
 // the result to *value and returns true; otherwise leaves *value
 // unchanged and returns false.
-bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
+bool ParseInt32(const Message& src_text, const char* str, int32_t* value) {
   // Parses the environment variable as a decimal integer.
-  char* end = NULL;
+  char* end = nullptr;
   const long long_value = strtol(str, &end, 10);  // NOLINT
 
   // Has strtol() consumed all characters in the string?
@@ -8950,13 +11670,13 @@ bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
     return false;
   }
 
-  // Is the parsed value in the range of an Int32?
-  const Int32 result = static_cast<Int32>(long_value);
+  // Is the parsed value in the range of an int32_t?
+  const auto result = static_cast<int32_t>(long_value);
   if (long_value == LONG_MAX || long_value == LONG_MIN ||
       // The parsed value overflows as a long.  (strtol() returns
       // LONG_MAX or LONG_MIN when the input overflows.)
       result != long_value
-      // The parsed value overflows as an Int32.
+      // The parsed value overflows as an int32_t.
       ) {
     Message msg;
     msg << "WARNING: " << src_text
@@ -8974,26 +11694,33 @@ bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
 // Reads and returns the Boolean environment variable corresponding to
 // the given flag; if it's not set, returns default_value.
 //
-// The value is considered true iff it's not "0".
+// The value is considered true if and only if it's not "0".
 bool BoolFromGTestEnv(const char* flag, bool default_value) {
+#if defined(GTEST_GET_BOOL_FROM_ENV_)
+  return GTEST_GET_BOOL_FROM_ENV_(flag, default_value);
+#else
   const std::string env_var = FlagToEnvVar(flag);
   const char* const string_value = posix::GetEnv(env_var.c_str());
-  return string_value == NULL ?
-      default_value : strcmp(string_value, "0") != 0;
+  return string_value == nullptr ? default_value
+                                 : strcmp(string_value, "0") != 0;
+#endif  // defined(GTEST_GET_BOOL_FROM_ENV_)
 }
 
 // Reads and returns a 32-bit integer stored in the environment
 // variable corresponding to the given flag; if it isn't set or
 // doesn't represent a valid 32-bit integer, returns default_value.
-Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
+int32_t Int32FromGTestEnv(const char* flag, int32_t default_value) {
+#if defined(GTEST_GET_INT32_FROM_ENV_)
+  return GTEST_GET_INT32_FROM_ENV_(flag, default_value);
+#else
   const std::string env_var = FlagToEnvVar(flag);
   const char* const string_value = posix::GetEnv(env_var.c_str());
-  if (string_value == NULL) {
+  if (string_value == nullptr) {
     // The environment variable is not set.
     return default_value;
   }
 
-  Int32 result = default_value;
+  int32_t result = default_value;
   if (!ParseInt32(Message() << "Environment variable " << env_var,
                   string_value, &result)) {
     printf("The default value %s is used.\n",
@@ -9003,14 +11730,36 @@ Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
   }
 
   return result;
+#endif  // defined(GTEST_GET_INT32_FROM_ENV_)
+}
+
+// As a special case for the 'output' flag, if GTEST_OUTPUT is not
+// set, we look for XML_OUTPUT_FILE, which is set by the Bazel build
+// system.  The value of XML_OUTPUT_FILE is a filename without the
+// "xml:" prefix of GTEST_OUTPUT.
+// Note that this is meant to be called at the call site so it does
+// not check that the flag is 'output'
+// In essence this checks an env variable called XML_OUTPUT_FILE
+// and if it is set we prepend "xml:" to its value, if it not set we return ""
+std::string OutputFlagAlsoCheckEnvVar(){
+  std::string default_value_for_output_flag = "";
+  const char* xml_output_file_env = posix::GetEnv("XML_OUTPUT_FILE");
+  if (nullptr != xml_output_file_env) {
+    default_value_for_output_flag = std::string("xml:") + xml_output_file_env;
+  }
+  return default_value_for_output_flag;
 }
 
 // Reads and returns the string environment variable corresponding to
 // the given flag; if it's not set, returns default_value.
 const char* StringFromGTestEnv(const char* flag, const char* default_value) {
+#if defined(GTEST_GET_STRING_FROM_ENV_)
+  return GTEST_GET_STRING_FROM_ENV_(flag, default_value);
+#else
   const std::string env_var = FlagToEnvVar(flag);
   const char* const value = posix::GetEnv(env_var.c_str());
-  return value == NULL ? default_value : value;
+  return value == nullptr ? default_value : value;
+#endif  // defined(GTEST_GET_STRING_FROM_ENV_)
 }
 
 }  // namespace internal
@@ -9043,10 +11792,9 @@ const char* StringFromGTestEnv(const char* flag, const char* default_value) {
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
 
-// Google Test - The Google C++ Testing Framework
+
+// Google Test - The Google C++ Testing and Mocking Framework
 //
 // This file implements a universal value printer that can print a
 // value of any type T:
@@ -9059,10 +11807,16 @@ const char* StringFromGTestEnv(const char* flag, const char* default_value) {
 // or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
 // defines Foo.
 
-#include <ctype.h>
+
 #include <stdio.h>
+
+#include <cctype>
+#include <cstdint>
+#include <cwchar>
 #include <ostream>  // NOLINT
 #include <string>
+#include <type_traits>
+
 
 namespace testing {
 
@@ -9071,6 +11825,10 @@ namespace {
 using ::std::ostream;
 
 // Prints a segment of bytes in the given object.
+GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
 void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
                                 size_t count, ostream* os) {
   char text[5] = "";
@@ -9100,7 +11858,6 @@ void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
   // If the object size is bigger than kThreshold, we'll have to omit
   // some details by printing only the first and the last kChunkSize
   // bytes.
-  // TODO(wan): let the user control the threshold using a flag.
   if (count < kThreshold) {
     PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
   } else {
@@ -9113,9 +11870,19 @@ void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
   *os << ">";
 }
 
+// Helpers for widening a character to char32_t. Since the standard does not
+// specify if char / wchar_t is signed or unsigned, it is important to first
+// convert it to the unsigned type of the same width before widening it to
+// char32_t.
+template <typename CharType>
+char32_t ToChar32(CharType in) {
+  return static_cast<char32_t>(
+      static_cast<typename std::make_unsigned<CharType>::type>(in));
+}
+
 }  // namespace
 
-namespace internal2 {
+namespace internal {
 
 // Delegates to PrintBytesInObjectToImpl() to print the bytes in the
 // given object.  The delegation simplifies the implementation, which
@@ -9127,14 +11894,10 @@ void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
   PrintBytesInObjectToImpl(obj_bytes, count, os);
 }
 
-}  // namespace internal2
-
-namespace internal {
-
 // Depending on the value of a char (or wchar_t), we print it in one
 // of three formats:
 //   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
-//   - as a hexidecimal escape sequence (e.g. '\x7F'), or
+//   - as a hexadecimal escape sequence (e.g. '\x7F'), or
 //   - as a special escape sequence (e.g. '\r', '\n').
 enum CharFormat {
   kAsIs,
@@ -9145,17 +11908,15 @@ enum CharFormat {
 // Returns true if c is a printable ASCII character.  We test the
 // value of c directly instead of calling isprint(), which is buggy on
 // Windows Mobile.
-inline bool IsPrintableAscii(wchar_t c) {
-  return 0x20 <= c && c <= 0x7E;
-}
+inline bool IsPrintableAscii(char32_t c) { return 0x20 <= c && c <= 0x7E; }
 
-// Prints a wide or narrow char c as a character literal without the
-// quotes, escaping it when necessary; returns how c was formatted.
-// The template argument UnsignedChar is the unsigned version of Char,
-// which is the type of c.
-template <typename UnsignedChar, typename Char>
+// Prints c (of type char, char8_t, char16_t, char32_t, or wchar_t) as a
+// character literal without the quotes, escaping it when necessary; returns how
+// c was formatted.
+template <typename Char>
 static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
-  switch (static_cast<wchar_t>(c)) {
+  const char32_t u_c = ToChar32(c);
+  switch (u_c) {
     case L'\0':
       *os << "\\0";
       break;
@@ -9187,20 +11948,22 @@ static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
       *os << "\\v";
       break;
     default:
-      if (IsPrintableAscii(c)) {
+      if (IsPrintableAscii(u_c)) {
         *os << static_cast<char>(c);
         return kAsIs;
       } else {
-        *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
+        ostream::fmtflags flags = os->flags();
+        *os << "\\x" << std::hex << std::uppercase << static_cast<int>(u_c);
+        os->flags(flags);
         return kHexEscape;
       }
   }
   return kSpecialEscape;
 }
 
-// Prints a wchar_t c as if it's part of a string literal, escaping it when
+// Prints a char32_t c as if it's part of a string literal, escaping it when
 // necessary; returns how c was formatted.
-static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+static CharFormat PrintAsStringLiteralTo(char32_t c, ostream* os) {
   switch (c) {
     case L'\'':
       *os << "'";
@@ -9209,26 +11972,68 @@ static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
       *os << "\\\"";
       return kSpecialEscape;
     default:
-      return PrintAsCharLiteralTo<wchar_t>(c, os);
+      return PrintAsCharLiteralTo(c, os);
   }
 }
 
+static const char* GetCharWidthPrefix(char) {
+  return "";
+}
+
+static const char* GetCharWidthPrefix(signed char) {
+  return "";
+}
+
+static const char* GetCharWidthPrefix(unsigned char) {
+  return "";
+}
+
+#ifdef __cpp_char8_t
+static const char* GetCharWidthPrefix(char8_t) {
+  return "u8";
+}
+#endif
+
+static const char* GetCharWidthPrefix(char16_t) {
+  return "u";
+}
+
+static const char* GetCharWidthPrefix(char32_t) {
+  return "U";
+}
+
+static const char* GetCharWidthPrefix(wchar_t) {
+  return "L";
+}
+
 // Prints a char c as if it's part of a string literal, escaping it when
 // necessary; returns how c was formatted.
 static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
-  return PrintAsStringLiteralTo(
-      static_cast<wchar_t>(static_cast<unsigned char>(c)), os);
+  return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+
+#ifdef __cpp_char8_t
+static CharFormat PrintAsStringLiteralTo(char8_t c, ostream* os) {
+  return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+#endif
+
+static CharFormat PrintAsStringLiteralTo(char16_t c, ostream* os) {
+  return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+
+static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+  return PrintAsStringLiteralTo(ToChar32(c), os);
 }
 
-// Prints a wide or narrow character c and its code.  '\0' is printed
-// as "'\\0'", other unprintable characters are also properly escaped
-// using the standard C++ escape sequence.  The template argument
-// UnsignedChar is the unsigned version of Char, which is the type of c.
-template <typename UnsignedChar, typename Char>
+// Prints a character c (of type char, char8_t, char16_t, char32_t, or wchar_t)
+// and its code. '\0' is printed as "'\\0'", other unprintable characters are
+// also properly escaped using the standard C++ escape sequence.
+template <typename Char>
 void PrintCharAndCodeTo(Char c, ostream* os) {
   // First, print c as a literal in the most readable form we can find.
-  *os << ((sizeof(c) > 1) ? "L'" : "'");
-  const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
+  *os << GetCharWidthPrefix(c) << "'";
+  const CharFormat format = PrintAsCharLiteralTo(c, os);
   *os << "'";
 
   // To aid user debugging, we also print c's code in decimal, unless
@@ -9238,56 +12043,70 @@ void PrintCharAndCodeTo(Char c, ostream* os) {
     return;
   *os << " (" << static_cast<int>(c);
 
-  // For more convenience, we print c's code again in hexidecimal,
+  // For more convenience, we print c's code again in hexadecimal,
   // unless c was already printed in the form '\x##' or the code is in
   // [1, 9].
   if (format == kHexEscape || (1 <= c && c <= 9)) {
     // Do nothing.
   } else {
-    *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
+    *os << ", 0x" << String::FormatHexInt(static_cast<int>(c));
   }
   *os << ")";
 }
 
-void PrintTo(unsigned char c, ::std::ostream* os) {
-  PrintCharAndCodeTo<unsigned char>(c, os);
-}
-void PrintTo(signed char c, ::std::ostream* os) {
-  PrintCharAndCodeTo<unsigned char>(c, os);
-}
+void PrintTo(unsigned char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); }
+void PrintTo(signed char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); }
 
 // Prints a wchar_t as a symbol if it is printable or as its internal
 // code otherwise and also as its code.  L'\0' is printed as "L'\\0'".
-void PrintTo(wchar_t wc, ostream* os) {
-  PrintCharAndCodeTo<wchar_t>(wc, os);
+void PrintTo(wchar_t wc, ostream* os) { PrintCharAndCodeTo(wc, os); }
+
+// TODO(dcheng): Consider making this delegate to PrintCharAndCodeTo() as well.
+void PrintTo(char32_t c, ::std::ostream* os) {
+  *os << std::hex << "U+" << std::uppercase << std::setfill('0') << std::setw(4)
+      << static_cast<uint32_t>(c);
 }
 
 // Prints the given array of characters to the ostream.  CharType must be either
-// char or wchar_t.
+// char, char8_t, char16_t, char32_t, or wchar_t.
 // The array starts at begin, the length is len, it may include '\0' characters
 // and may not be NUL-terminated.
 template <typename CharType>
-static void PrintCharsAsStringTo(
+GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
+static CharFormat PrintCharsAsStringTo(
     const CharType* begin, size_t len, ostream* os) {
-  const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
-  *os << kQuoteBegin;
+  const char* const quote_prefix = GetCharWidthPrefix(*begin);
+  *os << quote_prefix << "\"";
   bool is_previous_hex = false;
+  CharFormat print_format = kAsIs;
   for (size_t index = 0; index < len; ++index) {
     const CharType cur = begin[index];
     if (is_previous_hex && IsXDigit(cur)) {
       // Previous character is of '\x..' form and this character can be
       // interpreted as another hexadecimal digit in its number. Break string to
       // disambiguate.
-      *os << "\" " << kQuoteBegin;
+      *os << "\" " << quote_prefix << "\"";
     }
     is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
+    // Remember if any characters required hex escaping.
+    if (is_previous_hex) {
+      print_format = kHexEscape;
+    }
   }
   *os << "\"";
+  return print_format;
 }
 
 // Prints a (const) char/wchar_t array of 'len' elements, starting at address
 // 'begin'.  CharType must be either char or wchar_t.
 template <typename CharType>
+GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
 static void UniversalPrintCharArray(
     const CharType* begin, size_t len, ostream* os) {
   // The code
@@ -9315,22 +12134,57 @@ void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
   UniversalPrintCharArray(begin, len, os);
 }
 
+#ifdef __cpp_char8_t
+// Prints a (const) char8_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char8_t* begin, size_t len, ostream* os) {
+  UniversalPrintCharArray(begin, len, os);
+}
+#endif
+
+// Prints a (const) char16_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char16_t* begin, size_t len, ostream* os) {
+  UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints a (const) char32_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char32_t* begin, size_t len, ostream* os) {
+  UniversalPrintCharArray(begin, len, os);
+}
+
 // Prints a (const) wchar_t array of 'len' elements, starting at address
 // 'begin'.
 void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
   UniversalPrintCharArray(begin, len, os);
 }
 
-// Prints the given C string to the ostream.
-void PrintTo(const char* s, ostream* os) {
-  if (s == NULL) {
+namespace {
+
+// Prints a null-terminated C-style string to the ostream.
+template <typename Char>
+void PrintCStringTo(const Char* s, ostream* os) {
+  if (s == nullptr) {
     *os << "NULL";
   } else {
     *os << ImplicitCast_<const void*>(s) << " pointing to ";
-    PrintCharsAsStringTo(s, strlen(s), os);
+    PrintCharsAsStringTo(s, std::char_traits<Char>::length(s), os);
   }
 }
 
+}  // anonymous namespace
+
+void PrintTo(const char* s, ostream* os) { PrintCStringTo(s, os); }
+
+#ifdef __cpp_char8_t
+void PrintTo(const char8_t* s, ostream* os) { PrintCStringTo(s, os); }
+#endif
+
+void PrintTo(const char16_t* s, ostream* os) { PrintCStringTo(s, os); }
+
+void PrintTo(const char32_t* s, ostream* os) { PrintCStringTo(s, os); }
+
 // MSVC compiler can be configured to define whar_t as a typedef
 // of unsigned short. Defining an overload for const wchar_t* in that case
 // would cause pointers to unsigned shorts be printed as wide strings,
@@ -9339,33 +12193,97 @@ void PrintTo(const char* s, ostream* os) {
 // wchar_t is implemented as a native type.
 #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
 // Prints the given wide C string to the ostream.
-void PrintTo(const wchar_t* s, ostream* os) {
-  if (s == NULL) {
-    *os << "NULL";
-  } else {
-    *os << ImplicitCast_<const void*>(s) << " pointing to ";
-    PrintCharsAsStringTo(s, wcslen(s), os);
+void PrintTo(const wchar_t* s, ostream* os) { PrintCStringTo(s, os); }
+#endif  // wchar_t is native
+
+namespace {
+
+bool ContainsUnprintableControlCodes(const char* str, size_t length) {
+  const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
+
+  for (size_t i = 0; i < length; i++) {
+    unsigned char ch = *s++;
+    if (std::iscntrl(ch)) {
+        switch (ch) {
+        case '\t':
+        case '\n':
+        case '\r':
+          break;
+        default:
+          return true;
+        }
+      }
   }
+  return false;
 }
-#endif  // wchar_t is native
 
-// Prints a ::string object.
-#if GTEST_HAS_GLOBAL_STRING
-void PrintStringTo(const ::string& s, ostream* os) {
-  PrintCharsAsStringTo(s.data(), s.size(), os);
+bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; }
+
+bool IsValidUTF8(const char* str, size_t length) {
+  const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
+
+  for (size_t i = 0; i < length;) {
+    unsigned char lead = s[i++];
+
+    if (lead <= 0x7f) {
+      continue;  // single-byte character (ASCII) 0..7F
+    }
+    if (lead < 0xc2) {
+      return false;  // trail byte or non-shortest form
+    } else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) {
+      ++i;  // 2-byte character
+    } else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
+               IsUTF8TrailByte(s[i]) &&
+               IsUTF8TrailByte(s[i + 1]) &&
+               // check for non-shortest form and surrogate
+               (lead != 0xe0 || s[i] >= 0xa0) &&
+               (lead != 0xed || s[i] < 0xa0)) {
+      i += 2;  // 3-byte character
+    } else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
+               IsUTF8TrailByte(s[i]) &&
+               IsUTF8TrailByte(s[i + 1]) &&
+               IsUTF8TrailByte(s[i + 2]) &&
+               // check for non-shortest form
+               (lead != 0xf0 || s[i] >= 0x90) &&
+               (lead != 0xf4 || s[i] < 0x90)) {
+      i += 3;  // 4-byte character
+    } else {
+      return false;
+    }
+  }
+  return true;
 }
-#endif  // GTEST_HAS_GLOBAL_STRING
+
+void ConditionalPrintAsText(const char* str, size_t length, ostream* os) {
+  if (!ContainsUnprintableControlCodes(str, length) &&
+      IsValidUTF8(str, length)) {
+    *os << "\n    As Text: \"" << str << "\"";
+  }
+}
+
+}  // anonymous namespace
 
 void PrintStringTo(const ::std::string& s, ostream* os) {
+  if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) {
+    if (GTEST_FLAG(print_utf8)) {
+      ConditionalPrintAsText(s.data(), s.size(), os);
+    }
+  }
+}
+
+#ifdef __cpp_char8_t
+void PrintU8StringTo(const ::std::u8string& s, ostream* os) {
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif
+
+void PrintU16StringTo(const ::std::u16string& s, ostream* os) {
   PrintCharsAsStringTo(s.data(), s.size(), os);
 }
 
-// Prints a ::wstring object.
-#if GTEST_HAS_GLOBAL_WSTRING
-void PrintWideStringTo(const ::wstring& s, ostream* os) {
+void PrintU32StringTo(const ::std::u32string& s, ostream* os) {
   PrintCharsAsStringTo(s.data(), s.size(), os);
 }
-#endif  // GTEST_HAS_GLOBAL_WSTRING
 
 #if GTEST_HAS_STD_WSTRING
 void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
@@ -9404,19 +12322,11 @@ void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 //
-// Author: mheule@google.com (Markus Heule)
-//
-// The Google C++ Testing Framework (Google Test)
+// The Google C++ Testing and Mocking Framework (Google Test)
 
 
-// Indicates that this translation unit is part of Google Test's
-// implementation.  It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error.  This trick is to
-// prevent a user from accidentally including gtest-internal-inl.h in
-// his code.
-#define GTEST_IMPLEMENTATION_ 1
-#undef GTEST_IMPLEMENTATION_
 
 namespace testing {
 
@@ -9426,18 +12336,23 @@ using internal::GetUnitTestImpl;
 // in it.
 std::string TestPartResult::ExtractSummary(const char* message) {
   const char* const stack_trace = strstr(message, internal::kStackTraceMarker);
-  return stack_trace == NULL ? message :
-      std::string(message, stack_trace);
+  return stack_trace == nullptr ? message : std::string(message, stack_trace);
 }
 
 // Prints a TestPartResult object.
 std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
-  return os
-      << result.file_name() << ":" << result.line_number() << ": "
-      << (result.type() == TestPartResult::kSuccess ? "Success" :
-          result.type() == TestPartResult::kFatalFailure ? "Fatal failure" :
-          "Non-fatal failure") << ":\n"
-      << result.message() << std::endl;
+  return os << internal::FormatFileLocation(result.file_name(),
+                                            result.line_number())
+            << " "
+            << (result.type() == TestPartResult::kSuccess
+                    ? "Success"
+                    : result.type() == TestPartResult::kSkip
+                          ? "Skipped"
+                          : result.type() == TestPartResult::kFatalFailure
+                                ? "Fatal failure"
+                                : "Non-fatal failure")
+            << ":\n"
+            << result.message() << std::endl;
 }
 
 // Appends a TestPartResult to the array.
@@ -9452,7 +12367,7 @@ const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
     internal::posix::Abort();
   }
 
-  return array_[index];
+  return array_[static_cast<size_t>(index)];
 }
 
 // Returns the number of TestPartResult objects in the array.
@@ -9512,15 +12427,13 @@ void HasNewFatalFailureHelper::ReportTestPartResult(
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
+
+
 
 
 namespace testing {
 namespace internal {
 
-#if GTEST_HAS_TYPED_TEST_P
-
 // Skips to the first non-space char in str. Returns an empty string if str
 // contains only whitespace characters.
 static const char* SkipSpaces(const char* str) {
@@ -9529,51 +12442,52 @@ static const char* SkipSpaces(const char* str) {
   return str;
 }
 
+static std::vector<std::string> SplitIntoTestNames(const char* src) {
+  std::vector<std::string> name_vec;
+  src = SkipSpaces(src);
+  for (; src != nullptr; src = SkipComma(src)) {
+    name_vec.push_back(StripTrailingSpaces(GetPrefixUntilComma(src)));
+  }
+  return name_vec;
+}
+
 // Verifies that registered_tests match the test names in
-// defined_test_names_; returns registered_tests if successful, or
+// registered_tests_; returns registered_tests if successful, or
 // aborts the program otherwise.
-const char* TypedTestCasePState::VerifyRegisteredTestNames(
-    const char* file, int line, const char* registered_tests) {
-  typedef ::std::set<const char*>::const_iterator DefinedTestIter;
+const char* TypedTestSuitePState::VerifyRegisteredTestNames(
+    const char* test_suite_name, const char* file, int line,
+    const char* registered_tests) {
+  RegisterTypeParameterizedTestSuite(test_suite_name, CodeLocation(file, line));
+
+  typedef RegisteredTestsMap::const_iterator RegisteredTestIter;
   registered_ = true;
 
-  // Skip initial whitespace in registered_tests since some
-  // preprocessors prefix stringizied literals with whitespace.
-  registered_tests = SkipSpaces(registered_tests);
+  std::vector<std::string> name_vec = SplitIntoTestNames(registered_tests);
 
   Message errors;
-  ::std::set<std::string> tests;
-  for (const char* names = registered_tests; names != NULL;
-       names = SkipComma(names)) {
-    const std::string name = GetPrefixUntilComma(names);
+
+  std::set<std::string> tests;
+  for (std::vector<std::string>::const_iterator name_it = name_vec.begin();
+       name_it != name_vec.end(); ++name_it) {
+    const std::string& name = *name_it;
     if (tests.count(name) != 0) {
       errors << "Test " << name << " is listed more than once.\n";
       continue;
     }
 
-    bool found = false;
-    for (DefinedTestIter it = defined_test_names_.begin();
-         it != defined_test_names_.end();
-         ++it) {
-      if (name == *it) {
-        found = true;
-        break;
-      }
-    }
-
-    if (found) {
+    if (registered_tests_.count(name) != 0) {
       tests.insert(name);
     } else {
       errors << "No test named " << name
-             << " can be found in this test case.\n";
+             << " can be found in this test suite.\n";
     }
   }
 
-  for (DefinedTestIter it = defined_test_names_.begin();
-       it != defined_test_names_.end();
+  for (RegisteredTestIter it = registered_tests_.begin();
+       it != registered_tests_.end();
        ++it) {
-    if (tests.count(*it) == 0) {
-      errors << "You forgot to list test " << *it << ".\n";
+    if (tests.count(it->first) == 0) {
+      errors << "You forgot to list test " << it->first << ".\n";
     }
   }
 
@@ -9588,7 +12502,5 @@ const char* TypedTestCasePState::VerifyRegisteredTestNames(
   return registered_tests;
 }
 
-#endif  // GTEST_HAS_TYPED_TEST_P
-
 }  // namespace internal
 }  // namespace testing
diff --git a/packages/kokkos/tpls/gtest/gtest/gtest-test-part.h b/packages/kokkos/tpls/gtest/gtest/gtest-test-part.h
deleted file mode 120000
index 48d39090f1cabfc4a852d54e0e1f186362eeb1f5..0000000000000000000000000000000000000000
--- a/packages/kokkos/tpls/gtest/gtest/gtest-test-part.h
+++ /dev/null
@@ -1 +0,0 @@
-gtest.h
\ No newline at end of file
diff --git a/packages/kokkos/tpls/gtest/gtest/gtest.h b/packages/kokkos/tpls/gtest/gtest/gtest.h
index f39d0b87c90f73113e44c7dec9b2cdac9088644e..e7490573ac5fbf630111d2ac9d63ad1906f6e2a9 100644
--- a/packages/kokkos/tpls/gtest/gtest/gtest.h
+++ b/packages/kokkos/tpls/gtest/gtest/gtest.h
@@ -26,10 +26,9 @@
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 //
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
+// The Google C++ Testing and Mocking Framework (Google Test)
 //
 // This header file defines the public API for Google Test.  It should be
 // included by any test program that uses Google Test.
@@ -48,15 +47,16 @@
 // registration from Barthelemy Dagenais' (barthelemy@prologique.com)
 // easyUnit framework.
 
-#ifdef __GNUC__
-#pragma GCC system_header
-#endif
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_H_
 
+#include <cstddef>
 #include <limits>
+#include <memory>
 #include <ostream>
+#include <type_traits>
 #include <vector>
 
 // Copyright 2005, Google Inc.
@@ -88,15 +88,15 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
-//
-// The Google C++ Testing Framework (Google Test)
+// The Google C++ Testing and Mocking Framework (Google Test)
 //
 // This header file declares functions and macros used internally by
 // Google Test.  They are subject to change without notice.
 
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
 
 // Copyright 2005, Google Inc.
 // All rights reserved.
@@ -127,33 +127,51 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-// Authors: wan@google.com (Zhanyong Wan)
-//
 // Low-level types and utilities for porting Google Test to various
-// platforms.  They are subject to change without notice.  DO NOT USE
-// THEM IN USER CODE.
+// platforms.  All macros ending with _ and symbols defined in an
+// internal namespace are subject to change without notice.  Code
+// outside Google Test MUST NOT USE THEM DIRECTLY.  Macros that don't
+// end with _ are part of Google Test's public API and can be used by
+// code outside Google Test.
 //
 // This file is fundamental to Google Test.  All other Google Test source
 // files are expected to #include this.  Therefore, it cannot #include
 // any other Google Test header.
 
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
 
-// The user can define the following macros in the build script to
-// control Google Test's behavior.  If the user doesn't define a macro
-// in this list, Google Test will define it.
+// Environment-describing macros
+// -----------------------------
+//
+// Google Test can be used in many different environments.  Macros in
+// this section tell Google Test what kind of environment it is being
+// used in, such that Google Test can provide environment-specific
+// features and implementations.
+//
+// Google Test tries to automatically detect the properties of its
+// environment, so users usually don't need to worry about these
+// macros.  However, the automatic detection is not perfect.
+// Sometimes it's necessary for a user to define some of the following
+// macros in the build script to override Google Test's decisions.
+//
+// If the user doesn't define a macro in the list, Google Test will
+// provide a default definition.  After this header is #included, all
+// macros in this list will be defined to either 1 or 0.
+//
+// Notes to maintainers:
+//   - Each macro here is a user-tweakable knob; do not grow the list
+//     lightly.
+//   - Use #if to key off these macros.  Don't use #ifdef or "#if
+//     defined(...)", which will not work as these macros are ALWAYS
+//     defined.
 //
 //   GTEST_HAS_CLONE          - Define it to 1/0 to indicate that clone(2)
 //                              is/isn't available.
 //   GTEST_HAS_EXCEPTIONS     - Define it to 1/0 to indicate that exceptions
 //                              are enabled.
-//   GTEST_HAS_GLOBAL_STRING  - Define it to 1/0 to indicate that ::string
-//                              is/isn't available (some systems define
-//                              ::string, which is different to std::string).
-//   GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::string
-//                              is/isn't available (some systems define
-//                              ::wstring, which is different to std::wstring).
 //   GTEST_HAS_POSIX_RE       - Define it to 1/0 to indicate that POSIX regular
 //                              expressions are/aren't available.
 //   GTEST_HAS_PTHREAD        - Define it to 1/0 to indicate that <pthread.h>
@@ -163,8 +181,6 @@
 //   GTEST_HAS_STD_WSTRING    - Define it to 1/0 to indicate that
 //                              std::wstring does/doesn't work (Google Test can
 //                              be used where std::wstring is unavailable).
-//   GTEST_HAS_TR1_TUPLE      - Define it to 1/0 to indicate tr1::tuple
-//                              is/isn't available.
 //   GTEST_HAS_SEH            - Define it to 1/0 to indicate whether the
 //                              compiler supports Microsoft's "Structured
 //                              Exception Handling".
@@ -172,12 +188,6 @@
 //                            - Define it to 1/0 to indicate whether the
 //                              platform supports I/O stream redirection using
 //                              dup() and dup2().
-//   GTEST_USE_OWN_TR1_TUPLE  - Define it to 1/0 to indicate whether Google
-//                              Test's own tr1 tuple implementation should be
-//                              used.  Unused when the user sets
-//                              GTEST_HAS_TR1_TUPLE to 0.
-//   GTEST_LANG_CXX11         - Define it to 1/0 to indicate that Google Test
-//                              is building in C++11/C++98 mode.
 //   GTEST_LINKED_AS_SHARED_LIBRARY
 //                            - Define to 1 when compiling tests that use
 //                              Google Test as a shared library (known as
@@ -185,83 +195,133 @@
 //   GTEST_CREATE_SHARED_LIBRARY
 //                            - Define to 1 when compiling Google Test itself
 //                              as a shared library.
-
-// This header defines the following utilities:
+//   GTEST_DEFAULT_DEATH_TEST_STYLE
+//                            - The default value of --gtest_death_test_style.
+//                              The legacy default has been "fast" in the open
+//                              source version since 2008. The recommended value
+//                              is "threadsafe", and can be set in
+//                              custom/gtest-port.h.
+
+// Platform-indicating macros
+// --------------------------
+//
+// Macros indicating the platform on which Google Test is being used
+// (a macro is defined to 1 if compiled on the given platform;
+// otherwise UNDEFINED -- it's never defined to 0.).  Google Test
+// defines these macros automatically.  Code outside Google Test MUST
+// NOT define them.
 //
-// Macros indicating the current platform (defined to 1 if compiled on
-// the given platform; otherwise undefined):
 //   GTEST_OS_AIX      - IBM AIX
 //   GTEST_OS_CYGWIN   - Cygwin
+//   GTEST_OS_DRAGONFLY - DragonFlyBSD
+//   GTEST_OS_FREEBSD  - FreeBSD
+//   GTEST_OS_FUCHSIA  - Fuchsia
+//   GTEST_OS_GNU_KFREEBSD - GNU/kFreeBSD
+//   GTEST_OS_HAIKU    - Haiku
 //   GTEST_OS_HPUX     - HP-UX
 //   GTEST_OS_LINUX    - Linux
 //     GTEST_OS_LINUX_ANDROID - Google Android
 //   GTEST_OS_MAC      - Mac OS X
 //     GTEST_OS_IOS    - iOS
-//       GTEST_OS_IOS_SIMULATOR - iOS simulator
 //   GTEST_OS_NACL     - Google Native Client (NaCl)
+//   GTEST_OS_NETBSD   - NetBSD
 //   GTEST_OS_OPENBSD  - OpenBSD
+//   GTEST_OS_OS2      - OS/2
 //   GTEST_OS_QNX      - QNX
 //   GTEST_OS_SOLARIS  - Sun Solaris
-//   GTEST_OS_SYMBIAN  - Symbian
 //   GTEST_OS_WINDOWS  - Windows (Desktop, MinGW, or Mobile)
 //     GTEST_OS_WINDOWS_DESKTOP  - Windows Desktop
 //     GTEST_OS_WINDOWS_MINGW    - MinGW
 //     GTEST_OS_WINDOWS_MOBILE   - Windows Mobile
+//     GTEST_OS_WINDOWS_PHONE    - Windows Phone
+//     GTEST_OS_WINDOWS_RT       - Windows Store App/WinRT
 //   GTEST_OS_ZOS      - z/OS
 //
-// Among the platforms, Cygwin, Linux, Max OS X, and Windows have the
+// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
 // most stable support.  Since core members of the Google Test project
 // don't have access to other platforms, support for them may be less
 // stable.  If you notice any problems on your platform, please notify
 // googletestframework@googlegroups.com (patches for fixing them are
 // even more welcome!).
 //
-// Note that it is possible that none of the GTEST_OS_* macros are defined.
+// It is possible that none of the GTEST_OS_* macros are defined.
+
+// Feature-indicating macros
+// -------------------------
+//
+// Macros indicating which Google Test features are available (a macro
+// is defined to 1 if the corresponding feature is supported;
+// otherwise UNDEFINED -- it's never defined to 0.).  Google Test
+// defines these macros automatically.  Code outside Google Test MUST
+// NOT define them.
+//
+// These macros are public so that portable tests can be written.
+// Such tests typically surround code using a feature with an #if
+// which controls that code.  For example:
+//
+// #if GTEST_HAS_DEATH_TEST
+//   EXPECT_DEATH(DoSomethingDeadly());
+// #endif
 //
-// Macros indicating available Google Test features (defined to 1 if
-// the corresponding feature is supported; otherwise undefined):
-//   GTEST_HAS_COMBINE      - the Combine() function (for value-parameterized
-//                            tests)
 //   GTEST_HAS_DEATH_TEST   - death tests
-//   GTEST_HAS_PARAM_TEST   - value-parameterized tests
 //   GTEST_HAS_TYPED_TEST   - typed tests
 //   GTEST_HAS_TYPED_TEST_P - type-parameterized tests
+//   GTEST_IS_THREADSAFE    - Google Test is thread-safe.
+//   GOOGLETEST_CM0007 DO NOT DELETE
 //   GTEST_USES_POSIX_RE    - enhanced POSIX regex is used. Do not confuse with
 //                            GTEST_HAS_POSIX_RE (see above) which users can
 //                            define themselves.
 //   GTEST_USES_SIMPLE_RE   - our own simple regex is used;
-//                            the above two are mutually exclusive.
-//   GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().
+//                            the above RE\b(s) are mutually exclusive.
+
+// Misc public macros
+// ------------------
+//
+//   GTEST_FLAG(flag_name)  - references the variable corresponding to
+//                            the given Google Test flag.
+
+// Internal utilities
+// ------------------
+//
+// The following macros and utilities are for Google Test's INTERNAL
+// use only.  Code outside Google Test MUST NOT USE THEM DIRECTLY.
 //
 // Macros for basic C++ coding:
 //   GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
 //   GTEST_ATTRIBUTE_UNUSED_  - declares that a class' instances or a
 //                              variable don't have to be used.
-//   GTEST_DISALLOW_ASSIGN_   - disables operator=.
+//   GTEST_DISALLOW_ASSIGN_   - disables copy operator=.
 //   GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
+//   GTEST_DISALLOW_MOVE_ASSIGN_   - disables move operator=.
+//   GTEST_DISALLOW_MOVE_AND_ASSIGN_ - disables move ctor and operator=.
 //   GTEST_MUST_USE_RESULT_   - declares that a function's result must be used.
+//   GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127 is
+//                                        suppressed (constant conditional).
+//   GTEST_INTENTIONAL_CONST_COND_POP_  - finish code section where MSVC C4127
+//                                        is suppressed.
+//   GTEST_INTERNAL_HAS_ANY - for enabling UniversalPrinter<std::any> or
+//                            UniversalPrinter<absl::any> specializations.
+//   GTEST_INTERNAL_HAS_OPTIONAL - for enabling UniversalPrinter<std::optional>
+//   or
+//                                 UniversalPrinter<absl::optional>
+//                                 specializations.
+//   GTEST_INTERNAL_HAS_STRING_VIEW - for enabling Matcher<std::string_view> or
+//                                    Matcher<absl::string_view>
+//                                    specializations.
+//   GTEST_INTERNAL_HAS_VARIANT - for enabling UniversalPrinter<std::variant> or
+//                                UniversalPrinter<absl::variant>
+//                                specializations.
 //
 // Synchronization:
 //   Mutex, MutexLock, ThreadLocal, GetThreadCount()
-//                  - synchronization primitives.
-//   GTEST_IS_THREADSAFE - defined to 1 to indicate that the above
-//                         synchronization primitives have real implementations
-//                         and Google Test is thread-safe; or 0 otherwise.
-//
-// Template meta programming:
-//   is_pointer     - as in TR1; needed on Symbian and IBM XL C/C++ only.
-//   IteratorTraits - partial implementation of std::iterator_traits, which
-//                    is not available in libCstd when compiled with Sun C++.
-//
-// Smart pointers:
-//   scoped_ptr     - as in TR2.
+//                            - synchronization primitives.
 //
 // Regular expressions:
 //   RE             - a simple regular expression class using the POSIX
-//                    Extended Regular Expression syntax on UNIX-like
-//                    platforms, or a reduced regular exception syntax on
-//                    other platforms, including Windows.
-//
+//                    Extended Regular Expression syntax on UNIX-like platforms
+//                    GOOGLETEST_CM0008 DO NOT DELETE
+//                    or a reduced regular exception syntax on other
+//                    platforms, including Windows.
 // Logging:
 //   GTEST_LOG_()   - logs messages at the specified severity level.
 //   LogToStderr()  - directs all log messages to stderr.
@@ -277,12 +337,10 @@
 //
 // Integer types:
 //   TypeWithSize   - maps an integer to a int type.
-//   Int32, UInt32, Int64, UInt64, TimeInMillis
-//                  - integers of known sizes.
+//   TimeInMillis   - integers of known sizes.
 //   BiggestInt     - the biggest signed integer type.
 //
 // Command-line utilities:
-//   GTEST_FLAG()       - references a flag.
 //   GTEST_DECLARE_*()  - declares a flag.
 //   GTEST_DEFINE_*()   - defines a flag.
 //   GetInjectableArgvs() - returns the command line as a vector of strings.
@@ -290,14 +348,25 @@
 // Environment variable utilities:
 //   GetEnv()             - gets the value of an environment variable.
 //   BoolFromGTestEnv()   - parses a bool environment variable.
-//   Int32FromGTestEnv()  - parses an Int32 environment variable.
+//   Int32FromGTestEnv()  - parses an int32_t environment variable.
 //   StringFromGTestEnv() - parses a string environment variable.
+//
+// Deprecation warnings:
+//   GTEST_INTERNAL_DEPRECATED(message) - attribute marking a function as
+//                                        deprecated; calling a marked function
+//                                        should generate a compiler warning
 
 #include <ctype.h>   // for isspace, etc
 #include <stddef.h>  // for ptrdiff_t
-#include <stdlib.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+
+#include <cerrno>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
 #ifndef _WIN32_WCE
 # include <sys/types.h>
 # include <sys/stat.h>
@@ -309,45 +378,131 @@
 #endif
 
 #include <iostream>  // NOLINT
-#include <sstream>  // NOLINT
+#include <locale>
+#include <memory>
 #include <string>  // NOLINT
+#include <tuple>
+#include <vector>  // NOLINT
 
-#define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
-#define GTEST_FLAG_PREFIX_ "gtest_"
-#define GTEST_FLAG_PREFIX_DASH_ "gtest-"
-#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
-#define GTEST_NAME_ "Google Test"
-#define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/"
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
 
-// Determines the version of gcc that is used to compile this.
-#ifdef __GNUC__
-// 40302 means version 4.3.2.
-# define GTEST_GCC_VER_ \
-    (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
-#endif  // __GNUC__
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the GTEST_OS_* macro.
+// It is separate from gtest-port.h so that custom/gtest-port.h can include it.
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
 
 // Determines the platform on which Google Test is compiled.
 #ifdef __CYGWIN__
 # define GTEST_OS_CYGWIN 1
-#elif defined __SYMBIAN32__
-# define GTEST_OS_SYMBIAN 1
+# elif defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
+#  define GTEST_OS_WINDOWS_MINGW 1
+#  define GTEST_OS_WINDOWS 1
 #elif defined _WIN32
 # define GTEST_OS_WINDOWS 1
 # ifdef _WIN32_WCE
 #  define GTEST_OS_WINDOWS_MOBILE 1
-# elif defined(__MINGW__) || defined(__MINGW32__)
-#  define GTEST_OS_WINDOWS_MINGW 1
+# elif defined(WINAPI_FAMILY)
+#  include <winapifamily.h>
+#  if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#   define GTEST_OS_WINDOWS_DESKTOP 1
+#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
+#   define GTEST_OS_WINDOWS_PHONE 1
+#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+#   define GTEST_OS_WINDOWS_RT 1
+#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_TV_TITLE)
+#   define GTEST_OS_WINDOWS_PHONE 1
+#   define GTEST_OS_WINDOWS_TV_TITLE 1
+#  else
+    // WINAPI_FAMILY defined but no known partition matched.
+    // Default to desktop.
+#   define GTEST_OS_WINDOWS_DESKTOP 1
+#  endif
 # else
 #  define GTEST_OS_WINDOWS_DESKTOP 1
 # endif  // _WIN32_WCE
+#elif defined __OS2__
+# define GTEST_OS_OS2 1
 #elif defined __APPLE__
 # define GTEST_OS_MAC 1
+# include <TargetConditionals.h>
 # if TARGET_OS_IPHONE
 #  define GTEST_OS_IOS 1
-#  if TARGET_IPHONE_SIMULATOR
-#   define GTEST_OS_IOS_SIMULATOR 1
-#  endif
 # endif
+#elif defined __DragonFly__
+# define GTEST_OS_DRAGONFLY 1
+#elif defined __FreeBSD__
+# define GTEST_OS_FREEBSD 1
+#elif defined __Fuchsia__
+# define GTEST_OS_FUCHSIA 1
+#elif defined(__GLIBC__) && defined(__FreeBSD_kernel__)
+# define GTEST_OS_GNU_KFREEBSD 1
 #elif defined __linux__
 # define GTEST_OS_LINUX 1
 # if defined __ANDROID__
@@ -363,55 +518,128 @@
 # define GTEST_OS_HPUX 1
 #elif defined __native_client__
 # define GTEST_OS_NACL 1
+#elif defined __NetBSD__
+# define GTEST_OS_NETBSD 1
 #elif defined __OpenBSD__
 # define GTEST_OS_OPENBSD 1
 #elif defined __QNX__
 # define GTEST_OS_QNX 1
+#elif defined(__HAIKU__)
+#define GTEST_OS_HAIKU 1
+#elif defined ESP8266
+#define GTEST_OS_ESP8266 1
+#elif defined ESP32
+#define GTEST_OS_ESP32 1
+#elif defined(__XTENSA__)
+#define GTEST_OS_XTENSA 1
 #endif  // __CYGWIN__
 
-#ifndef GTEST_LANG_CXX11
-// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when
-// -std={c,gnu}++{0x,11} is passed.  The C++11 standard specifies a
-// value for __cplusplus, and recent versions of clang, gcc, and
-// probably other compilers set that too in C++11 mode.
-# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L
-// Compiling in at least C++11 mode.
-#  define GTEST_LANG_CXX11 1
-# else
-#  define GTEST_LANG_CXX11 0
-# endif
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+
+#if !defined(GTEST_DEV_EMAIL_)
+# define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
+# define GTEST_FLAG_PREFIX_ "gtest_"
+# define GTEST_FLAG_PREFIX_DASH_ "gtest-"
+# define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
+# define GTEST_NAME_ "Google Test"
+# define GTEST_PROJECT_URL_ "https://github.com/google/googletest/"
+#endif  // !defined(GTEST_DEV_EMAIL_)
+
+#if !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
+# define GTEST_INIT_GOOGLE_TEST_NAME_ "testing::InitGoogleTest"
+#endif  // !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
+
+// Determines the version of gcc that is used to compile this.
+#ifdef __GNUC__
+// 40302 means version 4.3.2.
+# define GTEST_GCC_VER_ \
+    (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
+#endif  // __GNUC__
+
+// Macros for disabling Microsoft Visual C++ warnings.
+//
+//   GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 4385)
+//   /* code that triggers warnings C4800 and C4385 */
+//   GTEST_DISABLE_MSC_WARNINGS_POP_()
+#if defined(_MSC_VER)
+# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) \
+    __pragma(warning(push))                        \
+    __pragma(warning(disable: warnings))
+# define GTEST_DISABLE_MSC_WARNINGS_POP_()          \
+    __pragma(warning(pop))
+#else
+// Not all compilers are MSVC
+# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings)
+# define GTEST_DISABLE_MSC_WARNINGS_POP_()
+#endif
+
+// Clang on Windows does not understand MSVC's pragma warning.
+// We need clang-specific way to disable function deprecation warning.
+#ifdef __clang__
+# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_()                         \
+    _Pragma("clang diagnostic push")                                  \
+    _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") \
+    _Pragma("clang diagnostic ignored \"-Wdeprecated-implementations\"")
+#define GTEST_DISABLE_MSC_DEPRECATED_POP_() \
+    _Pragma("clang diagnostic pop")
+#else
+# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \
+    GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996)
+# define GTEST_DISABLE_MSC_DEPRECATED_POP_() \
+    GTEST_DISABLE_MSC_WARNINGS_POP_()
 #endif
 
 // Brings in definitions for functions used in the testing::internal::posix
 // namespace (read, write, close, chdir, isatty, stat). We do not currently
 // use them on Windows Mobile.
-#if !GTEST_OS_WINDOWS
+#if GTEST_OS_WINDOWS
+# if !GTEST_OS_WINDOWS_MOBILE
+#  include <direct.h>
+#  include <io.h>
+# endif
+// In order to avoid having to include <windows.h>, use forward declaration
+#if GTEST_OS_WINDOWS_MINGW && !defined(__MINGW64_VERSION_MAJOR)
+// MinGW defined _CRITICAL_SECTION and _RTL_CRITICAL_SECTION as two
+// separate (equivalent) structs, instead of using typedef
+typedef struct _CRITICAL_SECTION GTEST_CRITICAL_SECTION;
+#else
+// Assume CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION.
+// This assumption is verified by
+// WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION.
+typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
+#endif
+#elif GTEST_OS_XTENSA
+#include <unistd.h>
+// Xtensa toolchains define strcasecmp in the string.h header instead of
+// strings.h. string.h is already included.
+#else
 // This assumes that non-Windows OSes provide unistd.h. For OSes where this
 // is not the case, we need to include headers that provide the functions
 // mentioned above.
 # include <unistd.h>
 # include <strings.h>
-#elif !GTEST_OS_WINDOWS_MOBILE
-# include <direct.h>
-# include <io.h>
-#endif
+#endif  // GTEST_OS_WINDOWS
 
 #if GTEST_OS_LINUX_ANDROID
 // Used to define __ANDROID_API__ matching the target NDK API level.
 #  include <android/api-level.h>  // NOLINT
 #endif
 
-// Defines this to true iff Google Test can use POSIX regular expressions.
+// Defines this to true if and only if Google Test can use POSIX regular
+// expressions.
 #ifndef GTEST_HAS_POSIX_RE
 # if GTEST_OS_LINUX_ANDROID
 // On Android, <regex.h> is only available starting with Gingerbread.
 #  define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
 # else
-#  define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
+#define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS && !GTEST_OS_XTENSA)
 # endif
 #endif
 
-#if GTEST_HAS_POSIX_RE
+#if GTEST_USES_PCRE
+// The appropriate headers have already been included.
+
+#elif GTEST_HAS_POSIX_RE
 
 // On some platforms, <regex.h> needs someone to define size_t, and
 // won't compile otherwise.  We can #include it here as we already
@@ -433,21 +661,34 @@
 // simple regex implementation instead.
 # define GTEST_USES_SIMPLE_RE 1
 
-#endif  // GTEST_HAS_POSIX_RE
+#endif  // GTEST_USES_PCRE
 
 #ifndef GTEST_HAS_EXCEPTIONS
 // The user didn't tell us whether exceptions are enabled, so we need
 // to figure it out.
-# if defined(_MSC_VER) || defined(__BORLANDC__)
-// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS
+# if defined(_MSC_VER) && defined(_CPPUNWIND)
+// MSVC defines _CPPUNWIND to 1 if and only if exceptions are enabled.
+#  define GTEST_HAS_EXCEPTIONS 1
+# elif defined(__BORLANDC__)
+// C++Builder's implementation of the STL uses the _HAS_EXCEPTIONS
 // macro to enable exceptions, so we'll do the same.
 // Assumes that exceptions are enabled by default.
 #  ifndef _HAS_EXCEPTIONS
 #   define _HAS_EXCEPTIONS 1
 #  endif  // _HAS_EXCEPTIONS
 #  define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
+# elif defined(__clang__)
+// clang defines __EXCEPTIONS if and only if exceptions are enabled before clang
+// 220714, but if and only if cleanups are enabled after that. In Obj-C++ files,
+// there can be cleanups for ObjC exceptions which also need cleanups, even if
+// C++ exceptions are disabled. clang has __has_feature(cxx_exceptions) which
+// checks for C++ exceptions starting at clang r206352, but which checked for
+// cleanups prior to that. To reliably check for C++ exception availability with
+// clang, check for
+// __EXCEPTIONS && __has_feature(cxx_exceptions).
+#  define GTEST_HAS_EXCEPTIONS (__EXCEPTIONS && __has_feature(cxx_exceptions))
 # elif defined(__GNUC__) && __EXCEPTIONS
-// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled.
+// gcc defines __EXCEPTIONS to 1 if and only if exceptions are enabled.
 #  define GTEST_HAS_EXCEPTIONS 1
 # elif defined(__SUNPRO_CC)
 // Sun Pro CC supports exceptions.  However, there is no compile-time way of
@@ -455,7 +696,7 @@
 // they are enabled unless the user tells us otherwise.
 #  define GTEST_HAS_EXCEPTIONS 1
 # elif defined(__IBMCPP__) && __EXCEPTIONS
-// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled.
+// xlC defines __EXCEPTIONS to 1 if and only if exceptions are enabled.
 #  define GTEST_HAS_EXCEPTIONS 1
 # elif defined(__HP_aCC)
 // Exception handling is in effect by default in HP aCC compiler. It has to
@@ -468,44 +709,18 @@
 # endif  // defined(_MSC_VER) || defined(__BORLANDC__)
 #endif  // GTEST_HAS_EXCEPTIONS
 
-#if !defined(GTEST_HAS_STD_STRING)
-// Even though we don't use this macro any longer, we keep it in case
-// some clients still depend on it.
-# define GTEST_HAS_STD_STRING 1
-#elif !GTEST_HAS_STD_STRING
-// The user told us that ::std::string isn't available.
-# error "Google Test cannot be used where ::std::string isn't available."
-#endif  // !defined(GTEST_HAS_STD_STRING)
-
-#ifndef GTEST_HAS_GLOBAL_STRING
-// The user didn't tell us whether ::string is available, so we need
-// to figure it out.
-
-# define GTEST_HAS_GLOBAL_STRING 0
-
-#endif  // GTEST_HAS_GLOBAL_STRING
-
 #ifndef GTEST_HAS_STD_WSTRING
 // The user didn't tell us whether ::std::wstring is available, so we need
 // to figure it out.
-// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring
-//   is available.
-
 // Cygwin 1.7 and below doesn't support ::std::wstring.
 // Solaris' libc++ doesn't support it either.  Android has
 // no support for it at least as recent as Froyo (2.2).
-# define GTEST_HAS_STD_WSTRING \
-    (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS))
+#define GTEST_HAS_STD_WSTRING                                         \
+  (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
+     GTEST_OS_HAIKU || GTEST_OS_ESP32 || GTEST_OS_ESP8266 || GTEST_OS_XTENSA))
 
 #endif  // GTEST_HAS_STD_WSTRING
 
-#ifndef GTEST_HAS_GLOBAL_WSTRING
-// The user didn't tell us whether ::wstring is available, so we need
-// to figure it out.
-# define GTEST_HAS_GLOBAL_WSTRING \
-    (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING)
-#endif  // GTEST_HAS_GLOBAL_WSTRING
-
 // Determines whether RTTI is available.
 #ifndef GTEST_HAS_RTTI
 // The user didn't tell us whether RTTI is enabled, so we need to
@@ -513,14 +728,15 @@
 
 # ifdef _MSC_VER
 
-#  ifdef _CPPRTTI  // MSVC defines this macro iff RTTI is enabled.
+#ifdef _CPPRTTI  // MSVC defines this macro if and only if RTTI is enabled.
 #   define GTEST_HAS_RTTI 1
 #  else
 #   define GTEST_HAS_RTTI 0
 #  endif
 
-// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled.
-# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302)
+// Starting with version 4.3.2, gcc defines __GXX_RTTI if and only if RTTI is
+// enabled.
+# elif defined(__GNUC__)
 
 #  ifdef __GXX_RTTI
 // When building against STLport with the Android NDK and with
@@ -571,13 +787,16 @@
 
 // Determines whether Google Test can use the pthreads library.
 #ifndef GTEST_HAS_PTHREAD
-// The user didn't tell us explicitly, so we assume pthreads support is
-// available on Linux and Mac.
+// The user didn't tell us explicitly, so we make reasonable assumptions about
+// which platforms have pthreads support.
 //
 // To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0
 // to your compiler flags.
-# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \
-    || GTEST_OS_QNX)
+#define GTEST_HAS_PTHREAD                                                      \
+  (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX ||          \
+   GTEST_OS_FREEBSD || GTEST_OS_NACL || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA || \
+   GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_OPENBSD ||          \
+   GTEST_OS_HAIKU)
 #endif  // GTEST_HAS_PTHREAD
 
 #if GTEST_HAS_PTHREAD
@@ -589,2494 +808,2462 @@
 # include <time.h>  // NOLINT
 #endif
 
-// Determines whether Google Test can use tr1/tuple.  You can define
-// this macro to 0 to prevent Google Test from using tuple (any
-// feature depending on tuple with be disabled in this mode).
-#ifndef GTEST_HAS_TR1_TUPLE
-# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR)
-// STLport, provided with the Android NDK, has neither <tr1/tuple> or <tuple>.
-#  define GTEST_HAS_TR1_TUPLE 0
-# else
-// The user didn't tell us not to do it, so we assume it's OK.
-#  define GTEST_HAS_TR1_TUPLE 1
-# endif
-#endif  // GTEST_HAS_TR1_TUPLE
-
-// Determines whether Google Test's own tr1 tuple implementation
-// should be used.
-#ifndef GTEST_USE_OWN_TR1_TUPLE
+// Determines whether clone(2) is supported.
+// Usually it will only be available on Linux, excluding
+// Linux on the Itanium architecture.
+// Also see http://linux.die.net/man/2/clone.
+#ifndef GTEST_HAS_CLONE
 // The user didn't tell us, so we need to figure it out.
 
-// We use our own TR1 tuple if we aren't sure the user has an
-// implementation of it already.  At this time, libstdc++ 4.0.0+ and
-// MSVC 2010 are the only mainstream standard libraries that come
-// with a TR1 tuple implementation.  NVIDIA's CUDA NVCC compiler
-// pretends to be GCC by defining __GNUC__ and friends, but cannot
-// compile GCC's tuple implementation.  MSVC 2008 (9.0) provides TR1
-// tuple in a 323 MB Feature Pack download, which we cannot assume the
-// user has.  QNX's QCC compiler is a modified GCC but it doesn't
-// support TR1 tuple.  libc++ only provides std::tuple, in C++11 mode,
-// and it can be used with some compilers that define __GNUC__.
-# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \
-      && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600
-#  define GTEST_ENV_HAS_TR1_TUPLE_ 1
-# endif
+# if GTEST_OS_LINUX && !defined(__ia64__)
+#  if GTEST_OS_LINUX_ANDROID
+// On Android, clone() became available at different API levels for each 32-bit
+// architecture.
+#    if defined(__LP64__) || \
+        (defined(__arm__) && __ANDROID_API__ >= 9) || \
+        (defined(__mips__) && __ANDROID_API__ >= 12) || \
+        (defined(__i386__) && __ANDROID_API__ >= 17)
+#     define GTEST_HAS_CLONE 1
+#    else
+#     define GTEST_HAS_CLONE 0
+#    endif
+#  else
+#   define GTEST_HAS_CLONE 1
+#  endif
+# else
+#  define GTEST_HAS_CLONE 0
+# endif  // GTEST_OS_LINUX && !defined(__ia64__)
 
-// C++11 specifies that <tuple> provides std::tuple. Use that if gtest is used
-// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6
-// can build with clang but need to use gcc4.2's libstdc++).
-# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325)
-#  define GTEST_ENV_HAS_STD_TUPLE_ 1
-# endif
+#endif  // GTEST_HAS_CLONE
 
-# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_
-#  define GTEST_USE_OWN_TR1_TUPLE 0
+// Determines whether to support stream redirection. This is used to test
+// output correctness and to implement death tests.
+#ifndef GTEST_HAS_STREAM_REDIRECTION
+// By default, we assume that stream redirection is supported on all
+// platforms except known mobile ones.
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+    GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_XTENSA
+#  define GTEST_HAS_STREAM_REDIRECTION 0
 # else
-#  define GTEST_USE_OWN_TR1_TUPLE 1
-# endif
+#  define GTEST_HAS_STREAM_REDIRECTION 1
+# endif  // !GTEST_OS_WINDOWS_MOBILE
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+// Determines whether to support death tests.
+// pops up a dialog window that cannot be suppressed programmatically.
+#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS ||             \
+     (GTEST_OS_MAC && !GTEST_OS_IOS) ||                                   \
+     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER) || GTEST_OS_WINDOWS_MINGW ||  \
+     GTEST_OS_AIX || GTEST_OS_HPUX || GTEST_OS_OPENBSD || GTEST_OS_QNX || \
+     GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA ||           \
+     GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_HAIKU)
+# define GTEST_HAS_DEATH_TEST 1
+#endif
+
+// Determines whether to support type-driven tests.
+
+// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
+// Sun Pro CC, IBM Visual Age, and HP aCC support.
+#if defined(__GNUC__) || defined(_MSC_VER) || defined(__SUNPRO_CC) || \
+    defined(__IBMCPP__) || defined(__HP_aCC)
+# define GTEST_HAS_TYPED_TEST 1
+# define GTEST_HAS_TYPED_TEST_P 1
+#endif
 
-#endif  // GTEST_USE_OWN_TR1_TUPLE
+// Determines whether the system compiler uses UTF-16 for encoding wide strings.
+#define GTEST_WIDE_STRING_USES_UTF16_ \
+  (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_AIX || GTEST_OS_OS2)
 
-// To avoid conditional compilation everywhere, we make it
-// gtest-port.h's responsibility to #include the header implementing
-// tr1/tuple.
-#if GTEST_HAS_TR1_TUPLE
+// Determines whether test results can be streamed to a socket.
+#if GTEST_OS_LINUX || GTEST_OS_GNU_KFREEBSD || GTEST_OS_DRAGONFLY || \
+    GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_OPENBSD
+# define GTEST_CAN_STREAM_RESULTS_ 1
+#endif
 
-# if GTEST_USE_OWN_TR1_TUPLE
-// This file was GENERATED by command:
-//     pump.py gtest-tuple.h.pump
-// DO NOT EDIT BY HAND!!!
+// Defines some utility macros.
 
-// Copyright 2009 Google Inc.
-// All Rights Reserved.
+// The GNU compiler emits a warning if nested "if" statements are followed by
+// an "else" statement and braces are not used to explicitly disambiguate the
+// "else" binding.  This leads to problems with code like:
 //
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
+//   if (gate)
+//     ASSERT_*(condition) << "Some message";
 //
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
+// The "switch (0) case 0:" idiom is used to suppress this.
+#ifdef __INTEL_COMPILER
+# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
+#else
+# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default:  // NOLINT
+#endif
+
+// Use this annotation at the end of a struct/class definition to
+// prevent the compiler from optimizing away instances that are never
+// used.  This is useful when all interesting logic happens inside the
+// c'tor and / or d'tor.  Example:
 //
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//   struct Foo {
+//     Foo() { ... }
+//   } GTEST_ATTRIBUTE_UNUSED_;
 //
-// Author: wan@google.com (Zhanyong Wan)
-
-// Implements a subset of TR1 tuple needed by Google Test and Google Mock.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
-
-#include <utility>  // For ::std::pair.
+// Also use it after a variable or parameter declaration to tell the
+// compiler the variable/parameter does not have to be used.
+#if defined(__GNUC__) && !defined(COMPILER_ICC)
+# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
+#elif defined(__clang__)
+# if __has_attribute(unused)
+#  define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
+# endif
+#endif
+#ifndef GTEST_ATTRIBUTE_UNUSED_
+# define GTEST_ATTRIBUTE_UNUSED_
+#endif
 
-// The compiler used in Symbian has a bug that prevents us from declaring the
-// tuple template as a friend (it complains that tuple is redefined).  This
-// hack bypasses the bug by declaring the members that should otherwise be
-// private as public.
-// Sun Studio versions < 12 also have the above bug.
-#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
-# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
+// Use this annotation before a function that takes a printf format string.
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(COMPILER_ICC)
+# if defined(__MINGW_PRINTF_FORMAT)
+// MinGW has two different printf implementations. Ensure the format macro
+// matches the selected implementation. See
+// https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/.
+#  define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \
+       __attribute__((__format__(__MINGW_PRINTF_FORMAT, string_index, \
+                                 first_to_check)))
+# else
+#  define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \
+       __attribute__((__format__(__printf__, string_index, first_to_check)))
+# endif
 #else
-# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
-    template <GTEST_10_TYPENAMES_(U)> friend class tuple; \
-   private:
+# define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check)
 #endif
 
-// GTEST_n_TUPLE_(T) is the type of an n-tuple.
-#define GTEST_0_TUPLE_(T) tuple<>
-#define GTEST_1_TUPLE_(T) tuple<T##0, void, void, void, void, void, void, \
-    void, void, void>
-#define GTEST_2_TUPLE_(T) tuple<T##0, T##1, void, void, void, void, void, \
-    void, void, void>
-#define GTEST_3_TUPLE_(T) tuple<T##0, T##1, T##2, void, void, void, void, \
-    void, void, void>
-#define GTEST_4_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, void, void, void, \
-    void, void, void>
-#define GTEST_5_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, void, void, \
-    void, void, void>
-#define GTEST_6_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, void, \
-    void, void, void>
-#define GTEST_7_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
-    void, void, void>
-#define GTEST_8_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
-    T##7, void, void>
-#define GTEST_9_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
-    T##7, T##8, void>
-#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
-    T##7, T##8, T##9>
-
-// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
-#define GTEST_0_TYPENAMES_(T)
-#define GTEST_1_TYPENAMES_(T) typename T##0
-#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1
-#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2
-#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
-    typename T##3
-#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
-    typename T##3, typename T##4
-#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
-    typename T##3, typename T##4, typename T##5
-#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
-    typename T##3, typename T##4, typename T##5, typename T##6
-#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
-    typename T##3, typename T##4, typename T##5, typename T##6, typename T##7
-#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
-    typename T##3, typename T##4, typename T##5, typename T##6, \
-    typename T##7, typename T##8
-#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
-    typename T##3, typename T##4, typename T##5, typename T##6, \
-    typename T##7, typename T##8, typename T##9
-
-// In theory, defining stuff in the ::std namespace is undefined
-// behavior.  We can do this as we are playing the role of a standard
-// library vendor.
-namespace std {
-namespace tr1 {
 
-template <typename T0 = void, typename T1 = void, typename T2 = void,
-    typename T3 = void, typename T4 = void, typename T5 = void,
-    typename T6 = void, typename T7 = void, typename T8 = void,
-    typename T9 = void>
-class tuple;
+// A macro to disallow copy operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_ASSIGN_(type) \
+  type& operator=(type const &) = delete
 
-// Anything in namespace gtest_internal is Google Test's INTERNAL
-// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
-namespace gtest_internal {
+// A macro to disallow copy constructor and operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type) \
+  type(type const&) = delete;                 \
+  type& operator=(type const&) = delete
 
-// ByRef<T>::type is T if T is a reference; otherwise it's const T&.
-template <typename T>
-struct ByRef { typedef const T& type; };  // NOLINT
-template <typename T>
-struct ByRef<T&> { typedef T& type; };  // NOLINT
+// A macro to disallow move operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_MOVE_ASSIGN_(type) \
+  type& operator=(type &&) noexcept = delete
 
-// A handy wrapper for ByRef.
-#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type
+// A macro to disallow move constructor and operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_MOVE_AND_ASSIGN_(type) \
+  type(type&&) noexcept = delete;             \
+  type& operator=(type&&) noexcept = delete
 
-// AddRef<T>::type is T if T is a reference; otherwise it's T&.  This
-// is the same as tr1::add_reference<T>::type.
-template <typename T>
-struct AddRef { typedef T& type; };  // NOLINT
-template <typename T>
-struct AddRef<T&> { typedef T& type; };  // NOLINT
-
-// A handy wrapper for AddRef.
-#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type
+// Tell the compiler to warn about unused return values for functions declared
+// with this macro.  The macro should be used on function declarations
+// following the argument list:
+//
+//   Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
+#if defined(__GNUC__) && !defined(COMPILER_ICC)
+# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result))
+#else
+# define GTEST_MUST_USE_RESULT_
+#endif  // __GNUC__ && !COMPILER_ICC
 
-// A helper for implementing get<k>().
-template <int k> class Get;
+// MS C++ compiler emits warning when a conditional expression is compile time
+// constant. In some contexts this warning is false positive and needs to be
+// suppressed. Use the following two macros in such cases:
+//
+// GTEST_INTENTIONAL_CONST_COND_PUSH_()
+// while (true) {
+// GTEST_INTENTIONAL_CONST_COND_POP_()
+// }
+# define GTEST_INTENTIONAL_CONST_COND_PUSH_() \
+    GTEST_DISABLE_MSC_WARNINGS_PUSH_(4127)
+# define GTEST_INTENTIONAL_CONST_COND_POP_() \
+    GTEST_DISABLE_MSC_WARNINGS_POP_()
 
-// A helper for implementing tuple_element<k, T>.  kIndexValid is true
-// iff k < the number of fields in tuple type T.
-template <bool kIndexValid, int kIndex, class Tuple>
-struct TupleElement;
+// Determine whether the compiler supports Microsoft's Structured Exception
+// Handling.  This is supported by several Windows compilers but generally
+// does not exist on any other system.
+#ifndef GTEST_HAS_SEH
+// The user didn't tell us, so we need to figure it out.
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 0, GTEST_10_TUPLE_(T) > {
-  typedef T0 type;
-};
+# if defined(_MSC_VER) || defined(__BORLANDC__)
+// These two compilers are known to support SEH.
+#  define GTEST_HAS_SEH 1
+# else
+// Assume no SEH.
+#  define GTEST_HAS_SEH 0
+# endif
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 1, GTEST_10_TUPLE_(T) > {
-  typedef T1 type;
-};
+#endif  // GTEST_HAS_SEH
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 2, GTEST_10_TUPLE_(T) > {
-  typedef T2 type;
-};
+#ifndef GTEST_IS_THREADSAFE
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 3, GTEST_10_TUPLE_(T) > {
-  typedef T3 type;
-};
+#define GTEST_IS_THREADSAFE                                                 \
+  (GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ ||                                     \
+   (GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT) || \
+   GTEST_HAS_PTHREAD)
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 4, GTEST_10_TUPLE_(T) > {
-  typedef T4 type;
-};
+#endif  // GTEST_IS_THREADSAFE
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 5, GTEST_10_TUPLE_(T) > {
-  typedef T5 type;
-};
+// GTEST_API_ qualifies all symbols that must be exported. The definitions below
+// are guarded by #ifndef to give embedders a chance to define GTEST_API_ in
+// gtest/internal/custom/gtest-port.h
+#ifndef GTEST_API_
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 6, GTEST_10_TUPLE_(T) > {
-  typedef T6 type;
-};
+#ifdef _MSC_VER
+# if GTEST_LINKED_AS_SHARED_LIBRARY
+#  define GTEST_API_ __declspec(dllimport)
+# elif GTEST_CREATE_SHARED_LIBRARY
+#  define GTEST_API_ __declspec(dllexport)
+# endif
+#elif __GNUC__ >= 4 || defined(__clang__)
+# define GTEST_API_ __attribute__((visibility ("default")))
+#endif  // _MSC_VER
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 7, GTEST_10_TUPLE_(T) > {
-  typedef T7 type;
-};
+#endif  // GTEST_API_
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 8, GTEST_10_TUPLE_(T) > {
-  typedef T8 type;
-};
+#ifndef GTEST_API_
+# define GTEST_API_
+#endif  // GTEST_API_
 
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 9, GTEST_10_TUPLE_(T) > {
-  typedef T9 type;
-};
+#ifndef GTEST_DEFAULT_DEATH_TEST_STYLE
+# define GTEST_DEFAULT_DEATH_TEST_STYLE  "fast"
+#endif  // GTEST_DEFAULT_DEATH_TEST_STYLE
 
-}  // namespace gtest_internal
+#ifdef __GNUC__
+// Ask the compiler to never inline a given function.
+# define GTEST_NO_INLINE_ __attribute__((noinline))
+#else
+# define GTEST_NO_INLINE_
+#endif
 
-template <>
-class tuple<> {
- public:
-  tuple() {}
-  tuple(const tuple& /* t */)  {}
-  tuple& operator=(const tuple& /* t */) { return *this; }
-};
+// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
+#if !defined(GTEST_HAS_CXXABI_H_)
+# if defined(__GLIBCXX__) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))
+#  define GTEST_HAS_CXXABI_H_ 1
+# else
+#  define GTEST_HAS_CXXABI_H_ 0
+# endif
+#endif
 
-template <GTEST_1_TYPENAMES_(T)>
-class GTEST_1_TUPLE_(T) {
- public:
-  template <int k> friend class gtest_internal::Get;
+// A function level attribute to disable checking for use of uninitialized
+// memory when built with MemorySanitizer.
+#if defined(__clang__)
+# if __has_feature(memory_sanitizer)
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ \
+       __attribute__((no_sanitize_memory))
+# else
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+# endif  // __has_feature(memory_sanitizer)
+#else
+# define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+#endif  // __clang__
+
+// A function level attribute to disable AddressSanitizer instrumentation.
+#if defined(__clang__)
+# if __has_feature(address_sanitizer)
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ \
+       __attribute__((no_sanitize_address))
+# else
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+# endif  // __has_feature(address_sanitizer)
+#else
+# define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+#endif  // __clang__
+
+// A function level attribute to disable HWAddressSanitizer instrumentation.
+#if defined(__clang__)
+# if __has_feature(hwaddress_sanitizer)
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_ \
+       __attribute__((no_sanitize("hwaddress")))
+# else
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+# endif  // __has_feature(hwaddress_sanitizer)
+#else
+# define GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+#endif  // __clang__
+
+// A function level attribute to disable ThreadSanitizer instrumentation.
+#if defined(__clang__)
+# if __has_feature(thread_sanitizer)
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ \
+       __attribute__((no_sanitize_thread))
+# else
+#  define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
+# endif  // __has_feature(thread_sanitizer)
+#else
+# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
+#endif  // __clang__
 
-  tuple() : f0_() {}
+namespace testing {
 
-  explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {}
+class Message;
 
-  tuple(const tuple& t) : f0_(t.f0_) {}
+// Legacy imports for backwards compatibility.
+// New code should use std:: names directly.
+using std::get;
+using std::make_tuple;
+using std::tuple;
+using std::tuple_element;
+using std::tuple_size;
 
-  template <GTEST_1_TYPENAMES_(U)>
-  tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {}
+namespace internal {
 
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+// A secret type that Google Test users don't know about.  It has no
+// definition on purpose.  Therefore it's impossible to create a
+// Secret object, which is what we want.
+class Secret;
 
-  template <GTEST_1_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_1_TUPLE_(U)& t) {
-    return CopyFrom(t);
-  }
+// The GTEST_COMPILE_ASSERT_ is a legacy macro used to verify that a compile
+// time expression is true (in new code, use static_assert instead). For
+// example, you could use it to verify the size of a static array:
+//
+//   GTEST_COMPILE_ASSERT_(GTEST_ARRAY_SIZE_(names) == NUM_NAMES,
+//                         names_incorrect_size);
+//
+// The second argument to the macro must be a valid C++ identifier. If the
+// expression is false, compiler will issue an error containing this identifier.
+#define GTEST_COMPILE_ASSERT_(expr, msg) static_assert(expr, #msg)
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
+// A helper for suppressing warnings on constant condition.  It just
+// returns 'condition'.
+GTEST_API_ bool IsTrue(bool condition);
 
-  template <GTEST_1_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    return *this;
-  }
+// Defines RE.
 
-  T0 f0_;
-};
+#if GTEST_USES_PCRE
+// if used, PCRE is injected by custom/gtest-port.h
+#elif GTEST_USES_POSIX_RE || GTEST_USES_SIMPLE_RE
 
-template <GTEST_2_TYPENAMES_(T)>
-class GTEST_2_TUPLE_(T) {
+// A simple C++ wrapper for <regex.h>.  It uses the POSIX Extended
+// Regular Expression syntax.
+class GTEST_API_ RE {
  public:
-  template <int k> friend class gtest_internal::Get;
-
-  tuple() : f0_(), f1_() {}
-
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0),
-      f1_(f1) {}
+  // A copy constructor is required by the Standard to initialize object
+  // references from r-values.
+  RE(const RE& other) { Init(other.pattern()); }
 
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {}
+  // Constructs an RE from a string.
+  RE(const ::std::string& regex) { Init(regex.c_str()); }  // NOLINT
 
-  template <GTEST_2_TYPENAMES_(U)>
-  tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {}
-  template <typename U0, typename U1>
-  tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}
+  RE(const char* regex) { Init(regex); }  // NOLINT
+  ~RE();
 
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+  // Returns the string representation of the regex.
+  const char* pattern() const { return pattern_; }
 
-  template <GTEST_2_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_2_TUPLE_(U)& t) {
-    return CopyFrom(t);
+  // FullMatch(str, re) returns true if and only if regular expression re
+  // matches the entire str.
+  // PartialMatch(str, re) returns true if and only if regular expression re
+  // matches a substring of str (including str itself).
+  static bool FullMatch(const ::std::string& str, const RE& re) {
+    return FullMatch(str.c_str(), re);
   }
-  template <typename U0, typename U1>
-  tuple& operator=(const ::std::pair<U0, U1>& p) {
-    f0_ = p.first;
-    f1_ = p.second;
-    return *this;
+  static bool PartialMatch(const ::std::string& str, const RE& re) {
+    return PartialMatch(str.c_str(), re);
   }
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
-
-  template <GTEST_2_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    return *this;
-  }
+  static bool FullMatch(const char* str, const RE& re);
+  static bool PartialMatch(const char* str, const RE& re);
 
-  T0 f0_;
-  T1 f1_;
-};
+ private:
+  void Init(const char* regex);
+  const char* pattern_;
+  bool is_valid_;
 
-template <GTEST_3_TYPENAMES_(T)>
-class GTEST_3_TUPLE_(T) {
- public:
-  template <int k> friend class gtest_internal::Get;
+# if GTEST_USES_POSIX_RE
 
-  tuple() : f0_(), f1_(), f2_() {}
+  regex_t full_regex_;     // For FullMatch().
+  regex_t partial_regex_;  // For PartialMatch().
 
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {}
+# else  // GTEST_USES_SIMPLE_RE
 
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}
+  const char* full_pattern_;  // For FullMatch();
 
-  template <GTEST_3_TYPENAMES_(U)>
-  tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}
+# endif
+};
 
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+#endif  // GTEST_USES_PCRE
 
-  template <GTEST_3_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_3_TUPLE_(U)& t) {
-    return CopyFrom(t);
-  }
+// Formats a source file path and a line number as they would appear
+// in an error message from the compiler used to compile this code.
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
+// Formats a file location for compiler-independent XML output.
+// Although this function is not platform dependent, we put it next to
+// FormatFileLocation in order to contrast the two functions.
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
+                                                               int line);
 
-  template <GTEST_3_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    return *this;
-  }
+// Defines logging utilities:
+//   GTEST_LOG_(severity) - logs messages at the specified severity level. The
+//                          message itself is streamed into the macro.
+//   LogToStderr()  - directs all log messages to stderr.
+//   FlushInfoLog() - flushes informational log messages.
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
+enum GTestLogSeverity {
+  GTEST_INFO,
+  GTEST_WARNING,
+  GTEST_ERROR,
+  GTEST_FATAL
 };
 
-template <GTEST_4_TYPENAMES_(T)>
-class GTEST_4_TUPLE_(T) {
+// Formats log entry severity, provides a stream object for streaming the
+// log message, and terminates the message with a newline when going out of
+// scope.
+class GTEST_API_ GTestLog {
  public:
-  template <int k> friend class gtest_internal::Get;
-
-  tuple() : f0_(), f1_(), f2_(), f3_() {}
-
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2),
-      f3_(f3) {}
+  GTestLog(GTestLogSeverity severity, const char* file, int line);
 
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {}
+  // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
+  ~GTestLog();
 
-  template <GTEST_4_TYPENAMES_(U)>
-  tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
-      f3_(t.f3_) {}
+  ::std::ostream& GetStream() { return ::std::cerr; }
 
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+ private:
+  const GTestLogSeverity severity_;
 
-  template <GTEST_4_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_4_TUPLE_(U)& t) {
-    return CopyFrom(t);
-  }
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
+};
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
+#if !defined(GTEST_LOG_)
 
-  template <GTEST_4_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    f3_ = t.f3_;
-    return *this;
-  }
+# define GTEST_LOG_(severity) \
+    ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
+                                  __FILE__, __LINE__).GetStream()
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
-  T3 f3_;
-};
+inline void LogToStderr() {}
+inline void FlushInfoLog() { fflush(nullptr); }
 
-template <GTEST_5_TYPENAMES_(T)>
-class GTEST_5_TUPLE_(T) {
- public:
-  template <int k> friend class gtest_internal::Get;
+#endif  // !defined(GTEST_LOG_)
 
-  tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {}
+#if !defined(GTEST_CHECK_)
+// INTERNAL IMPLEMENTATION - DO NOT USE.
+//
+// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
+// is not satisfied.
+//  Synopsys:
+//    GTEST_CHECK_(boolean_condition);
+//     or
+//    GTEST_CHECK_(boolean_condition) << "Additional message";
+//
+//    This checks the condition and if the condition is not satisfied
+//    it prints message about the condition violation, including the
+//    condition itself, plus additional message streamed into it, if any,
+//    and then it aborts the program. It aborts the program irrespective of
+//    whether it is built in the debug mode or not.
+# define GTEST_CHECK_(condition) \
+    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+    if (::testing::internal::IsTrue(condition)) \
+      ; \
+    else \
+      GTEST_LOG_(FATAL) << "Condition " #condition " failed. "
+#endif  // !defined(GTEST_CHECK_)
 
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3,
-      GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {}
+// An all-mode assert to verify that the given POSIX-style function
+// call returns 0 (indicating success).  Known limitation: this
+// doesn't expand to a balanced 'if' statement, so enclose the macro
+// in {} if you need to use it as the only statement in an 'if'
+// branch.
+#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
+  if (const int gtest_error = (posix_call)) \
+    GTEST_LOG_(FATAL) << #posix_call << "failed with error " \
+                      << gtest_error
 
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
-      f4_(t.f4_) {}
+// Transforms "T" into "const T&" according to standard reference collapsing
+// rules (this is only needed as a backport for C++98 compilers that do not
+// support reference collapsing). Specifically, it transforms:
+//
+//   char         ==> const char&
+//   const char   ==> const char&
+//   char&        ==> char&
+//   const char&  ==> const char&
+//
+// Note that the non-const reference will not have "const" added. This is
+// standard, and necessary so that "T" can always bind to "const T&".
+template <typename T>
+struct ConstRef { typedef const T& type; };
+template <typename T>
+struct ConstRef<T&> { typedef T& type; };
 
-  template <GTEST_5_TYPENAMES_(U)>
-  tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
-      f3_(t.f3_), f4_(t.f4_) {}
+// The argument T must depend on some template parameters.
+#define GTEST_REFERENCE_TO_CONST_(T) \
+  typename ::testing::internal::ConstRef<T>::type
 
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Use ImplicitCast_ as a safe version of static_cast for upcasting in
+// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
+// const Foo*).  When you use ImplicitCast_, the compiler checks that
+// the cast is safe.  Such explicit ImplicitCast_s are necessary in
+// surprisingly many situations where C++ demands an exact type match
+// instead of an argument type convertable to a target type.
+//
+// The syntax for using ImplicitCast_ is the same as for static_cast:
+//
+//   ImplicitCast_<ToType>(expr)
+//
+// ImplicitCast_ would have been part of the C++ standard library,
+// but the proposal was submitted too late.  It will probably make
+// its way into the language in the future.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., implicit_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template<typename To>
+inline To ImplicitCast_(To x) { return x; }
 
-  template <GTEST_5_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_5_TUPLE_(U)& t) {
-    return CopyFrom(t);
+// When you upcast (that is, cast a pointer from type Foo to type
+// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
+// always succeed.  When you downcast (that is, cast a pointer from
+// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
+// how do you know the pointer is really of type SubclassOfFoo?  It
+// could be a bare Foo, or of type DifferentSubclassOfFoo.  Thus,
+// when you downcast, you should use this macro.  In debug mode, we
+// use dynamic_cast<> to double-check the downcast is legal (we die
+// if it's not).  In normal mode, we do the efficient static_cast<>
+// instead.  Thus, it's important to test in debug mode to make sure
+// the cast is legal!
+//    This is the only place in the code we should use dynamic_cast<>.
+// In particular, you SHOULDN'T be using dynamic_cast<> in order to
+// do RTTI (eg code like this:
+//    if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
+//    if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
+// You should design the code some other way not to need this.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., down_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template<typename To, typename From>  // use like this: DownCast_<T*>(foo);
+inline To DownCast_(From* f) {  // so we only accept pointers
+  // Ensures that To is a sub-type of From *.  This test is here only
+  // for compile-time type checking, and has no overhead in an
+  // optimized build at run-time, as it will be optimized away
+  // completely.
+  GTEST_INTENTIONAL_CONST_COND_PUSH_()
+  if (false) {
+  GTEST_INTENTIONAL_CONST_COND_POP_()
+  const To to = nullptr;
+  ::testing::internal::ImplicitCast_<From*>(to);
   }
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
+#if GTEST_HAS_RTTI
+  // RTTI: debug mode only!
+  GTEST_CHECK_(f == nullptr || dynamic_cast<To>(f) != nullptr);
+#endif
+  return static_cast<To>(f);
+}
 
-  template <GTEST_5_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    f3_ = t.f3_;
-    f4_ = t.f4_;
-    return *this;
-  }
+// Downcasts the pointer of type Base to Derived.
+// Derived must be a subclass of Base. The parameter MUST
+// point to a class of type Derived, not any subclass of it.
+// When RTTI is available, the function performs a runtime
+// check to enforce this.
+template <class Derived, class Base>
+Derived* CheckedDowncastToActualType(Base* base) {
+#if GTEST_HAS_RTTI
+  GTEST_CHECK_(typeid(*base) == typeid(Derived));
+#endif
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
-  T3 f3_;
-  T4 f4_;
-};
+#if GTEST_HAS_DOWNCAST_
+  return ::down_cast<Derived*>(base);
+#elif GTEST_HAS_RTTI
+  return dynamic_cast<Derived*>(base);  // NOLINT
+#else
+  return static_cast<Derived*>(base);  // Poor man's downcast.
+#endif
+}
 
-template <GTEST_6_TYPENAMES_(T)>
-class GTEST_6_TUPLE_(T) {
- public:
-  template <int k> friend class gtest_internal::Get;
+#if GTEST_HAS_STREAM_REDIRECTION
 
-  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {}
+// Defines the stderr capturer:
+//   CaptureStdout     - starts capturing stdout.
+//   GetCapturedStdout - stops capturing stdout and returns the captured string.
+//   CaptureStderr     - starts capturing stderr.
+//   GetCapturedStderr - stops capturing stderr and returns the captured string.
+//
+GTEST_API_ void CaptureStdout();
+GTEST_API_ std::string GetCapturedStdout();
+GTEST_API_ void CaptureStderr();
+GTEST_API_ std::string GetCapturedStderr();
 
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
-      GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
-      f5_(f5) {}
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+// Returns the size (in bytes) of a file.
+GTEST_API_ size_t GetFileSize(FILE* file);
 
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
-      f4_(t.f4_), f5_(t.f5_) {}
+// Reads the entire content of a file as a string.
+GTEST_API_ std::string ReadEntireFile(FILE* file);
 
-  template <GTEST_6_TYPENAMES_(U)>
-  tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
-      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {}
+// All command line arguments.
+GTEST_API_ std::vector<std::string> GetArgvs();
 
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+#if GTEST_HAS_DEATH_TEST
 
-  template <GTEST_6_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_6_TUPLE_(U)& t) {
-    return CopyFrom(t);
-  }
+std::vector<std::string> GetInjectableArgvs();
+// Deprecated: pass the args vector by value instead.
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs);
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs);
+void ClearInjectableArgvs();
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
+#endif  // GTEST_HAS_DEATH_TEST
 
-  template <GTEST_6_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    f3_ = t.f3_;
-    f4_ = t.f4_;
-    f5_ = t.f5_;
-    return *this;
-  }
+// Defines synchronization primitives.
+#if GTEST_IS_THREADSAFE
+# if GTEST_HAS_PTHREAD
+// Sleeps for (roughly) n milliseconds.  This function is only for testing
+// Google Test's own constructs.  Don't use it in user tests, either
+// directly or indirectly.
+inline void SleepMilliseconds(int n) {
+  const timespec time = {
+    0,                  // 0 seconds.
+    n * 1000L * 1000L,  // And n ms.
+  };
+  nanosleep(&time, nullptr);
+}
+# endif  // GTEST_HAS_PTHREAD
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
-  T3 f3_;
-  T4 f4_;
-  T5 f5_;
-};
+# if GTEST_HAS_NOTIFICATION_
+// Notification has already been imported into the namespace.
+// Nothing to do here.
 
-template <GTEST_7_TYPENAMES_(T)>
-class GTEST_7_TUPLE_(T) {
+# elif GTEST_HAS_PTHREAD
+// Allows a controller thread to pause execution of newly created
+// threads until notified.  Instances of this class must be created
+// and destroyed in the controller thread.
+//
+// This class is only for testing Google Test's own constructs. Do not
+// use it in user tests, either directly or indirectly.
+class Notification {
  public:
-  template <int k> friend class gtest_internal::Get;
-
-  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {}
-
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
-      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2),
-      f3_(f3), f4_(f4), f5_(f5), f6_(f6) {}
-
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
-      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}
-
-  template <GTEST_7_TYPENAMES_(U)>
-  tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
-      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}
-
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
-  template <GTEST_7_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_7_TUPLE_(U)& t) {
-    return CopyFrom(t);
+  Notification() : notified_(false) {
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, nullptr));
   }
-
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
-
-  template <GTEST_7_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    f3_ = t.f3_;
-    f4_ = t.f4_;
-    f5_ = t.f5_;
-    f6_ = t.f6_;
-    return *this;
+  ~Notification() {
+    pthread_mutex_destroy(&mutex_);
   }
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
-  T3 f3_;
-  T4 f4_;
-  T5 f5_;
-  T6 f6_;
-};
-
-template <GTEST_8_TYPENAMES_(T)>
-class GTEST_8_TUPLE_(T) {
- public:
-  template <int k> friend class gtest_internal::Get;
-
-  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {}
-
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
-      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6,
-      GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
-      f5_(f5), f6_(f6), f7_(f7) {}
-
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
-      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}
-
-  template <GTEST_8_TYPENAMES_(U)>
-  tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
-      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}
-
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
-  template <GTEST_8_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_8_TUPLE_(U)& t) {
-    return CopyFrom(t);
+  // Notifies all threads created with this notification to start. Must
+  // be called from the controller thread.
+  void Notify() {
+    pthread_mutex_lock(&mutex_);
+    notified_ = true;
+    pthread_mutex_unlock(&mutex_);
   }
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
-
-  template <GTEST_8_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    f3_ = t.f3_;
-    f4_ = t.f4_;
-    f5_ = t.f5_;
-    f6_ = t.f6_;
-    f7_ = t.f7_;
-    return *this;
+  // Blocks until the controller thread notifies. Must be called from a test
+  // thread.
+  void WaitForNotification() {
+    for (;;) {
+      pthread_mutex_lock(&mutex_);
+      const bool notified = notified_;
+      pthread_mutex_unlock(&mutex_);
+      if (notified)
+        break;
+      SleepMilliseconds(10);
+    }
   }
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
-  T3 f3_;
-  T4 f4_;
-  T5 f5_;
-  T6 f6_;
-  T7 f7_;
-};
+ private:
+  pthread_mutex_t mutex_;
+  bool notified_;
 
-template <GTEST_9_TYPENAMES_(T)>
-class GTEST_9_TUPLE_(T) {
- public:
-  template <int k> friend class gtest_internal::Get;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
+};
 
-  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {}
+# elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
 
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
-      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
-      GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
-      f5_(f5), f6_(f6), f7_(f7), f8_(f8) {}
+GTEST_API_ void SleepMilliseconds(int n);
 
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
-      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}
+// Provides leak-safe Windows kernel handle ownership.
+// Used in death tests and in threading support.
+class GTEST_API_ AutoHandle {
+ public:
+  // Assume that Win32 HANDLE type is equivalent to void*. Doing so allows us to
+  // avoid including <windows.h> in this header file. Including <windows.h> is
+  // undesirable because it defines a lot of symbols and macros that tend to
+  // conflict with client code. This assumption is verified by
+  // WindowsTypesTest.HANDLEIsVoidStar.
+  typedef void* Handle;
+  AutoHandle();
+  explicit AutoHandle(Handle handle);
 
-  template <GTEST_9_TYPENAMES_(U)>
-  tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
-      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}
+  ~AutoHandle();
 
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+  Handle Get() const;
+  void Reset();
+  void Reset(Handle handle);
 
-  template <GTEST_9_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_9_TUPLE_(U)& t) {
-    return CopyFrom(t);
-  }
+ private:
+  // Returns true if and only if the handle is a valid handle object that can be
+  // closed.
+  bool IsCloseable() const;
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
-
-  template <GTEST_9_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    f3_ = t.f3_;
-    f4_ = t.f4_;
-    f5_ = t.f5_;
-    f6_ = t.f6_;
-    f7_ = t.f7_;
-    f8_ = t.f8_;
-    return *this;
-  }
+  Handle handle_;
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
-  T3 f3_;
-  T4 f4_;
-  T5 f5_;
-  T6 f6_;
-  T7 f7_;
-  T8 f8_;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
 };
 
-template <GTEST_10_TYPENAMES_(T)>
-class tuple {
+// Allows a controller thread to pause execution of newly created
+// threads until notified.  Instances of this class must be created
+// and destroyed in the controller thread.
+//
+// This class is only for testing Google Test's own constructs. Do not
+// use it in user tests, either directly or indirectly.
+class GTEST_API_ Notification {
  public:
-  template <int k> friend class gtest_internal::Get;
-
-  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(),
-      f9_() {}
-
-  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
-      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
-      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
-      GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2),
-      f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {}
-
-  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
-      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {}
-
-  template <GTEST_10_TYPENAMES_(U)>
-  tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
-      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_),
-      f9_(t.f9_) {}
-
-  tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
-  template <GTEST_10_TYPENAMES_(U)>
-  tuple& operator=(const GTEST_10_TUPLE_(U)& t) {
-    return CopyFrom(t);
-  }
+  Notification();
+  void Notify();
+  void WaitForNotification();
 
-  GTEST_DECLARE_TUPLE_AS_FRIEND_
-
-  template <GTEST_10_TYPENAMES_(U)>
-  tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) {
-    f0_ = t.f0_;
-    f1_ = t.f1_;
-    f2_ = t.f2_;
-    f3_ = t.f3_;
-    f4_ = t.f4_;
-    f5_ = t.f5_;
-    f6_ = t.f6_;
-    f7_ = t.f7_;
-    f8_ = t.f8_;
-    f9_ = t.f9_;
-    return *this;
-  }
+ private:
+  AutoHandle event_;
 
-  T0 f0_;
-  T1 f1_;
-  T2 f2_;
-  T3 f3_;
-  T4 f4_;
-  T5 f5_;
-  T6 f6_;
-  T7 f7_;
-  T8 f8_;
-  T9 f9_;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
 };
+# endif  // GTEST_HAS_NOTIFICATION_
 
-// 6.1.3.2 Tuple creation functions.
-
-// Known limitations: we don't support passing an
-// std::tr1::reference_wrapper<T> to make_tuple().  And we don't
-// implement tie().
+// On MinGW, we can have both GTEST_OS_WINDOWS and GTEST_HAS_PTHREAD
+// defined, but we don't want to use MinGW's pthreads implementation, which
+// has conformance problems with some versions of the POSIX standard.
+# if GTEST_HAS_PTHREAD && !GTEST_OS_WINDOWS_MINGW
 
-inline tuple<> make_tuple() { return tuple<>(); }
-
-template <GTEST_1_TYPENAMES_(T)>
-inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) {
-  return GTEST_1_TUPLE_(T)(f0);
-}
+// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
+// Consequently, it cannot select a correct instantiation of ThreadWithParam
+// in order to call its Run(). Introducing ThreadWithParamBase as a
+// non-templated base class for ThreadWithParam allows us to bypass this
+// problem.
+class ThreadWithParamBase {
+ public:
+  virtual ~ThreadWithParamBase() {}
+  virtual void Run() = 0;
+};
 
-template <GTEST_2_TYPENAMES_(T)>
-inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) {
-  return GTEST_2_TUPLE_(T)(f0, f1);
+// pthread_create() accepts a pointer to a function type with the C linkage.
+// According to the Standard (7.5/1), function types with different linkages
+// are different even if they are otherwise identical.  Some compilers (for
+// example, SunStudio) treat them as different types.  Since class methods
+// cannot be defined with C-linkage we need to define a free C-function to
+// pass into pthread_create().
+extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
+  static_cast<ThreadWithParamBase*>(thread)->Run();
+  return nullptr;
 }
 
-template <GTEST_3_TYPENAMES_(T)>
-inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) {
-  return GTEST_3_TUPLE_(T)(f0, f1, f2);
-}
+// Helper class for testing Google Test's multi-threading constructs.
+// To use it, write:
+//
+//   void ThreadFunc(int param) { /* Do things with param */ }
+//   Notification thread_can_start;
+//   ...
+//   // The thread_can_start parameter is optional; you can supply NULL.
+//   ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
+//   thread_can_start.Notify();
+//
+// These classes are only for testing Google Test's own constructs. Do
+// not use them in user tests, either directly or indirectly.
+template <typename T>
+class ThreadWithParam : public ThreadWithParamBase {
+ public:
+  typedef void UserThreadFunc(T);
 
-template <GTEST_4_TYPENAMES_(T)>
-inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
-    const T3& f3) {
-  return GTEST_4_TUPLE_(T)(f0, f1, f2, f3);
-}
+  ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
+      : func_(func),
+        param_(param),
+        thread_can_start_(thread_can_start),
+        finished_(false) {
+    ThreadWithParamBase* const base = this;
+    // The thread can be created only after all fields except thread_
+    // have been initialized.
+    GTEST_CHECK_POSIX_SUCCESS_(
+        pthread_create(&thread_, nullptr, &ThreadFuncWithCLinkage, base));
+  }
+  ~ThreadWithParam() override { Join(); }
 
-template <GTEST_5_TYPENAMES_(T)>
-inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
-    const T3& f3, const T4& f4) {
-  return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4);
-}
+  void Join() {
+    if (!finished_) {
+      GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, nullptr));
+      finished_ = true;
+    }
+  }
 
-template <GTEST_6_TYPENAMES_(T)>
-inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
-    const T3& f3, const T4& f4, const T5& f5) {
-  return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5);
-}
+  void Run() override {
+    if (thread_can_start_ != nullptr) thread_can_start_->WaitForNotification();
+    func_(param_);
+  }
 
-template <GTEST_7_TYPENAMES_(T)>
-inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
-    const T3& f3, const T4& f4, const T5& f5, const T6& f6) {
-  return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6);
-}
+ private:
+  UserThreadFunc* const func_;  // User-supplied thread function.
+  const T param_;  // User-supplied parameter to the thread function.
+  // When non-NULL, used to block execution until the controller thread
+  // notifies.
+  Notification* const thread_can_start_;
+  bool finished_;  // true if and only if we know that the thread function has
+                   // finished.
+  pthread_t thread_;  // The native thread object.
 
-template <GTEST_8_TYPENAMES_(T)>
-inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
-    const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) {
-  return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7);
-}
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
+};
+# endif  // !GTEST_OS_WINDOWS && GTEST_HAS_PTHREAD ||
+         // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
 
-template <GTEST_9_TYPENAMES_(T)>
-inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
-    const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7,
-    const T8& f8) {
-  return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8);
-}
+# if GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
+// Mutex and ThreadLocal have already been imported into the namespace.
+// Nothing to do here.
 
-template <GTEST_10_TYPENAMES_(T)>
-inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
-    const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7,
-    const T8& f8, const T9& f9) {
-  return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
-}
+# elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
 
-// 6.1.3.3 Tuple helper classes.
+// Mutex implements mutex on Windows platforms.  It is used in conjunction
+// with class MutexLock:
+//
+//   Mutex mutex;
+//   ...
+//   MutexLock lock(&mutex);  // Acquires the mutex and releases it at the
+//                            // end of the current scope.
+//
+// A static Mutex *must* be defined or declared using one of the following
+// macros:
+//   GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
+//   GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
+//
+// (A non-static Mutex is defined/declared in the usual way).
+class GTEST_API_ Mutex {
+ public:
+  enum MutexType { kStatic = 0, kDynamic = 1 };
+  // We rely on kStaticMutex being 0 as it is to what the linker initializes
+  // type_ in static mutexes.  critical_section_ will be initialized lazily
+  // in ThreadSafeLazyInit().
+  enum StaticConstructorSelector { kStaticMutex = 0 };
 
-template <typename Tuple> struct tuple_size;
+  // This constructor intentionally does nothing.  It relies on type_ being
+  // statically initialized to 0 (effectively setting it to kStatic) and on
+  // ThreadSafeLazyInit() to lazily initialize the rest of the members.
+  explicit Mutex(StaticConstructorSelector /*dummy*/) {}
 
-template <GTEST_0_TYPENAMES_(T)>
-struct tuple_size<GTEST_0_TUPLE_(T) > {
-  static const int value = 0;
-};
+  Mutex();
+  ~Mutex();
 
-template <GTEST_1_TYPENAMES_(T)>
-struct tuple_size<GTEST_1_TUPLE_(T) > {
-  static const int value = 1;
-};
+  void Lock();
 
-template <GTEST_2_TYPENAMES_(T)>
-struct tuple_size<GTEST_2_TUPLE_(T) > {
-  static const int value = 2;
-};
+  void Unlock();
 
-template <GTEST_3_TYPENAMES_(T)>
-struct tuple_size<GTEST_3_TUPLE_(T) > {
-  static const int value = 3;
-};
+  // Does nothing if the current thread holds the mutex. Otherwise, crashes
+  // with high probability.
+  void AssertHeld();
 
-template <GTEST_4_TYPENAMES_(T)>
-struct tuple_size<GTEST_4_TUPLE_(T) > {
-  static const int value = 4;
-};
+ private:
+  // Initializes owner_thread_id_ and critical_section_ in static mutexes.
+  void ThreadSafeLazyInit();
 
-template <GTEST_5_TYPENAMES_(T)>
-struct tuple_size<GTEST_5_TUPLE_(T) > {
-  static const int value = 5;
-};
+  // Per https://blogs.msdn.microsoft.com/oldnewthing/20040223-00/?p=40503,
+  // we assume that 0 is an invalid value for thread IDs.
+  unsigned int owner_thread_id_;
 
-template <GTEST_6_TYPENAMES_(T)>
-struct tuple_size<GTEST_6_TUPLE_(T) > {
-  static const int value = 6;
-};
+  // For static mutexes, we rely on these members being initialized to zeros
+  // by the linker.
+  MutexType type_;
+  long critical_section_init_phase_;  // NOLINT
+  GTEST_CRITICAL_SECTION* critical_section_;
 
-template <GTEST_7_TYPENAMES_(T)>
-struct tuple_size<GTEST_7_TUPLE_(T) > {
-  static const int value = 7;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
 };
 
-template <GTEST_8_TYPENAMES_(T)>
-struct tuple_size<GTEST_8_TUPLE_(T) > {
-  static const int value = 8;
-};
+# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+    extern ::testing::internal::Mutex mutex
 
-template <GTEST_9_TYPENAMES_(T)>
-struct tuple_size<GTEST_9_TUPLE_(T) > {
-  static const int value = 9;
-};
+# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
+    ::testing::internal::Mutex mutex(::testing::internal::Mutex::kStaticMutex)
 
-template <GTEST_10_TYPENAMES_(T)>
-struct tuple_size<GTEST_10_TUPLE_(T) > {
-  static const int value = 10;
-};
+// We cannot name this class MutexLock because the ctor declaration would
+// conflict with a macro named MutexLock, which is defined on some
+// platforms. That macro is used as a defensive measure to prevent against
+// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
+// "MutexLock l(&mu)".  Hence the typedef trick below.
+class GTestMutexLock {
+ public:
+  explicit GTestMutexLock(Mutex* mutex)
+      : mutex_(mutex) { mutex_->Lock(); }
 
-template <int k, class Tuple>
-struct tuple_element {
-  typedef typename gtest_internal::TupleElement<
-      k < (tuple_size<Tuple>::value), k, Tuple>::type type;
-};
+  ~GTestMutexLock() { mutex_->Unlock(); }
 
-#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type
+ private:
+  Mutex* const mutex_;
 
-// 6.1.3.4 Element access.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
+};
 
-namespace gtest_internal {
+typedef GTestMutexLock MutexLock;
 
-template <>
-class Get<0> {
+// Base class for ValueHolder<T>.  Allows a caller to hold and delete a value
+// without knowing its type.
+class ThreadLocalValueHolderBase {
  public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
-  Field(Tuple& t) { return t.f0_; }  // NOLINT
-
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
-  ConstField(const Tuple& t) { return t.f0_; }
+  virtual ~ThreadLocalValueHolderBase() {}
 };
 
-template <>
-class Get<1> {
+// Provides a way for a thread to send notifications to a ThreadLocal
+// regardless of its parameter type.
+class ThreadLocalBase {
  public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
-  Field(Tuple& t) { return t.f1_; }  // NOLINT
+  // Creates a new ValueHolder<T> object holding a default value passed to
+  // this ThreadLocal<T>'s constructor and returns it.  It is the caller's
+  // responsibility not to call this when the ThreadLocal<T> instance already
+  // has a value on the current thread.
+  virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const = 0;
+
+ protected:
+  ThreadLocalBase() {}
+  virtual ~ThreadLocalBase() {}
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
-  ConstField(const Tuple& t) { return t.f1_; }
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocalBase);
 };
 
-template <>
-class Get<2> {
+// Maps a thread to a set of ThreadLocals that have values instantiated on that
+// thread and notifies them when the thread exits.  A ThreadLocal instance is
+// expected to persist until all threads it has values on have terminated.
+class GTEST_API_ ThreadLocalRegistry {
  public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
-  Field(Tuple& t) { return t.f2_; }  // NOLINT
+  // Registers thread_local_instance as having value on the current thread.
+  // Returns a value that can be used to identify the thread from other threads.
+  static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
+      const ThreadLocalBase* thread_local_instance);
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
-  ConstField(const Tuple& t) { return t.f2_; }
+  // Invoked when a ThreadLocal instance is destroyed.
+  static void OnThreadLocalDestroyed(
+      const ThreadLocalBase* thread_local_instance);
 };
 
-template <>
-class Get<3> {
+class GTEST_API_ ThreadWithParamBase {
  public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
-  Field(Tuple& t) { return t.f3_; }  // NOLINT
+  void Join();
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
-  ConstField(const Tuple& t) { return t.f3_; }
-};
+ protected:
+  class Runnable {
+   public:
+    virtual ~Runnable() {}
+    virtual void Run() = 0;
+  };
 
-template <>
-class Get<4> {
- public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
-  Field(Tuple& t) { return t.f4_; }  // NOLINT
+  ThreadWithParamBase(Runnable *runnable, Notification* thread_can_start);
+  virtual ~ThreadWithParamBase();
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
-  ConstField(const Tuple& t) { return t.f4_; }
+ private:
+  AutoHandle thread_;
 };
 
-template <>
-class Get<5> {
+// Helper class for testing Google Test's multi-threading constructs.
+template <typename T>
+class ThreadWithParam : public ThreadWithParamBase {
  public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
-  Field(Tuple& t) { return t.f5_; }  // NOLINT
+  typedef void UserThreadFunc(T);
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
-  ConstField(const Tuple& t) { return t.f5_; }
-};
+  ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
+      : ThreadWithParamBase(new RunnableImpl(func, param), thread_can_start) {
+  }
+  virtual ~ThreadWithParam() {}
 
-template <>
-class Get<6> {
- public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
-  Field(Tuple& t) { return t.f6_; }  // NOLINT
+ private:
+  class RunnableImpl : public Runnable {
+   public:
+    RunnableImpl(UserThreadFunc* func, T param)
+        : func_(func),
+          param_(param) {
+    }
+    virtual ~RunnableImpl() {}
+    virtual void Run() {
+      func_(param_);
+    }
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
-  ConstField(const Tuple& t) { return t.f6_; }
-};
+   private:
+    UserThreadFunc* const func_;
+    const T param_;
 
-template <>
-class Get<7> {
- public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
-  Field(Tuple& t) { return t.f7_; }  // NOLINT
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(RunnableImpl);
+  };
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
-  ConstField(const Tuple& t) { return t.f7_; }
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
 };
 
-template <>
-class Get<8> {
+// Implements thread-local storage on Windows systems.
+//
+//   // Thread 1
+//   ThreadLocal<int> tl(100);  // 100 is the default value for each thread.
+//
+//   // Thread 2
+//   tl.set(150);  // Changes the value for thread 2 only.
+//   EXPECT_EQ(150, tl.get());
+//
+//   // Thread 1
+//   EXPECT_EQ(100, tl.get());  // In thread 1, tl has the original value.
+//   tl.set(200);
+//   EXPECT_EQ(200, tl.get());
+//
+// The template type argument T must have a public copy constructor.
+// In addition, the default ThreadLocal constructor requires T to have
+// a public default constructor.
+//
+// The users of a TheadLocal instance have to make sure that all but one
+// threads (including the main one) using that instance have exited before
+// destroying it. Otherwise, the per-thread objects managed for them by the
+// ThreadLocal instance are not guaranteed to be destroyed on all platforms.
+//
+// Google Test only uses global ThreadLocal objects.  That means they
+// will die after main() has returned.  Therefore, no per-thread
+// object managed by Google Test will be leaked as long as all threads
+// using Google Test have exited when main() returns.
+template <typename T>
+class ThreadLocal : public ThreadLocalBase {
  public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
-  Field(Tuple& t) { return t.f8_; }  // NOLINT
+  ThreadLocal() : default_factory_(new DefaultValueHolderFactory()) {}
+  explicit ThreadLocal(const T& value)
+      : default_factory_(new InstanceValueHolderFactory(value)) {}
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
-  ConstField(const Tuple& t) { return t.f8_; }
-};
+  ~ThreadLocal() { ThreadLocalRegistry::OnThreadLocalDestroyed(this); }
 
-template <>
-class Get<9> {
- public:
-  template <class Tuple>
-  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
-  Field(Tuple& t) { return t.f9_; }  // NOLINT
+  T* pointer() { return GetOrCreateValue(); }
+  const T* pointer() const { return GetOrCreateValue(); }
+  const T& get() const { return *pointer(); }
+  void set(const T& value) { *pointer() = value; }
 
-  template <class Tuple>
-  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
-  ConstField(const Tuple& t) { return t.f9_; }
-};
+ private:
+  // Holds a value of T.  Can be deleted via its base class without the caller
+  // knowing the type of T.
+  class ValueHolder : public ThreadLocalValueHolderBase {
+   public:
+    ValueHolder() : value_() {}
+    explicit ValueHolder(const T& value) : value_(value) {}
 
-}  // namespace gtest_internal
+    T* pointer() { return &value_; }
 
-template <int k, GTEST_10_TYPENAMES_(T)>
-GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
-get(GTEST_10_TUPLE_(T)& t) {
-  return gtest_internal::Get<k>::Field(t);
-}
+   private:
+    T value_;
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
+  };
 
-template <int k, GTEST_10_TYPENAMES_(T)>
-GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k,  GTEST_10_TUPLE_(T)))
-get(const GTEST_10_TUPLE_(T)& t) {
-  return gtest_internal::Get<k>::ConstField(t);
-}
 
-// 6.1.3.5 Relational operators
+  T* GetOrCreateValue() const {
+    return static_cast<ValueHolder*>(
+        ThreadLocalRegistry::GetValueOnCurrentThread(this))->pointer();
+  }
 
-// We only implement == and !=, as we don't have a need for the rest yet.
+  virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const {
+    return default_factory_->MakeNewHolder();
+  }
 
-namespace gtest_internal {
+  class ValueHolderFactory {
+   public:
+    ValueHolderFactory() {}
+    virtual ~ValueHolderFactory() {}
+    virtual ValueHolder* MakeNewHolder() const = 0;
 
-// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
-// first k fields of t1 equals the first k fields of t2.
-// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if
-// k1 != k2.
-template <int kSize1, int kSize2>
-struct SameSizeTuplePrefixComparator;
+   private:
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory);
+  };
 
-template <>
-struct SameSizeTuplePrefixComparator<0, 0> {
-  template <class Tuple1, class Tuple2>
-  static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
-    return true;
-  }
-};
+  class DefaultValueHolderFactory : public ValueHolderFactory {
+   public:
+    DefaultValueHolderFactory() {}
+    ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
 
-template <int k>
-struct SameSizeTuplePrefixComparator<k, k> {
-  template <class Tuple1, class Tuple2>
-  static bool Eq(const Tuple1& t1, const Tuple2& t2) {
-    return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
-        ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
-  }
-};
+   private:
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
+  };
 
-}  // namespace gtest_internal
+  class InstanceValueHolderFactory : public ValueHolderFactory {
+   public:
+    explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
+    ValueHolder* MakeNewHolder() const override {
+      return new ValueHolder(value_);
+    }
 
-template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
-inline bool operator==(const GTEST_10_TUPLE_(T)& t,
-                       const GTEST_10_TUPLE_(U)& u) {
-  return gtest_internal::SameSizeTuplePrefixComparator<
-      tuple_size<GTEST_10_TUPLE_(T) >::value,
-      tuple_size<GTEST_10_TUPLE_(U) >::value>::Eq(t, u);
-}
+   private:
+    const T value_;  // The value for each thread.
 
-template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
-inline bool operator!=(const GTEST_10_TUPLE_(T)& t,
-                       const GTEST_10_TUPLE_(U)& u) { return !(t == u); }
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory);
+  };
 
-// 6.1.4 Pairs.
-// Unimplemented.
+  std::unique_ptr<ValueHolderFactory> default_factory_;
 
-}  // namespace tr1
-}  // namespace std
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
+};
 
-#undef GTEST_0_TUPLE_
-#undef GTEST_1_TUPLE_
-#undef GTEST_2_TUPLE_
-#undef GTEST_3_TUPLE_
-#undef GTEST_4_TUPLE_
-#undef GTEST_5_TUPLE_
-#undef GTEST_6_TUPLE_
-#undef GTEST_7_TUPLE_
-#undef GTEST_8_TUPLE_
-#undef GTEST_9_TUPLE_
-#undef GTEST_10_TUPLE_
-
-#undef GTEST_0_TYPENAMES_
-#undef GTEST_1_TYPENAMES_
-#undef GTEST_2_TYPENAMES_
-#undef GTEST_3_TYPENAMES_
-#undef GTEST_4_TYPENAMES_
-#undef GTEST_5_TYPENAMES_
-#undef GTEST_6_TYPENAMES_
-#undef GTEST_7_TYPENAMES_
-#undef GTEST_8_TYPENAMES_
-#undef GTEST_9_TYPENAMES_
-#undef GTEST_10_TYPENAMES_
-
-#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
-#undef GTEST_BY_REF_
-#undef GTEST_ADD_REF_
-#undef GTEST_TUPLE_ELEMENT_
-
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
-# elif GTEST_ENV_HAS_STD_TUPLE_
-#  include <tuple>
-// C++11 puts its tuple into the ::std namespace rather than
-// ::std::tr1.  gtest expects tuple to live in ::std::tr1, so put it there.
-// This causes undefined behavior, but supported compilers react in
-// the way we intend.
-namespace std {
-namespace tr1 {
-using ::std::get;
-using ::std::make_tuple;
-using ::std::tuple;
-using ::std::tuple_element;
-using ::std::tuple_size;
-}
-}
+# elif GTEST_HAS_PTHREAD
 
-# elif GTEST_OS_SYMBIAN
-
-// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to
-// use STLport's tuple implementation, which unfortunately doesn't
-// work as the copy of STLport distributed with Symbian is incomplete.
-// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to
-// use its own tuple implementation.
-#  ifdef BOOST_HAS_TR1_TUPLE
-#   undef BOOST_HAS_TR1_TUPLE
-#  endif  // BOOST_HAS_TR1_TUPLE
-
-// This prevents <boost/tr1/detail/config.hpp>, which defines
-// BOOST_HAS_TR1_TUPLE, from being #included by Boost's <tuple>.
-#  define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED
-#  include <tuple>
-
-# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000)
-// GCC 4.0+ implements tr1/tuple in the <tr1/tuple> header.  This does
-// not conform to the TR1 spec, which requires the header to be <tuple>.
-
-#  if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
-// Until version 4.3.2, gcc has a bug that causes <tr1/functional>,
-// which is #included by <tr1/tuple>, to not compile when RTTI is
-// disabled.  _TR1_FUNCTIONAL is the header guard for
-// <tr1/functional>.  Hence the following #define is a hack to prevent
-// <tr1/functional> from being included.
-#   define _TR1_FUNCTIONAL 1
-#   include <tr1/tuple>
-#   undef _TR1_FUNCTIONAL  // Allows the user to #include
-                        // <tr1/functional> if he chooses to.
-#  else
-#   include <tr1/tuple>  // NOLINT
-#  endif  // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
+// MutexBase and Mutex implement mutex on pthreads-based platforms.
+class MutexBase {
+ public:
+  // Acquires this mutex.
+  void Lock() {
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
+    owner_ = pthread_self();
+    has_owner_ = true;
+  }
 
-# else
-// If the compiler is not GCC 4.0+, we assume the user is using a
-// spec-conforming TR1 implementation.
-#  include <tuple>  // NOLINT
-# endif  // GTEST_USE_OWN_TR1_TUPLE
+  // Releases this mutex.
+  void Unlock() {
+    // Since the lock is being released the owner_ field should no longer be
+    // considered valid. We don't protect writing to has_owner_ here, as it's
+    // the caller's responsibility to ensure that the current thread holds the
+    // mutex when this is called.
+    has_owner_ = false;
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
+  }
 
-#endif  // GTEST_HAS_TR1_TUPLE
+  // Does nothing if the current thread holds the mutex. Otherwise, crashes
+  // with high probability.
+  void AssertHeld() const {
+    GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self()))
+        << "The current thread is not holding the mutex @" << this;
+  }
 
-// Determines whether clone(2) is supported.
-// Usually it will only be available on Linux, excluding
-// Linux on the Itanium architecture.
-// Also see http://linux.die.net/man/2/clone.
-#ifndef GTEST_HAS_CLONE
-// The user didn't tell us, so we need to figure it out.
+  // A static mutex may be used before main() is entered.  It may even
+  // be used before the dynamic initialization stage.  Therefore we
+  // must be able to initialize a static mutex object at link time.
+  // This means MutexBase has to be a POD and its member variables
+  // have to be public.
+ public:
+  pthread_mutex_t mutex_;  // The underlying pthread mutex.
+  // has_owner_ indicates whether the owner_ field below contains a valid thread
+  // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All
+  // accesses to the owner_ field should be protected by a check of this field.
+  // An alternative might be to memset() owner_ to all zeros, but there's no
+  // guarantee that a zero'd pthread_t is necessarily invalid or even different
+  // from pthread_self().
+  bool has_owner_;
+  pthread_t owner_;  // The thread holding the mutex.
+};
 
-# if GTEST_OS_LINUX && !defined(__ia64__)
-#  if GTEST_OS_LINUX_ANDROID
-// On Android, clone() is only available on ARM starting with Gingerbread.
-#    if defined(__arm__) && __ANDROID_API__ >= 9
-#     define GTEST_HAS_CLONE 1
-#    else
-#     define GTEST_HAS_CLONE 0
-#    endif
-#  else
-#   define GTEST_HAS_CLONE 1
-#  endif
-# else
-#  define GTEST_HAS_CLONE 0
-# endif  // GTEST_OS_LINUX && !defined(__ia64__)
+// Forward-declares a static mutex.
+#  define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+     extern ::testing::internal::MutexBase mutex
 
-#endif  // GTEST_HAS_CLONE
+// Defines and statically (i.e. at link time) initializes a static mutex.
+// The initialization list here does not explicitly initialize each field,
+// instead relying on default initialization for the unspecified fields. In
+// particular, the owner_ field (a pthread_t) is not explicitly initialized.
+// This allows initialization to work whether pthread_t is a scalar or struct.
+// The flag -Wmissing-field-initializers must not be specified for this to work.
+#define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
+  ::testing::internal::MutexBase mutex = {PTHREAD_MUTEX_INITIALIZER, false, 0}
 
-// Determines whether to support stream redirection. This is used to test
-// output correctness and to implement death tests.
-#ifndef GTEST_HAS_STREAM_REDIRECTION
-// By default, we assume that stream redirection is supported on all
-// platforms except known mobile ones.
-# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN
-#  define GTEST_HAS_STREAM_REDIRECTION 0
-# else
-#  define GTEST_HAS_STREAM_REDIRECTION 1
-# endif  // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN
-#endif  // GTEST_HAS_STREAM_REDIRECTION
+// The Mutex class can only be used for mutexes created at runtime. It
+// shares its API with MutexBase otherwise.
+class Mutex : public MutexBase {
+ public:
+  Mutex() {
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, nullptr));
+    has_owner_ = false;
+  }
+  ~Mutex() {
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_));
+  }
 
-// Determines whether to support death tests.
-// Google Test does not support death tests for VC 7.1 and earlier as
-// abort() in a VC 7.1 application compiled as GUI in debug config
-// pops up a dialog window that cannot be suppressed programmatically.
-#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
-     (GTEST_OS_MAC && !GTEST_OS_IOS) || GTEST_OS_IOS_SIMULATOR || \
-     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \
-     GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \
-     GTEST_OS_OPENBSD || GTEST_OS_QNX)
-# define GTEST_HAS_DEATH_TEST 1
-# include <vector>  // NOLINT
-#endif
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
+};
 
-// We don't support MSVC 7.1 with exceptions disabled now.  Therefore
-// all the compilers we care about are adequate for supporting
-// value-parameterized tests.
-#define GTEST_HAS_PARAM_TEST 1
+// We cannot name this class MutexLock because the ctor declaration would
+// conflict with a macro named MutexLock, which is defined on some
+// platforms. That macro is used as a defensive measure to prevent against
+// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
+// "MutexLock l(&mu)".  Hence the typedef trick below.
+class GTestMutexLock {
+ public:
+  explicit GTestMutexLock(MutexBase* mutex)
+      : mutex_(mutex) { mutex_->Lock(); }
 
-// Determines whether to support type-driven tests.
+  ~GTestMutexLock() { mutex_->Unlock(); }
 
-// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
-// Sun Pro CC, IBM Visual Age, and HP aCC support.
-#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \
-    defined(__IBMCPP__) || defined(__HP_aCC)
-# define GTEST_HAS_TYPED_TEST 1
-# define GTEST_HAS_TYPED_TEST_P 1
-#endif
+ private:
+  MutexBase* const mutex_;
 
-// Determines whether to support Combine(). This only makes sense when
-// value-parameterized tests are enabled.  The implementation doesn't
-// work on Sun Studio since it doesn't understand templated conversion
-// operators.
-#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC)
-# define GTEST_HAS_COMBINE 1
-#endif
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
+};
 
-// Determines whether the system compiler uses UTF-16 for encoding wide strings.
-#define GTEST_WIDE_STRING_USES_UTF16_ \
-    (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX)
+typedef GTestMutexLock MutexLock;
 
-// Determines whether test results can be streamed to a socket.
-#if GTEST_OS_LINUX
-# define GTEST_CAN_STREAM_RESULTS_ 1
-#endif
+// Helpers for ThreadLocal.
 
-// Defines some utility macros.
+// pthread_key_create() requires DeleteThreadLocalValue() to have
+// C-linkage.  Therefore it cannot be templatized to access
+// ThreadLocal<T>.  Hence the need for class
+// ThreadLocalValueHolderBase.
+class ThreadLocalValueHolderBase {
+ public:
+  virtual ~ThreadLocalValueHolderBase() {}
+};
 
-// The GNU compiler emits a warning if nested "if" statements are followed by
-// an "else" statement and braces are not used to explicitly disambiguate the
-// "else" binding.  This leads to problems with code like:
-//
-//   if (gate)
-//     ASSERT_*(condition) << "Some message";
-//
-// The "switch (0) case 0:" idiom is used to suppress this.
-#ifdef __INTEL_COMPILER
-# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
-#else
-# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default:  // NOLINT
-#endif
+// Called by pthread to delete thread-local data stored by
+// pthread_setspecific().
+extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
+  delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
+}
 
-// Use this annotation at the end of a struct/class definition to
-// prevent the compiler from optimizing away instances that are never
-// used.  This is useful when all interesting logic happens inside the
-// c'tor and / or d'tor.  Example:
-//
-//   struct Foo {
-//     Foo() { ... }
-//   } GTEST_ATTRIBUTE_UNUSED_;
-//
-// Also use it after a variable or parameter declaration to tell the
-// compiler the variable/parameter does not have to be used.
-#if defined(__GNUC__) && !defined(COMPILER_ICC)
-# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
-#else
-# define GTEST_ATTRIBUTE_UNUSED_
-#endif
+// Implements thread-local storage on pthreads-based systems.
+template <typename T>
+class GTEST_API_ ThreadLocal {
+ public:
+  ThreadLocal()
+      : key_(CreateKey()), default_factory_(new DefaultValueHolderFactory()) {}
+  explicit ThreadLocal(const T& value)
+      : key_(CreateKey()),
+        default_factory_(new InstanceValueHolderFactory(value)) {}
 
-// A macro to disallow operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_ASSIGN_(type)\
-  void operator=(type const &)
+  ~ThreadLocal() {
+    // Destroys the managed object for the current thread, if any.
+    DeleteThreadLocalValue(pthread_getspecific(key_));
 
-// A macro to disallow copy constructor and operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\
-  type(type const &);\
-  GTEST_DISALLOW_ASSIGN_(type)
+    // Releases resources associated with the key.  This will *not*
+    // delete managed objects for other threads.
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
+  }
 
-// Tell the compiler to warn about unused return values for functions declared
-// with this macro.  The macro should be used on function declarations
-// following the argument list:
-//
-//   Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
-#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC)
-# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result))
-#else
-# define GTEST_MUST_USE_RESULT_
-#endif  // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC
+  T* pointer() { return GetOrCreateValue(); }
+  const T* pointer() const { return GetOrCreateValue(); }
+  const T& get() const { return *pointer(); }
+  void set(const T& value) { *pointer() = value; }
 
-// Determine whether the compiler supports Microsoft's Structured Exception
-// Handling.  This is supported by several Windows compilers but generally
-// does not exist on any other system.
-#ifndef GTEST_HAS_SEH
-// The user didn't tell us, so we need to figure it out.
+ private:
+  // Holds a value of type T.
+  class ValueHolder : public ThreadLocalValueHolderBase {
+   public:
+    ValueHolder() : value_() {}
+    explicit ValueHolder(const T& value) : value_(value) {}
 
-# if defined(_MSC_VER) || defined(__BORLANDC__)
-// These two compilers are known to support SEH.
-#  define GTEST_HAS_SEH 1
-# else
-// Assume no SEH.
-#  define GTEST_HAS_SEH 0
-# endif
+    T* pointer() { return &value_; }
 
-#endif  // GTEST_HAS_SEH
+   private:
+    T value_;
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
+  };
 
-#ifdef _MSC_VER
+  static pthread_key_t CreateKey() {
+    pthread_key_t key;
+    // When a thread exits, DeleteThreadLocalValue() will be called on
+    // the object managed for that thread.
+    GTEST_CHECK_POSIX_SUCCESS_(
+        pthread_key_create(&key, &DeleteThreadLocalValue));
+    return key;
+  }
 
-# if GTEST_LINKED_AS_SHARED_LIBRARY
-#  define GTEST_API_ __declspec(dllimport)
-# elif GTEST_CREATE_SHARED_LIBRARY
-#  define GTEST_API_ __declspec(dllexport)
-# endif
+  T* GetOrCreateValue() const {
+    ThreadLocalValueHolderBase* const holder =
+        static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_));
+    if (holder != nullptr) {
+      return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
+    }
 
-#endif  // _MSC_VER
+    ValueHolder* const new_holder = default_factory_->MakeNewHolder();
+    ThreadLocalValueHolderBase* const holder_base = new_holder;
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
+    return new_holder->pointer();
+  }
 
-#ifndef GTEST_API_
-# define GTEST_API_
-#endif
+  class ValueHolderFactory {
+   public:
+    ValueHolderFactory() {}
+    virtual ~ValueHolderFactory() {}
+    virtual ValueHolder* MakeNewHolder() const = 0;
 
-#ifdef __GNUC__
-// Ask the compiler to never inline a given function.
-# define GTEST_NO_INLINE_ __attribute__((noinline))
-#else
-# define GTEST_NO_INLINE_
-#endif
+   private:
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory);
+  };
 
-// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
-#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION)
-# define GTEST_HAS_CXXABI_H_ 1
-#else
-# define GTEST_HAS_CXXABI_H_ 0
-#endif
+  class DefaultValueHolderFactory : public ValueHolderFactory {
+   public:
+    DefaultValueHolderFactory() {}
+    ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
 
-namespace testing {
+   private:
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
+  };
 
-class Message;
+  class InstanceValueHolderFactory : public ValueHolderFactory {
+   public:
+    explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
+    ValueHolder* MakeNewHolder() const override {
+      return new ValueHolder(value_);
+    }
 
-namespace internal {
+   private:
+    const T value_;  // The value for each thread.
 
-// A secret type that Google Test users don't know about.  It has no
-// definition on purpose.  Therefore it's impossible to create a
-// Secret object, which is what we want.
-class Secret;
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory);
+  };
 
-// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time
-// expression is true. For example, you could use it to verify the
-// size of a static array:
-//
-//   GTEST_COMPILE_ASSERT_(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
-//                         content_type_names_incorrect_size);
-//
-// or to make sure a struct is smaller than a certain size:
-//
-//   GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large);
-//
-// The second argument to the macro is the name of the variable. If
-// the expression is false, most compilers will issue a warning/error
-// containing the name of the variable.
+  // A key pthreads uses for looking up per-thread values.
+  const pthread_key_t key_;
+  std::unique_ptr<ValueHolderFactory> default_factory_;
 
-template <bool>
-struct CompileAssert {
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
 };
 
-#define GTEST_COMPILE_ASSERT_(expr, msg) \
-  typedef ::testing::internal::CompileAssert<(static_cast<bool>(expr))> \
-      msg[static_cast<bool>(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_
+# endif  // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
 
-// Implementation details of GTEST_COMPILE_ASSERT_:
-//
-// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1
-//   elements (and thus is invalid) when the expression is false.
-//
-// - The simpler definition
-//
-//    #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1]
-//
-//   does not work, as gcc supports variable-length arrays whose sizes
-//   are determined at run-time (this is gcc's extension and not part
-//   of the C++ standard).  As a result, gcc fails to reject the
-//   following code with the simple definition:
-//
-//     int foo;
-//     GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is
-//                                      // not a compile-time constant.
-//
-// - By using the type CompileAssert<(bool(expr))>, we ensures that
-//   expr is a compile-time constant.  (Template arguments must be
-//   determined at compile-time.)
-//
-// - The outter parentheses in CompileAssert<(bool(expr))> are necessary
-//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
-//
-//     CompileAssert<bool(expr)>
-//
-//   instead, these compilers will refuse to compile
-//
-//     GTEST_COMPILE_ASSERT_(5 > 0, some_message);
-//
-//   (They seem to think the ">" in "5 > 0" marks the end of the
-//   template argument list.)
-//
-// - The array size is (bool(expr) ? 1 : -1), instead of simply
-//
-//     ((expr) ? 1 : -1).
-//
-//   This is to avoid running into a bug in MS VC 7.1, which
-//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+#else  // GTEST_IS_THREADSAFE
 
-// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h.
-//
-// This template is declared, but intentionally undefined.
-template <typename T1, typename T2>
-struct StaticAssertTypeEqHelper;
+// A dummy implementation of synchronization primitives (mutex, lock,
+// and thread-local variable).  Necessary for compiling Google Test where
+// mutex is not supported - using Google Test in multiple threads is not
+// supported on such platforms.
 
-template <typename T>
-struct StaticAssertTypeEqHelper<T, T> {};
+class Mutex {
+ public:
+  Mutex() {}
+  void Lock() {}
+  void Unlock() {}
+  void AssertHeld() const {}
+};
 
-#if GTEST_HAS_GLOBAL_STRING
-typedef ::string string;
-#else
-typedef ::std::string string;
-#endif  // GTEST_HAS_GLOBAL_STRING
+# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+  extern ::testing::internal::Mutex mutex
 
-#if GTEST_HAS_GLOBAL_WSTRING
-typedef ::wstring wstring;
-#elif GTEST_HAS_STD_WSTRING
-typedef ::std::wstring wstring;
-#endif  // GTEST_HAS_GLOBAL_WSTRING
+# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex
 
-// A helper for suppressing warnings on constant condition.  It just
-// returns 'condition'.
-GTEST_API_ bool IsTrue(bool condition);
+// We cannot name this class MutexLock because the ctor declaration would
+// conflict with a macro named MutexLock, which is defined on some
+// platforms. That macro is used as a defensive measure to prevent against
+// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
+// "MutexLock l(&mu)".  Hence the typedef trick below.
+class GTestMutexLock {
+ public:
+  explicit GTestMutexLock(Mutex*) {}  // NOLINT
+};
 
-// Defines scoped_ptr.
+typedef GTestMutexLock MutexLock;
 
-// This implementation of scoped_ptr is PARTIAL - it only contains
-// enough stuff to satisfy Google Test's need.
 template <typename T>
-class scoped_ptr {
+class GTEST_API_ ThreadLocal {
  public:
-  typedef T element_type;
-
-  explicit scoped_ptr(T* p = NULL) : ptr_(p) {}
-  ~scoped_ptr() { reset(); }
-
-  T& operator*() const { return *ptr_; }
-  T* operator->() const { return ptr_; }
-  T* get() const { return ptr_; }
-
-  T* release() {
-    T* const ptr = ptr_;
-    ptr_ = NULL;
-    return ptr;
-  }
-
-  void reset(T* p = NULL) {
-    if (p != ptr_) {
-      if (IsTrue(sizeof(T) > 0)) {  // Makes sure T is a complete type.
-        delete ptr_;
-      }
-      ptr_ = p;
-    }
-  }
-
+  ThreadLocal() : value_() {}
+  explicit ThreadLocal(const T& value) : value_(value) {}
+  T* pointer() { return &value_; }
+  const T* pointer() const { return &value_; }
+  const T& get() const { return value_; }
+  void set(const T& value) { value_ = value; }
  private:
-  T* ptr_;
-
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
+  T value_;
 };
 
-// Defines RE.
-
-// A simple C++ wrapper for <regex.h>.  It uses the POSIX Extended
-// Regular Expression syntax.
-class GTEST_API_ RE {
- public:
-  // A copy constructor is required by the Standard to initialize object
-  // references from r-values.
-  RE(const RE& other) { Init(other.pattern()); }
-
-  // Constructs an RE from a string.
-  RE(const ::std::string& regex) { Init(regex.c_str()); }  // NOLINT
-
-#if GTEST_HAS_GLOBAL_STRING
+#endif  // GTEST_IS_THREADSAFE
 
-  RE(const ::string& regex) { Init(regex.c_str()); }  // NOLINT
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+GTEST_API_ size_t GetThreadCount();
 
-#endif  // GTEST_HAS_GLOBAL_STRING
+#if GTEST_OS_WINDOWS
+# define GTEST_PATH_SEP_ "\\"
+# define GTEST_HAS_ALT_PATH_SEP_ 1
+#else
+# define GTEST_PATH_SEP_ "/"
+# define GTEST_HAS_ALT_PATH_SEP_ 0
+#endif  // GTEST_OS_WINDOWS
 
-  RE(const char* regex) { Init(regex); }  // NOLINT
-  ~RE();
+// Utilities for char.
 
-  // Returns the string representation of the regex.
-  const char* pattern() const { return pattern_; }
+// isspace(int ch) and friends accept an unsigned char or EOF.  char
+// may be signed, depending on the compiler (or compiler flags).
+// Therefore we need to cast a char to unsigned char before calling
+// isspace(), etc.
 
-  // FullMatch(str, re) returns true iff regular expression re matches
-  // the entire str.
-  // PartialMatch(str, re) returns true iff regular expression re
-  // matches a substring of str (including str itself).
-  //
-  // TODO(wan@google.com): make FullMatch() and PartialMatch() work
-  // when str contains NUL characters.
-  static bool FullMatch(const ::std::string& str, const RE& re) {
-    return FullMatch(str.c_str(), re);
-  }
-  static bool PartialMatch(const ::std::string& str, const RE& re) {
-    return PartialMatch(str.c_str(), re);
-  }
+inline bool IsAlpha(char ch) {
+  return isalpha(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsAlNum(char ch) {
+  return isalnum(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsDigit(char ch) {
+  return isdigit(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsLower(char ch) {
+  return islower(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsSpace(char ch) {
+  return isspace(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsUpper(char ch) {
+  return isupper(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsXDigit(char ch) {
+  return isxdigit(static_cast<unsigned char>(ch)) != 0;
+}
+#ifdef __cpp_char8_t
+inline bool IsXDigit(char8_t ch) {
+  return isxdigit(static_cast<unsigned char>(ch)) != 0;
+}
+#endif
+inline bool IsXDigit(char16_t ch) {
+  const unsigned char low_byte = static_cast<unsigned char>(ch);
+  return ch == low_byte && isxdigit(low_byte) != 0;
+}
+inline bool IsXDigit(char32_t ch) {
+  const unsigned char low_byte = static_cast<unsigned char>(ch);
+  return ch == low_byte && isxdigit(low_byte) != 0;
+}
+inline bool IsXDigit(wchar_t ch) {
+  const unsigned char low_byte = static_cast<unsigned char>(ch);
+  return ch == low_byte && isxdigit(low_byte) != 0;
+}
 
-#if GTEST_HAS_GLOBAL_STRING
+inline char ToLower(char ch) {
+  return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
+}
+inline char ToUpper(char ch) {
+  return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
+}
 
-  static bool FullMatch(const ::string& str, const RE& re) {
-    return FullMatch(str.c_str(), re);
-  }
-  static bool PartialMatch(const ::string& str, const RE& re) {
-    return PartialMatch(str.c_str(), re);
-  }
+inline std::string StripTrailingSpaces(std::string str) {
+  std::string::iterator it = str.end();
+  while (it != str.begin() && IsSpace(*--it))
+    it = str.erase(it);
+  return str;
+}
 
-#endif  // GTEST_HAS_GLOBAL_STRING
+// The testing::internal::posix namespace holds wrappers for common
+// POSIX functions.  These wrappers hide the differences between
+// Windows/MSVC and POSIX systems.  Since some compilers define these
+// standard functions as macros, the wrapper cannot have the same name
+// as the wrapped function.
 
-  static bool FullMatch(const char* str, const RE& re);
-  static bool PartialMatch(const char* str, const RE& re);
+namespace posix {
 
- private:
-  void Init(const char* regex);
+// Functions with a different name on Windows.
 
-  // We use a const char* instead of an std::string, as Google Test used to be
-  // used where std::string is not available.  TODO(wan@google.com): change to
-  // std::string.
-  const char* pattern_;
-  bool is_valid_;
+#if GTEST_OS_WINDOWS
 
-#if GTEST_USES_POSIX_RE
+typedef struct _stat StatStruct;
 
-  regex_t full_regex_;     // For FullMatch().
-  regex_t partial_regex_;  // For PartialMatch().
+# ifdef __BORLANDC__
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
+  return stricmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+# else  // !__BORLANDC__
+#  if GTEST_OS_WINDOWS_MOBILE
+inline int DoIsATTY(int /* fd */) { return 0; }
+#  else
+inline int DoIsATTY(int fd) { return _isatty(fd); }
+#  endif  // GTEST_OS_WINDOWS_MOBILE
+inline int StrCaseCmp(const char* s1, const char* s2) {
+  return _stricmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return _strdup(src); }
+# endif  // __BORLANDC__
 
-#else  // GTEST_USES_SIMPLE_RE
+# if GTEST_OS_WINDOWS_MOBILE
+inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
+// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
+// time and thus not defined there.
+# else
+inline int FileNo(FILE* file) { return _fileno(file); }
+inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
+inline int RmDir(const char* dir) { return _rmdir(dir); }
+inline bool IsDir(const StatStruct& st) {
+  return (_S_IFDIR & st.st_mode) != 0;
+}
+# endif  // GTEST_OS_WINDOWS_MOBILE
 
-  const char* full_pattern_;  // For FullMatch();
+#elif GTEST_OS_ESP8266
+typedef struct stat StatStruct;
 
-#endif
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) {
+  // stat function not implemented on ESP8266
+  return 0;
+}
+inline int StrCaseCmp(const char* s1, const char* s2) {
+  return strcasecmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
 
-  GTEST_DISALLOW_ASSIGN_(RE);
-};
+#else
 
-// Formats a source file path and a line number as they would appear
-// in an error message from the compiler used to compile this code.
-GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);
+typedef struct stat StatStruct;
 
-// Formats a file location for compiler-independent XML output.
-// Although this function is not platform dependent, we put it next to
-// FormatFileLocation in order to contrast the two functions.
-GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
-                                                               int line);
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
+  return strcasecmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
 
-// Defines logging utilities:
-//   GTEST_LOG_(severity) - logs messages at the specified severity level. The
-//                          message itself is streamed into the macro.
-//   LogToStderr()  - directs all log messages to stderr.
-//   FlushInfoLog() - flushes informational log messages.
+#endif  // GTEST_OS_WINDOWS
 
-enum GTestLogSeverity {
-  GTEST_INFO,
-  GTEST_WARNING,
-  GTEST_ERROR,
-  GTEST_FATAL
-};
+inline int IsATTY(int fd) {
+  // DoIsATTY might change errno (for example ENOTTY in case you redirect stdout
+  // to a file on Linux), which is unexpected, so save the previous value, and
+  // restore it after the call.
+  int savedErrno = errno;
+  int isAttyValue = DoIsATTY(fd);
+  errno = savedErrno;
 
-// Formats log entry severity, provides a stream object for streaming the
-// log message, and terminates the message with a newline when going out of
-// scope.
-class GTEST_API_ GTestLog {
- public:
-  GTestLog(GTestLogSeverity severity, const char* file, int line);
+  return isAttyValue;
+}
 
-  // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
-  ~GTestLog();
+// Functions deprecated by MSVC 8.0.
 
-  ::std::ostream& GetStream() { return ::std::cerr; }
+GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
 
- private:
-  const GTestLogSeverity severity_;
+// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
+// StrError() aren't needed on Windows CE at this time and thus not
+// defined there.
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
-};
+#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && \
+    !GTEST_OS_WINDOWS_RT && !GTEST_OS_ESP8266 && !GTEST_OS_XTENSA
+inline int ChDir(const char* dir) { return chdir(dir); }
+#endif
+inline FILE* FOpen(const char* path, const char* mode) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+  struct wchar_codecvt : public std::codecvt<wchar_t, char, std::mbstate_t> {};
+  std::wstring_convert<wchar_codecvt> converter;
+  std::wstring wide_path = converter.from_bytes(path);
+  std::wstring wide_mode = converter.from_bytes(mode);
+  return _wfopen(wide_path.c_str(), wide_mode.c_str());
+#else  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+  return fopen(path, mode);
+#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+}
+#if !GTEST_OS_WINDOWS_MOBILE
+inline FILE *FReopen(const char* path, const char* mode, FILE* stream) {
+  return freopen(path, mode, stream);
+}
+inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); }
+#endif
+inline int FClose(FILE* fp) { return fclose(fp); }
+#if !GTEST_OS_WINDOWS_MOBILE
+inline int Read(int fd, void* buf, unsigned int count) {
+  return static_cast<int>(read(fd, buf, count));
+}
+inline int Write(int fd, const void* buf, unsigned int count) {
+  return static_cast<int>(write(fd, buf, count));
+}
+inline int Close(int fd) { return close(fd); }
+inline const char* StrError(int errnum) { return strerror(errnum); }
+#endif
+inline const char* GetEnv(const char* name) {
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+    GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_XTENSA
+  // We are on an embedded platform, which has no environment variables.
+  static_cast<void>(name);  // To prevent 'unused argument' warning.
+  return nullptr;
+#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
+  // Environment variables which we programmatically clear will be set to the
+  // empty string rather than unset (NULL).  Handle that case.
+  const char* const env = getenv(name);
+  return (env != nullptr && env[0] != '\0') ? env : nullptr;
+#else
+  return getenv(name);
+#endif
+}
 
-#define GTEST_LOG_(severity) \
-    ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
-                                  __FILE__, __LINE__).GetStream()
+GTEST_DISABLE_MSC_DEPRECATED_POP_()
 
-inline void LogToStderr() {}
-inline void FlushInfoLog() { fflush(NULL); }
+#if GTEST_OS_WINDOWS_MOBILE
+// Windows CE has no C library. The abort() function is used in
+// several places in Google Test. This implementation provides a reasonable
+// imitation of standard behaviour.
+[[noreturn]] void Abort();
+#else
+[[noreturn]] inline void Abort() { abort(); }
+#endif  // GTEST_OS_WINDOWS_MOBILE
 
-// INTERNAL IMPLEMENTATION - DO NOT USE.
-//
-// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
-// is not satisfied.
-//  Synopsys:
-//    GTEST_CHECK_(boolean_condition);
-//     or
-//    GTEST_CHECK_(boolean_condition) << "Additional message";
+}  // namespace posix
+
+// MSVC "deprecates" snprintf and issues warnings wherever it is used.  In
+// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
+// MSVC-based platforms.  We map the GTEST_SNPRINTF_ macro to the appropriate
+// function in order to achieve that.  We use macro definition here because
+// snprintf is a variadic function.
+#if _MSC_VER && !GTEST_OS_WINDOWS_MOBILE
+// MSVC 2005 and above support variadic macros.
+# define GTEST_SNPRINTF_(buffer, size, format, ...) \
+     _snprintf_s(buffer, size, size, format, __VA_ARGS__)
+#elif defined(_MSC_VER)
+// Windows CE does not define _snprintf_s
+# define GTEST_SNPRINTF_ _snprintf
+#else
+# define GTEST_SNPRINTF_ snprintf
+#endif
+
+// The biggest signed integer type the compiler supports.
 //
-//    This checks the condition and if the condition is not satisfied
-//    it prints message about the condition violation, including the
-//    condition itself, plus additional message streamed into it, if any,
-//    and then it aborts the program. It aborts the program irrespective of
-//    whether it is built in the debug mode or not.
-#define GTEST_CHECK_(condition) \
-    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-    if (::testing::internal::IsTrue(condition)) \
-      ; \
-    else \
-      GTEST_LOG_(FATAL) << "Condition " #condition " failed. "
+// long long is guaranteed to be at least 64-bits in C++11.
+using BiggestInt = long long;  // NOLINT
 
-// An all-mode assert to verify that the given POSIX-style function
-// call returns 0 (indicating success).  Known limitation: this
-// doesn't expand to a balanced 'if' statement, so enclose the macro
-// in {} if you need to use it as the only statement in an 'if'
-// branch.
-#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
-  if (const int gtest_error = (posix_call)) \
-    GTEST_LOG_(FATAL) << #posix_call << "failed with error " \
-                      << gtest_error
+// The maximum number a BiggestInt can represent.
+constexpr BiggestInt kMaxBiggestInt = (std::numeric_limits<BiggestInt>::max)();
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// This template class serves as a compile-time function from size to
+// type.  It maps a size in bytes to a primitive type with that
+// size. e.g.
 //
-// Use ImplicitCast_ as a safe version of static_cast for upcasting in
-// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
-// const Foo*).  When you use ImplicitCast_, the compiler checks that
-// the cast is safe.  Such explicit ImplicitCast_s are necessary in
-// surprisingly many situations where C++ demands an exact type match
-// instead of an argument type convertible to a target type.
+//   TypeWithSize<4>::UInt
 //
-// The syntax for using ImplicitCast_ is the same as for static_cast:
+// is typedef-ed to be unsigned int (unsigned integer made up of 4
+// bytes).
 //
-//   ImplicitCast_<ToType>(expr)
+// Such functionality should belong to STL, but I cannot find it
+// there.
 //
-// ImplicitCast_ would have been part of the C++ standard library,
-// but the proposal was submitted too late.  It will probably make
-// its way into the language in the future.
+// Google Test uses this class in the implementation of floating-point
+// comparison.
 //
-// This relatively ugly name is intentional. It prevents clashes with
-// similar functions users may have (e.g., implicit_cast). The internal
-// namespace alone is not enough because the function can be found by ADL.
-template<typename To>
-inline To ImplicitCast_(To x) { return x; }
+// For now it only handles UInt (unsigned int) as that's all Google Test
+// needs.  Other types can be easily added in the future if need
+// arises.
+template <size_t size>
+class TypeWithSize {
+ public:
+  // This prevents the user from using TypeWithSize<N> with incorrect
+  // values of N.
+  using UInt = void;
+};
 
-// When you upcast (that is, cast a pointer from type Foo to type
-// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
-// always succeed.  When you downcast (that is, cast a pointer from
-// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
-// how do you know the pointer is really of type SubclassOfFoo?  It
-// could be a bare Foo, or of type DifferentSubclassOfFoo.  Thus,
-// when you downcast, you should use this macro.  In debug mode, we
-// use dynamic_cast<> to double-check the downcast is legal (we die
-// if it's not).  In normal mode, we do the efficient static_cast<>
-// instead.  Thus, it's important to test in debug mode to make sure
-// the cast is legal!
-//    This is the only place in the code we should use dynamic_cast<>.
-// In particular, you SHOULDN'T be using dynamic_cast<> in order to
-// do RTTI (eg code like this:
-//    if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
-//    if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
-// You should design the code some other way not to need this.
-//
-// This relatively ugly name is intentional. It prevents clashes with
-// similar functions users may have (e.g., down_cast). The internal
-// namespace alone is not enough because the function can be found by ADL.
-template<typename To, typename From>  // use like this: DownCast_<T*>(foo);
-inline To DownCast_(From* f) {  // so we only accept pointers
-  // Ensures that To is a sub-type of From *.  This test is here only
-  // for compile-time type checking, and has no overhead in an
-  // optimized build at run-time, as it will be optimized away
-  // completely.
-  if (false) {
-    const To to = NULL;
-    ::testing::internal::ImplicitCast_<From*>(to);
-  }
+// The specialization for size 4.
+template <>
+class TypeWithSize<4> {
+ public:
+  using Int = std::int32_t;
+  using UInt = std::uint32_t;
+};
 
-#if GTEST_HAS_RTTI
-  // RTTI: debug mode only!
-  GTEST_CHECK_(f == NULL || dynamic_cast<To>(f) != NULL);
-#endif
-  return static_cast<To>(f);
-}
+// The specialization for size 8.
+template <>
+class TypeWithSize<8> {
+ public:
+  using Int = std::int64_t;
+  using UInt = std::uint64_t;
+};
 
-// Downcasts the pointer of type Base to Derived.
-// Derived must be a subclass of Base. The parameter MUST
-// point to a class of type Derived, not any subclass of it.
-// When RTTI is available, the function performs a runtime
-// check to enforce this.
-template <class Derived, class Base>
-Derived* CheckedDowncastToActualType(Base* base) {
-#if GTEST_HAS_RTTI
-  GTEST_CHECK_(typeid(*base) == typeid(Derived));
-  return dynamic_cast<Derived*>(base);  // NOLINT
-#else
-  return static_cast<Derived*>(base);  // Poor man's downcast.
-#endif
-}
+// Integer types of known sizes.
+using TimeInMillis = int64_t;  // Represents time in milliseconds.
 
-#if GTEST_HAS_STREAM_REDIRECTION
+// Utilities for command line flags and environment variables.
 
-// Defines the stderr capturer:
-//   CaptureStdout     - starts capturing stdout.
-//   GetCapturedStdout - stops capturing stdout and returns the captured string.
-//   CaptureStderr     - starts capturing stderr.
-//   GetCapturedStderr - stops capturing stderr and returns the captured string.
-//
-GTEST_API_ void CaptureStdout();
-GTEST_API_ std::string GetCapturedStdout();
-GTEST_API_ void CaptureStderr();
-GTEST_API_ std::string GetCapturedStderr();
+// Macro for referencing flags.
+#if !defined(GTEST_FLAG)
+# define GTEST_FLAG(name) FLAGS_gtest_##name
+#endif  // !defined(GTEST_FLAG)
 
-#endif  // GTEST_HAS_STREAM_REDIRECTION
+#if !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
+# define GTEST_USE_OWN_FLAGFILE_FLAG_ 1
+#endif  // !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
 
+#if !defined(GTEST_DECLARE_bool_)
+# define GTEST_FLAG_SAVER_ ::testing::internal::GTestFlagSaver
 
-#if GTEST_HAS_DEATH_TEST
+// Macros for declaring flags.
+# define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
+# define GTEST_DECLARE_int32_(name) \
+    GTEST_API_ extern std::int32_t GTEST_FLAG(name)
+# define GTEST_DECLARE_string_(name) \
+    GTEST_API_ extern ::std::string GTEST_FLAG(name)
 
-const ::std::vector<testing::internal::string>& GetInjectableArgvs();
-void SetInjectableArgvs(const ::std::vector<testing::internal::string>*
-                             new_argvs);
+// Macros for defining flags.
+# define GTEST_DEFINE_bool_(name, default_val, doc) \
+    GTEST_API_ bool GTEST_FLAG(name) = (default_val)
+# define GTEST_DEFINE_int32_(name, default_val, doc) \
+    GTEST_API_ std::int32_t GTEST_FLAG(name) = (default_val)
+# define GTEST_DEFINE_string_(name, default_val, doc) \
+    GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)
 
-// A copy of all command line arguments.  Set by InitGoogleTest().
-extern ::std::vector<testing::internal::string> g_argvs;
+#endif  // !defined(GTEST_DECLARE_bool_)
 
-#endif  // GTEST_HAS_DEATH_TEST
+// Thread annotations
+#if !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
+# define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
+# define GTEST_LOCK_EXCLUDED_(locks)
+#endif  // !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
 
-// Defines synchronization primitives.
+// Parses 'str' for a 32-bit signed integer.  If successful, writes the result
+// to *value and returns true; otherwise leaves *value unchanged and returns
+// false.
+GTEST_API_ bool ParseInt32(const Message& src_text, const char* str,
+                           int32_t* value);
 
-#if GTEST_HAS_PTHREAD
+// Parses a bool/int32_t/string from the environment variable
+// corresponding to the given Google Test flag.
+bool BoolFromGTestEnv(const char* flag, bool default_val);
+GTEST_API_ int32_t Int32FromGTestEnv(const char* flag, int32_t default_val);
+std::string OutputFlagAlsoCheckEnvVar();
+const char* StringFromGTestEnv(const char* flag, const char* default_val);
 
-// Sleeps for (roughly) n milli-seconds.  This function is only for
-// testing Google Test's own constructs.  Don't use it in user tests,
-// either directly or indirectly.
-inline void SleepMilliseconds(int n) {
-  const timespec time = {
-    0,                  // 0 seconds.
-    n * 1000L * 1000L,  // And n ms.
-  };
-  nanosleep(&time, NULL);
-}
+}  // namespace internal
+}  // namespace testing
 
-// Allows a controller thread to pause execution of newly created
-// threads until notified.  Instances of this class must be created
-// and destroyed in the controller thread.
-//
-// This class is only for testing Google Test's own constructs. Do not
-// use it in user tests, either directly or indirectly.
-class Notification {
- public:
-  Notification() : notified_(false) {
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
-  }
-  ~Notification() {
-    pthread_mutex_destroy(&mutex_);
-  }
+#if !defined(GTEST_INTERNAL_DEPRECATED)
+
+// Internal Macro to mark an API deprecated, for googletest usage only
+// Usage: class GTEST_INTERNAL_DEPRECATED(message) MyClass or
+// GTEST_INTERNAL_DEPRECATED(message) <return_type> myFunction(); Every usage of
+// a deprecated entity will trigger a warning when compiled with
+// `-Wdeprecated-declarations` option (clang, gcc, any __GNUC__ compiler).
+// For msvc /W3 option will need to be used
+// Note that for 'other' compilers this macro evaluates to nothing to prevent
+// compilations errors.
+#if defined(_MSC_VER)
+#define GTEST_INTERNAL_DEPRECATED(message) __declspec(deprecated(message))
+#elif defined(__GNUC__)
+#define GTEST_INTERNAL_DEPRECATED(message) __attribute__((deprecated(message)))
+#else
+#define GTEST_INTERNAL_DEPRECATED(message)
+#endif
 
-  // Notifies all threads created with this notification to start. Must
-  // be called from the controller thread.
-  void Notify() {
-    pthread_mutex_lock(&mutex_);
-    notified_ = true;
-    pthread_mutex_unlock(&mutex_);
-  }
+#endif  // !defined(GTEST_INTERNAL_DEPRECATED)
 
-  // Blocks until the controller thread notifies. Must be called from a test
-  // thread.
-  void WaitForNotification() {
-    for (;;) {
-      pthread_mutex_lock(&mutex_);
-      const bool notified = notified_;
-      pthread_mutex_unlock(&mutex_);
-      if (notified)
-        break;
-      SleepMilliseconds(10);
-    }
-  }
+#if GTEST_HAS_ABSL
+// Always use absl::any for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include "absl/types/any.h"
+namespace testing {
+namespace internal {
+using Any = ::absl::any;
+}  // namespace internal
+}  // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<any>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::any for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include <any>
+namespace testing {
+namespace internal {
+using Any = ::std::any;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::any is not
+// supported.
+#endif  // __has_include(<any>) && __cplusplus >= 201703L
+#endif  // __has_include
+#endif  // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::optional for UniversalPrinter<> specializations if
+// googletest is built with absl support.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include "absl/types/optional.h"
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::absl::optional<T>;
+}  // namespace internal
+}  // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<optional>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::optional for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include <optional>
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::std::optional<T>;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::optional is not
+// supported.
+#endif  // __has_include(<optional>) && __cplusplus >= 201703L
+#endif  // __has_include
+#endif  // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::string_view for Matcher<> specializations if googletest
+// is built with absl support.
+# define GTEST_INTERNAL_HAS_STRING_VIEW 1
+#include "absl/strings/string_view.h"
+namespace testing {
+namespace internal {
+using StringView = ::absl::string_view;
+}  // namespace internal
+}  // namespace testing
+#else
+# ifdef __has_include
+#   if __has_include(<string_view>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::string_view for Matcher<>
+// specializations.
+#   define GTEST_INTERNAL_HAS_STRING_VIEW 1
+#include <string_view>
+namespace testing {
+namespace internal {
+using StringView = ::std::string_view;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::string_view is not
+// supported.
+#  endif  // __has_include(<string_view>) && __cplusplus >= 201703L
+# endif  // __has_include
+#endif  // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::variant for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include "absl/types/variant.h"
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::absl::variant<T...>;
+}  // namespace internal
+}  // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<variant>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::variant for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include <variant>
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::std::variant<T...>;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::variant is not supported.
+#endif  // __has_include(<variant>) && __cplusplus >= 201703L
+#endif  // __has_include
+#endif  // GTEST_HAS_ABSL
 
- private:
-  pthread_mutex_t mutex_;
-  bool notified_;
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
-};
+#if GTEST_OS_LINUX
+# include <stdlib.h>
+# include <sys/types.h>
+# include <sys/wait.h>
+# include <unistd.h>
+#endif  // GTEST_OS_LINUX
 
-// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
-// Consequently, it cannot select a correct instantiation of ThreadWithParam
-// in order to call its Run(). Introducing ThreadWithParamBase as a
-// non-templated base class for ThreadWithParam allows us to bypass this
-// problem.
-class ThreadWithParamBase {
- public:
-  virtual ~ThreadWithParamBase() {}
-  virtual void Run() = 0;
-};
+#if GTEST_HAS_EXCEPTIONS
+# include <stdexcept>
+#endif
 
-// pthread_create() accepts a pointer to a function type with the C linkage.
-// According to the Standard (7.5/1), function types with different linkages
-// are different even if they are otherwise identical.  Some compilers (for
-// example, SunStudio) treat them as different types.  Since class methods
-// cannot be defined with C-linkage we need to define a free C-function to
-// pass into pthread_create().
-extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
-  static_cast<ThreadWithParamBase*>(thread)->Run();
-  return NULL;
-}
+#include <ctype.h>
+#include <float.h>
+#include <string.h>
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <map>
+#include <set>
+#include <string>
+#include <type_traits>
+#include <vector>
 
-// Helper class for testing Google Test's multi-threading constructs.
-// To use it, write:
+// Copyright 2005, Google Inc.
+// All rights reserved.
 //
-//   void ThreadFunc(int param) { /* Do things with param */ }
-//   Notification thread_can_start;
-//   ...
-//   // The thread_can_start parameter is optional; you can supply NULL.
-//   ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
-//   thread_can_start.Notify();
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
 //
-// These classes are only for testing Google Test's own constructs. Do
-// not use them in user tests, either directly or indirectly.
-template <typename T>
-class ThreadWithParam : public ThreadWithParamBase {
- public:
-  typedef void (*UserThreadFunc)(T);
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-  ThreadWithParam(
-      UserThreadFunc func, T param, Notification* thread_can_start)
-      : func_(func),
-        param_(param),
-        thread_can_start_(thread_can_start),
-        finished_(false) {
-    ThreadWithParamBase* const base = this;
-    // The thread can be created only after all fields except thread_
-    // have been initialized.
-    GTEST_CHECK_POSIX_SUCCESS_(
-        pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base));
-  }
-  ~ThreadWithParam() { Join(); }
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the Message class.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
+// program!
 
-  void Join() {
-    if (!finished_) {
-      GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0));
-      finished_ = true;
-    }
-  }
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-  virtual void Run() {
-    if (thread_can_start_ != NULL)
-      thread_can_start_->WaitForNotification();
-    func_(param_);
-  }
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
 
- private:
-  const UserThreadFunc func_;  // User-supplied thread function.
-  const T param_;  // User-supplied parameter to the thread function.
-  // When non-NULL, used to block execution until the controller thread
-  // notifies.
-  Notification* const thread_can_start_;
-  bool finished_;  // true iff we know that the thread function has finished.
-  pthread_t thread_;  // The native thread object.
+#include <limits>
+#include <memory>
+#include <sstream>
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
-};
 
-// MutexBase and Mutex implement mutex on pthreads-based platforms. They
-// are used in conjunction with class MutexLock:
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// Ensures that there is at least one operator<< in the global namespace.
+// See Message& operator<<(...) below for why.
+void operator<<(const testing::internal::Secret&, int);
+
+namespace testing {
+
+// The Message class works like an ostream repeater.
 //
-//   Mutex mutex;
-//   ...
-//   MutexLock lock(&mutex);  // Acquires the mutex and releases it at the end
-//                            // of the current scope.
+// Typical usage:
 //
-// MutexBase implements behavior for both statically and dynamically
-// allocated mutexes.  Do not use MutexBase directly.  Instead, write
-// the following to define a static mutex:
+//   1. You stream a bunch of values to a Message object.
+//      It will remember the text in a stringstream.
+//   2. Then you stream the Message object to an ostream.
+//      This causes the text in the Message to be streamed
+//      to the ostream.
 //
-//   GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
+// For example;
 //
-// You can forward declare a static mutex like this:
+//   testing::Message foo;
+//   foo << 1 << " != " << 2;
+//   std::cout << foo;
 //
-//   GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
+// will print "1 != 2".
 //
-// To create a dynamic mutex, just define an object of type Mutex.
-class MutexBase {
- public:
-  // Acquires this mutex.
-  void Lock() {
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
-    owner_ = pthread_self();
-    has_owner_ = true;
-  }
-
-  // Releases this mutex.
-  void Unlock() {
-    // Since the lock is being released the owner_ field should no longer be
-    // considered valid. We don't protect writing to has_owner_ here, as it's
-    // the caller's responsibility to ensure that the current thread holds the
-    // mutex when this is called.
-    has_owner_ = false;
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
-  }
+// Message is not intended to be inherited from.  In particular, its
+// destructor is not virtual.
+//
+// Note that stringstream behaves differently in gcc and in MSVC.  You
+// can stream a NULL char pointer to it in the former, but not in the
+// latter (it causes an access violation if you do).  The Message
+// class hides this difference by treating a NULL char pointer as
+// "(null)".
+class GTEST_API_ Message {
+ private:
+  // The type of basic IO manipulators (endl, ends, and flush) for
+  // narrow streams.
+  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
 
-  // Does nothing if the current thread holds the mutex. Otherwise, crashes
-  // with high probability.
-  void AssertHeld() const {
-    GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self()))
-        << "The current thread is not holding the mutex @" << this;
+ public:
+  // Constructs an empty Message.
+  Message();
+
+  // Copy constructor.
+  Message(const Message& msg) : ss_(new ::std::stringstream) {  // NOLINT
+    *ss_ << msg.GetString();
   }
 
-  // A static mutex may be used before main() is entered.  It may even
-  // be used before the dynamic initialization stage.  Therefore we
-  // must be able to initialize a static mutex object at link time.
-  // This means MutexBase has to be a POD and its member variables
-  // have to be public.
- public:
-  pthread_mutex_t mutex_;  // The underlying pthread mutex.
-  // has_owner_ indicates whether the owner_ field below contains a valid thread
-  // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All
-  // accesses to the owner_ field should be protected by a check of this field.
-  // An alternative might be to memset() owner_ to all zeros, but there's no
-  // guarantee that a zero'd pthread_t is necessarily invalid or even different
-  // from pthread_self().
-  bool has_owner_;
-  pthread_t owner_;  // The thread holding the mutex.
-};
+  // Constructs a Message from a C-string.
+  explicit Message(const char* str) : ss_(new ::std::stringstream) {
+    *ss_ << str;
+  }
 
-// Forward-declares a static mutex.
-# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
-    extern ::testing::internal::MutexBase mutex
+  // Streams a non-pointer value to this object.
+  template <typename T>
+  inline Message& operator <<(const T& val) {
+    // Some libraries overload << for STL containers.  These
+    // overloads are defined in the global namespace instead of ::std.
+    //
+    // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
+    // overloads are visible in either the std namespace or the global
+    // namespace, but not other namespaces, including the testing
+    // namespace which Google Test's Message class is in.
+    //
+    // To allow STL containers (and other types that has a << operator
+    // defined in the global namespace) to be used in Google Test
+    // assertions, testing::Message must access the custom << operator
+    // from the global namespace.  With this using declaration,
+    // overloads of << defined in the global namespace and those
+    // visible via Koenig lookup are both exposed in this function.
+    using ::operator <<;
+    *ss_ << val;
+    return *this;
+  }
 
-// Defines and statically (i.e. at link time) initializes a static mutex.
-// The initialization list here does not explicitly initialize each field,
-// instead relying on default initialization for the unspecified fields. In
-// particular, the owner_ field (a pthread_t) is not explicitly initialized.
-// This allows initialization to work whether pthread_t is a scalar or struct.
-// The flag -Wmissing-field-initializers must not be specified for this to work.
-# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
-    ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false }
+  // Streams a pointer value to this object.
+  //
+  // This function is an overload of the previous one.  When you
+  // stream a pointer to a Message, this definition will be used as it
+  // is more specialized.  (The C++ Standard, section
+  // [temp.func.order].)  If you stream a non-pointer, then the
+  // previous definition will be used.
+  //
+  // The reason for this overload is that streaming a NULL pointer to
+  // ostream is undefined behavior.  Depending on the compiler, you
+  // may get "0", "(nil)", "(null)", or an access violation.  To
+  // ensure consistent result across compilers, we always treat NULL
+  // as "(null)".
+  template <typename T>
+  inline Message& operator <<(T* const& pointer) {  // NOLINT
+    if (pointer == nullptr) {
+      *ss_ << "(null)";
+    } else {
+      *ss_ << pointer;
+    }
+    return *this;
+  }
 
-// The Mutex class can only be used for mutexes created at runtime. It
-// shares its API with MutexBase otherwise.
-class Mutex : public MutexBase {
- public:
-  Mutex() {
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
-    has_owner_ = false;
+  // Since the basic IO manipulators are overloaded for both narrow
+  // and wide streams, we have to provide this specialized definition
+  // of operator <<, even though its body is the same as the
+  // templatized version above.  Without this definition, streaming
+  // endl or other basic IO manipulators to Message will confuse the
+  // compiler.
+  Message& operator <<(BasicNarrowIoManip val) {
+    *ss_ << val;
+    return *this;
   }
-  ~Mutex() {
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_));
+
+  // Instead of 1/0, we want to see true/false for bool values.
+  Message& operator <<(bool b) {
+    return *this << (b ? "true" : "false");
   }
 
- private:
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
-};
+  // These two overloads allow streaming a wide C string to a Message
+  // using the UTF-8 encoding.
+  Message& operator <<(const wchar_t* wide_c_str);
+  Message& operator <<(wchar_t* wide_c_str);
 
-// We cannot name this class MutexLock as the ctor declaration would
-// conflict with a macro named MutexLock, which is defined on some
-// platforms.  Hence the typedef trick below.
-class GTestMutexLock {
- public:
-  explicit GTestMutexLock(MutexBase* mutex)
-      : mutex_(mutex) { mutex_->Lock(); }
+#if GTEST_HAS_STD_WSTRING
+  // Converts the given wide string to a narrow string using the UTF-8
+  // encoding, and streams the result to this Message object.
+  Message& operator <<(const ::std::wstring& wstr);
+#endif  // GTEST_HAS_STD_WSTRING
 
-  ~GTestMutexLock() { mutex_->Unlock(); }
+  // Gets the text streamed to this object so far as an std::string.
+  // Each '\0' character in the buffer is replaced with "\\0".
+  //
+  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+  std::string GetString() const;
 
  private:
-  MutexBase* const mutex_;
+  // We'll hold the text streamed to this object here.
+  const std::unique_ptr< ::std::stringstream> ss_;
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
+  // We declare (but don't implement) this to prevent the compiler
+  // from implementing the assignment operator.
+  void operator=(const Message&);
 };
 
-typedef GTestMutexLock MutexLock;
-
-// Helpers for ThreadLocal.
+// Streams a Message to an ostream.
+inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
+  return os << sb.GetString();
+}
 
-// pthread_key_create() requires DeleteThreadLocalValue() to have
-// C-linkage.  Therefore it cannot be templatized to access
-// ThreadLocal<T>.  Hence the need for class
-// ThreadLocalValueHolderBase.
-class ThreadLocalValueHolderBase {
- public:
-  virtual ~ThreadLocalValueHolderBase() {}
-};
+namespace internal {
 
-// Called by pthread to delete thread-local data stored by
-// pthread_setspecific().
-extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
-  delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
+// Converts a streamable value to an std::string.  A NULL pointer is
+// converted to "(null)".  When the input value is a ::string,
+// ::std::string, ::wstring, or ::std::wstring object, each NUL
+// character in it is replaced with "\\0".
+template <typename T>
+std::string StreamableToString(const T& streamable) {
+  return (Message() << streamable).GetString();
 }
 
-// Implements thread-local storage on pthreads-based systems.
+}  // namespace internal
+}  // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+// Copyright 2008, Google Inc.
+// All rights reserved.
 //
-//   // Thread 1
-//   ThreadLocal<int> tl(100);  // 100 is the default value for each thread.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
 //
-//   // Thread 2
-//   tl.set(150);  // Changes the value for thread 2 only.
-//   EXPECT_EQ(150, tl.get());
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
 //
-//   // Thread 1
-//   EXPECT_EQ(100, tl.get());  // In thread 1, tl has the original value.
-//   tl.set(200);
-//   EXPECT_EQ(200, tl.get());
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-// The template type argument T must have a public copy constructor.
-// In addition, the default ThreadLocal constructor requires T to have
-// a public default constructor.
+// Google Test filepath utilities
 //
-// An object managed for a thread by a ThreadLocal instance is deleted
-// when the thread exits.  Or, if the ThreadLocal instance dies in
-// that thread, when the ThreadLocal dies.  It's the user's
-// responsibility to ensure that all other threads using a ThreadLocal
-// have exited when it dies, or the per-thread objects for those
-// threads will not be deleted.
+// This header file declares classes and functions used internally by
+// Google Test.  They are subject to change without notice.
 //
-// Google Test only uses global ThreadLocal objects.  That means they
-// will die after main() has returned.  Therefore, no per-thread
-// object managed by Google Test will be leaked as long as all threads
-// using Google Test have exited when main() returns.
-template <typename T>
-class ThreadLocal {
- public:
-  ThreadLocal() : key_(CreateKey()),
-                  default_() {}
-  explicit ThreadLocal(const T& value) : key_(CreateKey()),
-                                         default_(value) {}
-
-  ~ThreadLocal() {
-    // Destroys the managed object for the current thread, if any.
-    DeleteThreadLocalValue(pthread_getspecific(key_));
-
-    // Releases resources associated with the key.  This will *not*
-    // delete managed objects for other threads.
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
-  }
-
-  T* pointer() { return GetOrCreateValue(); }
-  const T* pointer() const { return GetOrCreateValue(); }
-  const T& get() const { return *pointer(); }
-  void set(const T& value) { *pointer() = value; }
+// This file is #included in gtest/internal/gtest-internal.h.
+// Do not include this header file separately!
 
- private:
-  // Holds a value of type T.
-  class ValueHolder : public ThreadLocalValueHolderBase {
-   public:
-    explicit ValueHolder(const T& value) : value_(value) {}
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-    T* pointer() { return &value_; }
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
 
-   private:
-    T value_;
-    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
-  };
-
-  static pthread_key_t CreateKey() {
-    pthread_key_t key;
-    // When a thread exits, DeleteThreadLocalValue() will be called on
-    // the object managed for that thread.
-    GTEST_CHECK_POSIX_SUCCESS_(
-        pthread_key_create(&key, &DeleteThreadLocalValue));
-    return key;
-  }
-
-  T* GetOrCreateValue() const {
-    ThreadLocalValueHolderBase* const holder =
-        static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_));
-    if (holder != NULL) {
-      return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
-    }
-
-    ValueHolder* const new_holder = new ValueHolder(default_);
-    ThreadLocalValueHolderBase* const holder_base = new_holder;
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
-    return new_holder->pointer();
-  }
-
-  // A key pthreads uses for looking up per-thread values.
-  const pthread_key_t key_;
-  const T default_;  // The default value for each thread.
-
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
-};
-
-# define GTEST_IS_THREADSAFE 1
-
-#else  // GTEST_HAS_PTHREAD
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file declares the String class and functions used internally by
+// Google Test.  They are subject to change without notice. They should not used
+// by code external to Google Test.
+//
+// This header file is #included by gtest-internal.h.
+// It should not be #included by other files.
 
-// A dummy implementation of synchronization primitives (mutex, lock,
-// and thread-local variable).  Necessary for compiling Google Test where
-// mutex is not supported - using Google Test in multiple threads is not
-// supported on such platforms.
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-class Mutex {
- public:
-  Mutex() {}
-  void Lock() {}
-  void Unlock() {}
-  void AssertHeld() const {}
-};
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
 
-# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
-  extern ::testing::internal::Mutex mutex
+#ifdef __BORLANDC__
+// string.h is not guaranteed to provide strcpy on C++ Builder.
+# include <mem.h>
+#endif
 
-# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex
+#include <string.h>
+#include <cstdint>
+#include <string>
 
-class GTestMutexLock {
- public:
-  explicit GTestMutexLock(Mutex*) {}  // NOLINT
-};
 
-typedef GTestMutexLock MutexLock;
+namespace testing {
+namespace internal {
 
-template <typename T>
-class ThreadLocal {
+// String - an abstract class holding static string utilities.
+class GTEST_API_ String {
  public:
-  ThreadLocal() : value_() {}
-  explicit ThreadLocal(const T& value) : value_(value) {}
-  T* pointer() { return &value_; }
-  const T* pointer() const { return &value_; }
-  const T& get() const { return value_; }
-  void set(const T& value) { value_ = value; }
- private:
-  T value_;
-};
+  // Static utility methods
 
-// The above synchronization primitives have dummy implementations.
-// Therefore Google Test is not thread-safe.
-# define GTEST_IS_THREADSAFE 0
+  // Clones a 0-terminated C string, allocating memory using new.  The
+  // caller is responsible for deleting the return value using
+  // delete[].  Returns the cloned string, or NULL if the input is
+  // NULL.
+  //
+  // This is different from strdup() in string.h, which allocates
+  // memory using malloc().
+  static const char* CloneCString(const char* c_str);
 
-#endif  // GTEST_HAS_PTHREAD
+#if GTEST_OS_WINDOWS_MOBILE
+  // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
+  // able to pass strings to Win32 APIs on CE we need to convert them
+  // to 'Unicode', UTF-16.
 
-// Returns the number of threads running in the process, or 0 to indicate that
-// we cannot detect it.
-GTEST_API_ size_t GetThreadCount();
+  // Creates a UTF-16 wide string from the given ANSI string, allocating
+  // memory using new. The caller is responsible for deleting the return
+  // value using delete[]. Returns the wide string, or NULL if the
+  // input is NULL.
+  //
+  // The wide string is created using the ANSI codepage (CP_ACP) to
+  // match the behaviour of the ANSI versions of Win32 calls and the
+  // C runtime.
+  static LPCWSTR AnsiToUtf16(const char* c_str);
 
-// Passing non-POD classes through ellipsis (...) crashes the ARM
-// compiler and generates a warning in Sun Studio.  The Nokia Symbian
-// and the IBM XL C/C++ compiler try to instantiate a copy constructor
-// for objects passed through ellipsis (...), failing for uncopyable
-// objects.  We define this to ensure that only POD is passed through
-// ellipsis on these systems.
-#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC)
-// We lose support for NULL detection where the compiler doesn't like
-// passing non-POD classes through ellipsis (...).
-# define GTEST_ELLIPSIS_NEEDS_POD_ 1
-#else
-# define GTEST_CAN_COMPARE_NULL 1
+  // Creates an ANSI string from the given wide string, allocating
+  // memory using new. The caller is responsible for deleting the return
+  // value using delete[]. Returns the ANSI string, or NULL if the
+  // input is NULL.
+  //
+  // The returned string is created using the ANSI codepage (CP_ACP) to
+  // match the behaviour of the ANSI versions of Win32 calls and the
+  // C runtime.
+  static const char* Utf16ToAnsi(LPCWSTR utf16_str);
 #endif
 
-// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between
-// const T& and const T* in a function template.  These compilers
-// _can_ decide between class template specializations for T and T*,
-// so a tr1::type_traits-like is_pointer works.
-#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
-# define GTEST_NEEDS_IS_POINTER_ 1
-#endif
+  // Compares two C strings.  Returns true if and only if they have the same
+  // content.
+  //
+  // Unlike strcmp(), this function can handle NULL argument(s).  A
+  // NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool CStringEquals(const char* lhs, const char* rhs);
 
-template <bool bool_value>
-struct bool_constant {
-  typedef bool_constant<bool_value> type;
-  static const bool value = bool_value;
-};
-template <bool bool_value> const bool bool_constant<bool_value>::value;
+  // Converts a wide C string to a String using the UTF-8 encoding.
+  // NULL will be converted to "(null)".  If an error occurred during
+  // the conversion, "(failed to convert from wide string)" is
+  // returned.
+  static std::string ShowWideCString(const wchar_t* wide_c_str);
 
-typedef bool_constant<false> false_type;
-typedef bool_constant<true> true_type;
+  // Compares two wide C strings.  Returns true if and only if they have the
+  // same content.
+  //
+  // Unlike wcscmp(), this function can handle NULL argument(s).  A
+  // NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
 
-template <typename T>
-struct is_pointer : public false_type {};
+  // Compares two C strings, ignoring case.  Returns true if and only if
+  // they have the same content.
+  //
+  // Unlike strcasecmp(), this function can handle NULL argument(s).
+  // A NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool CaseInsensitiveCStringEquals(const char* lhs,
+                                           const char* rhs);
 
-template <typename T>
-struct is_pointer<T*> : public true_type {};
+  // Compares two wide C strings, ignoring case.  Returns true if and only if
+  // they have the same content.
+  //
+  // Unlike wcscasecmp(), this function can handle NULL argument(s).
+  // A NULL C string is considered different to any non-NULL wide C string,
+  // including the empty string.
+  // NB: The implementations on different platforms slightly differ.
+  // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
+  // environment variable. On GNU platform this method uses wcscasecmp
+  // which compares according to LC_CTYPE category of the current locale.
+  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
+  // current locale.
+  static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+                                               const wchar_t* rhs);
 
-template <typename Iterator>
-struct IteratorTraits {
-  typedef typename Iterator::value_type value_type;
-};
+  // Returns true if and only if the given string ends with the given suffix,
+  // ignoring case. Any string is considered to end with an empty suffix.
+  static bool EndsWithCaseInsensitive(
+      const std::string& str, const std::string& suffix);
 
-template <typename T>
-struct IteratorTraits<T*> {
-  typedef T value_type;
-};
+  // Formats an int value as "%02d".
+  static std::string FormatIntWidth2(int value);  // "%02d" for width == 2
 
-template <typename T>
-struct IteratorTraits<const T*> {
-  typedef T value_type;
-};
+  // Formats an int value to given width with leading zeros.
+  static std::string FormatIntWidthN(int value, int width);
 
-#if GTEST_OS_WINDOWS
-# define GTEST_PATH_SEP_ "\\"
-# define GTEST_HAS_ALT_PATH_SEP_ 1
-// The biggest signed integer type the compiler supports.
-typedef __int64 BiggestInt;
-#else
-# define GTEST_PATH_SEP_ "/"
-# define GTEST_HAS_ALT_PATH_SEP_ 0
-typedef long long BiggestInt;  // NOLINT
-#endif  // GTEST_OS_WINDOWS
+  // Formats an int value as "%X".
+  static std::string FormatHexInt(int value);
 
-// Utilities for char.
+  // Formats an int value as "%X".
+  static std::string FormatHexUInt32(uint32_t value);
 
-// isspace(int ch) and friends accept an unsigned char or EOF.  char
-// may be signed, depending on the compiler (or compiler flags).
-// Therefore we need to cast a char to unsigned char before calling
-// isspace(), etc.
+  // Formats a byte as "%02X".
+  static std::string FormatByte(unsigned char value);
 
-inline bool IsAlpha(char ch) {
-  return isalpha(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsAlNum(char ch) {
-  return isalnum(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsDigit(char ch) {
-  return isdigit(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsLower(char ch) {
-  return islower(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsSpace(char ch) {
-  return isspace(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsUpper(char ch) {
-  return isupper(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsXDigit(char ch) {
-  return isxdigit(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsXDigit(wchar_t ch) {
-  const unsigned char low_byte = static_cast<unsigned char>(ch);
-  return ch == low_byte && isxdigit(low_byte) != 0;
-}
+ private:
+  String();  // Not meant to be instantiated.
+};  // class String
 
-inline char ToLower(char ch) {
-  return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
-}
-inline char ToUpper(char ch) {
-  return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
-}
+// Gets the content of the stringstream's buffer as an std::string.  Each '\0'
+// character in the buffer is replaced with "\\0".
+GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
 
-// The testing::internal::posix namespace holds wrappers for common
-// POSIX functions.  These wrappers hide the differences between
-// Windows/MSVC and POSIX systems.  Since some compilers define these
-// standard functions as macros, the wrapper cannot have the same name
-// as the wrapped function.
+}  // namespace internal
+}  // namespace testing
 
-namespace posix {
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
 
-// Functions with a different name on Windows.
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
 
-#if GTEST_OS_WINDOWS
+namespace testing {
+namespace internal {
 
-typedef struct _stat StatStruct;
-
-# ifdef __BORLANDC__
-inline int IsATTY(int fd) { return isatty(fd); }
-inline int StrCaseCmp(const char* s1, const char* s2) {
-  return stricmp(s1, s2);
-}
-inline char* StrDup(const char* src) { return strdup(src); }
-# else  // !__BORLANDC__
-#  if GTEST_OS_WINDOWS_MOBILE
-inline int IsATTY(int /* fd */) { return 0; }
-#  else
-inline int IsATTY(int fd) { return _isatty(fd); }
-#  endif  // GTEST_OS_WINDOWS_MOBILE
-inline int StrCaseCmp(const char* s1, const char* s2) {
-  return _stricmp(s1, s2);
-}
-inline char* StrDup(const char* src) { return _strdup(src); }
-# endif  // __BORLANDC__
-
-# if GTEST_OS_WINDOWS_MOBILE
-inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
-// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
-// time and thus not defined there.
-# else
-inline int FileNo(FILE* file) { return _fileno(file); }
-inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
-inline int RmDir(const char* dir) { return _rmdir(dir); }
-inline bool IsDir(const StatStruct& st) {
-  return (_S_IFDIR & st.st_mode) != 0;
-}
-# endif  // GTEST_OS_WINDOWS_MOBILE
-
-#else
-
-typedef struct stat StatStruct;
+// FilePath - a class for file and directory pathname manipulation which
+// handles platform-specific conventions (like the pathname separator).
+// Used for helper functions for naming files in a directory for xml output.
+// Except for Set methods, all methods are const or static, which provides an
+// "immutable value object" -- useful for peace of mind.
+// A FilePath with a value ending in a path separator ("like/this/") represents
+// a directory, otherwise it is assumed to represent a file. In either case,
+// it may or may not represent an actual file or directory in the file system.
+// Names are NOT checked for syntax correctness -- no checking for illegal
+// characters, malformed paths, etc.
 
-inline int FileNo(FILE* file) { return fileno(file); }
-inline int IsATTY(int fd) { return isatty(fd); }
-inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
-inline int StrCaseCmp(const char* s1, const char* s2) {
-  return strcasecmp(s1, s2);
-}
-inline char* StrDup(const char* src) { return strdup(src); }
-inline int RmDir(const char* dir) { return rmdir(dir); }
-inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
+class GTEST_API_ FilePath {
+ public:
+  FilePath() : pathname_("") { }
+  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }
 
-#endif  // GTEST_OS_WINDOWS
+  explicit FilePath(const std::string& pathname) : pathname_(pathname) {
+    Normalize();
+  }
 
-// Functions deprecated by MSVC 8.0.
+  FilePath& operator=(const FilePath& rhs) {
+    Set(rhs);
+    return *this;
+  }
 
-#ifdef _MSC_VER
-// Temporarily disable warning 4996 (deprecated function).
-# pragma warning(push)
-# pragma warning(disable:4996)
-#endif
+  void Set(const FilePath& rhs) {
+    pathname_ = rhs.pathname_;
+  }
 
-inline const char* StrNCpy(char* dest, const char* src, size_t n) {
-  return strncpy(dest, src, n);
-}
+  const std::string& string() const { return pathname_; }
+  const char* c_str() const { return pathname_.c_str(); }
 
-// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
-// StrError() aren't needed on Windows CE at this time and thus not
-// defined there.
+  // Returns the current working directory, or "" if unsuccessful.
+  static FilePath GetCurrentDir();
 
-#if !GTEST_OS_WINDOWS_MOBILE
-inline int ChDir(const char* dir) { return chdir(dir); }
-#endif
-inline FILE* FOpen(const char* path, const char* mode) {
-  return fopen(path, mode);
-}
-#if !GTEST_OS_WINDOWS_MOBILE
-inline FILE *FReopen(const char* path, const char* mode, FILE* stream) {
-  return freopen(path, mode, stream);
-}
-inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); }
-#endif
-inline int FClose(FILE* fp) { return fclose(fp); }
-#if !GTEST_OS_WINDOWS_MOBILE
-inline int Read(int fd, void* buf, unsigned int count) {
-  return static_cast<int>(read(fd, buf, count));
-}
-inline int Write(int fd, const void* buf, unsigned int count) {
-  return static_cast<int>(write(fd, buf, count));
-}
-inline int Close(int fd) { return close(fd); }
-inline const char* StrError(int errnum) { return strerror(errnum); }
-#endif
-inline const char* GetEnv(const char* name) {
-#if GTEST_OS_WINDOWS_MOBILE
-  // We are on Windows CE, which has no environment variables.
-  return NULL;
-#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
-  // Environment variables which we programmatically clear will be set to the
-  // empty string rather than unset (NULL).  Handle that case.
-  const char* const env = getenv(name);
-  return (env != NULL && env[0] != '\0') ? env : NULL;
-#else
-  return getenv(name);
-#endif
-}
+  // Given directory = "dir", base_name = "test", number = 0,
+  // extension = "xml", returns "dir/test.xml". If number is greater
+  // than zero (e.g., 12), returns "dir/test_12.xml".
+  // On Windows platform, uses \ as the separator rather than /.
+  static FilePath MakeFileName(const FilePath& directory,
+                               const FilePath& base_name,
+                               int number,
+                               const char* extension);
 
-#ifdef _MSC_VER
-# pragma warning(pop)  // Restores the warning state.
-#endif
+  // Given directory = "dir", relative_path = "test.xml",
+  // returns "dir/test.xml".
+  // On Windows, uses \ as the separator rather than /.
+  static FilePath ConcatPaths(const FilePath& directory,
+                              const FilePath& relative_path);
 
-#if GTEST_OS_WINDOWS_MOBILE
-// Windows CE has no C library. The abort() function is used in
-// several places in Google Test. This implementation provides a reasonable
-// imitation of standard behaviour.
-void Abort();
-#else
-inline void Abort() { abort(); }
-#endif  // GTEST_OS_WINDOWS_MOBILE
+  // Returns a pathname for a file that does not currently exist. The pathname
+  // will be directory/base_name.extension or
+  // directory/base_name_<number>.extension if directory/base_name.extension
+  // already exists. The number will be incremented until a pathname is found
+  // that does not already exist.
+  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+  // There could be a race condition if two or more processes are calling this
+  // function at the same time -- they could both pick the same filename.
+  static FilePath GenerateUniqueFileName(const FilePath& directory,
+                                         const FilePath& base_name,
+                                         const char* extension);
 
-}  // namespace posix
+  // Returns true if and only if the path is "".
+  bool IsEmpty() const { return pathname_.empty(); }
 
-// MSVC "deprecates" snprintf and issues warnings wherever it is used.  In
-// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
-// MSVC-based platforms.  We map the GTEST_SNPRINTF_ macro to the appropriate
-// function in order to achieve that.  We use macro definition here because
-// snprintf is a variadic function.
-#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
-// MSVC 2005 and above support variadic macros.
-# define GTEST_SNPRINTF_(buffer, size, format, ...) \
-     _snprintf_s(buffer, size, size, format, __VA_ARGS__)
-#elif defined(_MSC_VER)
-// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't
-// complain about _snprintf.
-# define GTEST_SNPRINTF_ _snprintf
-#else
-# define GTEST_SNPRINTF_ snprintf
-#endif
+  // If input name has a trailing separator character, removes it and returns
+  // the name, otherwise return the name string unmodified.
+  // On Windows platform, uses \ as the separator, other platforms use /.
+  FilePath RemoveTrailingPathSeparator() const;
 
-// The maximum number a BiggestInt can represent.  This definition
-// works no matter BiggestInt is represented in one's complement or
-// two's complement.
-//
-// We cannot rely on numeric_limits in STL, as __int64 and long long
-// are not part of standard C++ and numeric_limits doesn't need to be
-// defined for them.
-const BiggestInt kMaxBiggestInt =
-    ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1));
+  // Returns a copy of the FilePath with the directory part removed.
+  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
+  // FilePath("file"). If there is no directory part ("just_a_file"), it returns
+  // the FilePath unmodified. If there is no file part ("just_a_dir/") it
+  // returns an empty FilePath ("").
+  // On Windows platform, '\' is the path separator, otherwise it is '/'.
+  FilePath RemoveDirectoryName() const;
 
-// This template class serves as a compile-time function from size to
-// type.  It maps a size in bytes to a primitive type with that
-// size. e.g.
-//
-//   TypeWithSize<4>::UInt
-//
-// is typedef-ed to be unsigned int (unsigned integer made up of 4
-// bytes).
-//
-// Such functionality should belong to STL, but I cannot find it
-// there.
-//
-// Google Test uses this class in the implementation of floating-point
-// comparison.
-//
-// For now it only handles UInt (unsigned int) as that's all Google Test
-// needs.  Other types can be easily added in the future if need
-// arises.
-template <size_t size>
-class TypeWithSize {
- public:
-  // This prevents the user from using TypeWithSize<N> with incorrect
-  // values of N.
-  typedef void UInt;
-};
+  // RemoveFileName returns the directory path with the filename removed.
+  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+  // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
+  // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
+  // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
+  // On Windows platform, '\' is the path separator, otherwise it is '/'.
+  FilePath RemoveFileName() const;
 
-// The specialization for size 4.
-template <>
-class TypeWithSize<4> {
- public:
-  // unsigned int has size 4 in both gcc and MSVC.
-  //
-  // As base/basictypes.h doesn't compile on Windows, we cannot use
-  // uint32, uint64, and etc here.
-  typedef int Int;
-  typedef unsigned int UInt;
-};
+  // Returns a copy of the FilePath with the case-insensitive extension removed.
+  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+  // FilePath("dir/file"). If a case-insensitive extension is not
+  // found, returns a copy of the original FilePath.
+  FilePath RemoveExtension(const char* extension) const;
 
-// The specialization for size 8.
-template <>
-class TypeWithSize<8> {
- public:
-#if GTEST_OS_WINDOWS
-  typedef __int64 Int;
-  typedef unsigned __int64 UInt;
-#else
-  typedef long long Int;  // NOLINT
-  typedef unsigned long long UInt;  // NOLINT
-#endif  // GTEST_OS_WINDOWS
-};
+  // Creates directories so that path exists. Returns true if successful or if
+  // the directories already exist; returns false if unable to create
+  // directories for any reason. Will also return false if the FilePath does
+  // not represent a directory (that is, it doesn't end with a path separator).
+  bool CreateDirectoriesRecursively() const;
 
-// Integer types of known sizes.
-typedef TypeWithSize<4>::Int Int32;
-typedef TypeWithSize<4>::UInt UInt32;
-typedef TypeWithSize<8>::Int Int64;
-typedef TypeWithSize<8>::UInt UInt64;
-typedef TypeWithSize<8>::Int TimeInMillis;  // Represents time in milliseconds.
+  // Create the directory so that path exists. Returns true if successful or
+  // if the directory already exists; returns false if unable to create the
+  // directory for any reason, including if the parent directory does not
+  // exist. Not named "CreateDirectory" because that's a macro on Windows.
+  bool CreateFolder() const;
 
-// Utilities for command line flags and environment variables.
+  // Returns true if FilePath describes something in the file-system,
+  // either a file, directory, or whatever, and that something exists.
+  bool FileOrDirectoryExists() const;
 
-// Macro for referencing flags.
-#define GTEST_FLAG(name) FLAGS_gtest_##name
+  // Returns true if pathname describes a directory in the file-system
+  // that exists.
+  bool DirectoryExists() const;
 
-// Macros for declaring flags.
-#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
-#define GTEST_DECLARE_int32_(name) \
-    GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
-#define GTEST_DECLARE_string_(name) \
-    GTEST_API_ extern ::std::string GTEST_FLAG(name)
+  // Returns true if FilePath ends with a path separator, which indicates that
+  // it is intended to represent a directory. Returns false otherwise.
+  // This does NOT check that a directory (or file) actually exists.
+  bool IsDirectory() const;
 
-// Macros for defining flags.
-#define GTEST_DEFINE_bool_(name, default_val, doc) \
-    GTEST_API_ bool GTEST_FLAG(name) = (default_val)
-#define GTEST_DEFINE_int32_(name, default_val, doc) \
-    GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
-#define GTEST_DEFINE_string_(name, default_val, doc) \
-    GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)
+  // Returns true if pathname describes a root directory. (Windows has one
+  // root directory per disk drive.)
+  bool IsRootDirectory() const;
 
-// Thread annotations
-#define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
-#define GTEST_LOCK_EXCLUDED_(locks)
+  // Returns true if pathname describes an absolute path.
+  bool IsAbsolutePath() const;
 
-// Parses 'str' for a 32-bit signed integer.  If successful, writes the result
-// to *value and returns true; otherwise leaves *value unchanged and returns
-// false.
-// TODO(chandlerc): Find a better way to refactor flag and environment parsing
-// out of both gtest-port.cc and gtest.cc to avoid exporting this utility
-// function.
-bool ParseInt32(const Message& src_text, const char* str, Int32* value);
-
-// Parses a bool/Int32/string from the environment variable
-// corresponding to the given Google Test flag.
-bool BoolFromGTestEnv(const char* flag, bool default_val);
-GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val);
-const char* StringFromGTestEnv(const char* flag, const char* default_val);
+ private:
+  // Replaces multiple consecutive separators with a single separator.
+  // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
+  // redundancies that might be in a pathname involving "." or "..".
+  //
+  // A pathname with multiple consecutive separators may occur either through
+  // user error or as a result of some scripts or APIs that generate a pathname
+  // with a trailing separator. On other platforms the same API or script
+  // may NOT generate a pathname with a trailing "/". Then elsewhere that
+  // pathname may have another "/" and pathname components added to it,
+  // without checking for the separator already being there.
+  // The script language and operating system may allow paths like "foo//bar"
+  // but some of the functions in FilePath will not handle that correctly. In
+  // particular, RemoveTrailingPathSeparator() only removes one separator, and
+  // it is called in CreateDirectoriesRecursively() assuming that it will change
+  // a pathname from directory syntax (trailing separator) to filename syntax.
+  //
+  // On Windows this method also replaces the alternate path separator '/' with
+  // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
+  // "bar\\foo".
 
-}  // namespace internal
-}  // namespace testing
+  void Normalize();
 
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+  // Returns a pointer to the last occurrence of a valid path separator in
+  // the FilePath. On Windows, for example, both '/' and '\' are valid path
+  // separators. Returns NULL if no path separator was found.
+  const char* FindLastPathSeparator() const;
 
-#if GTEST_OS_LINUX
-# include <stdlib.h>
-# include <sys/types.h>
-# include <sys/wait.h>
-# include <unistd.h>
-#endif  // GTEST_OS_LINUX
+  std::string pathname_;
+};  // class FilePath
 
-#if GTEST_HAS_EXCEPTIONS
-# include <stdexcept>
-#endif
+}  // namespace internal
+}  // namespace testing
 
-#include <ctype.h>
-#include <float.h>
-#include <string.h>
-#include <iomanip>
-#include <limits>
-#include <set>
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
 
-// Copyright 2005, Google Inc.
-// All rights reserved.
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -3103,5024 +3290,1731 @@ const char* StringFromGTestEnv(const char* flag, const char* default_val);
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the Message class.
-//
-// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
-// leave some internal implementation details in this header file.
-// They are clearly marked by comments like this:
-//
-//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-//
-// Such code is NOT meant to be used by a user directly, and is subject
-// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
-// program!
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
-#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
 
-#include <limits>
+// Type utilities needed for implementing typed and type-parameterized
+// tests.
 
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-// Ensures that there is at least one operator<< in the global namespace.
-// See Message& operator<<(...) below for why.
-void operator<<(const testing::internal::Secret&, int);
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
 
-namespace testing {
 
-// The Message class works like an ostream repeater.
-//
-// Typical usage:
-//
-//   1. You stream a bunch of values to a Message object.
-//      It will remember the text in a stringstream.
-//   2. Then you stream the Message object to an ostream.
-//      This causes the text in the Message to be streamed
-//      to the ostream.
-//
-// For example;
-//
-//   testing::Message foo;
-//   foo << 1 << " != " << 2;
-//   std::cout << foo;
-//
-// will print "1 != 2".
-//
-// Message is not intended to be inherited from.  In particular, its
-// destructor is not virtual.
-//
-// Note that stringstream behaves differently in gcc and in MSVC.  You
-// can stream a NULL char pointer to it in the former, but not in the
-// latter (it causes an access violation if you do).  The Message
-// class hides this difference by treating a NULL char pointer as
-// "(null)".
-class GTEST_API_ Message {
- private:
-  // The type of basic IO manipulators (endl, ends, and flush) for
-  // narrow streams.
-  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
+// #ifdef __GNUC__ is too general here.  It is possible to use gcc without using
+// libstdc++ (which is where cxxabi.h comes from).
+# if GTEST_HAS_CXXABI_H_
+#  include <cxxabi.h>
+# elif defined(__HP_aCC)
+#  include <acxx_demangle.h>
+# endif  // GTEST_HASH_CXXABI_H_
 
- public:
-  // Constructs an empty Message.
-  Message();
+namespace testing {
+namespace internal {
 
-  // Copy constructor.
-  Message(const Message& msg) : ss_(new ::std::stringstream) {  // NOLINT
-    *ss_ << msg.GetString();
+// Canonicalizes a given name with respect to the Standard C++ Library.
+// This handles removing the inline namespace within `std` that is
+// used by various standard libraries (e.g., `std::__1`).  Names outside
+// of namespace std are returned unmodified.
+inline std::string CanonicalizeForStdLibVersioning(std::string s) {
+  static const char prefix[] = "std::__";
+  if (s.compare(0, strlen(prefix), prefix) == 0) {
+    std::string::size_type end = s.find("::", strlen(prefix));
+    if (end != s.npos) {
+      // Erase everything between the initial `std` and the second `::`.
+      s.erase(strlen("std"), end - strlen("std"));
+    }
   }
+  return s;
+}
 
-  // Constructs a Message from a C-string.
-  explicit Message(const char* str) : ss_(new ::std::stringstream) {
-    *ss_ << str;
-  }
+#if GTEST_HAS_RTTI
+// GetTypeName(const std::type_info&) returns a human-readable name of type T.
+inline std::string GetTypeName(const std::type_info& type) {
+  const char* const name = type.name();
+#if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
+  int status = 0;
+  // gcc's implementation of typeid(T).name() mangles the type name,
+  // so we have to demangle it.
+#if GTEST_HAS_CXXABI_H_
+  using abi::__cxa_demangle;
+#endif  // GTEST_HAS_CXXABI_H_
+  char* const readable_name = __cxa_demangle(name, nullptr, nullptr, &status);
+  const std::string name_str(status == 0 ? readable_name : name);
+  free(readable_name);
+  return CanonicalizeForStdLibVersioning(name_str);
+#else
+  return name;
+#endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC
+}
+#endif  // GTEST_HAS_RTTI
 
-#if GTEST_OS_SYMBIAN
-  // Streams a value (either a pointer or not) to this object.
-  template <typename T>
-  inline Message& operator <<(const T& value) {
-    StreamHelper(typename internal::is_pointer<T>::type(), value);
-    return *this;
-  }
+// GetTypeName<T>() returns a human-readable name of type T if and only if
+// RTTI is enabled, otherwise it returns a dummy type name.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+#if GTEST_HAS_RTTI
+  return GetTypeName(typeid(T));
 #else
-  // Streams a non-pointer value to this object.
-  template <typename T>
-  inline Message& operator <<(const T& val) {
-    // Some libraries overload << for STL containers.  These
-    // overloads are defined in the global namespace instead of ::std.
-    //
-    // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
-    // overloads are visible in either the std namespace or the global
-    // namespace, but not other namespaces, including the testing
-    // namespace which Google Test's Message class is in.
-    //
-    // To allow STL containers (and other types that has a << operator
-    // defined in the global namespace) to be used in Google Test
-    // assertions, testing::Message must access the custom << operator
-    // from the global namespace.  With this using declaration,
-    // overloads of << defined in the global namespace and those
-    // visible via Koenig lookup are both exposed in this function.
-    using ::operator <<;
-    *ss_ << val;
-    return *this;
-  }
+  return "<type>";
+#endif  // GTEST_HAS_RTTI
+}
 
-  // Streams a pointer value to this object.
-  //
-  // This function is an overload of the previous one.  When you
-  // stream a pointer to a Message, this definition will be used as it
-  // is more specialized.  (The C++ Standard, section
-  // [temp.func.order].)  If you stream a non-pointer, then the
-  // previous definition will be used.
-  //
-  // The reason for this overload is that streaming a NULL pointer to
-  // ostream is undefined behavior.  Depending on the compiler, you
-  // may get "0", "(nil)", "(null)", or an access violation.  To
-  // ensure consistent result across compilers, we always treat NULL
-  // as "(null)".
+// A unique type indicating an empty node
+struct None {};
+
+# define GTEST_TEMPLATE_ template <typename T> class
+
+// The template "selector" struct TemplateSel<Tmpl> is used to
+// represent Tmpl, which must be a class template with one type
+// parameter, as a type.  TemplateSel<Tmpl>::Bind<T>::type is defined
+// as the type Tmpl<T>.  This allows us to actually instantiate the
+// template "selected" by TemplateSel<Tmpl>.
+//
+// This trick is necessary for simulating typedef for class templates,
+// which C++ doesn't support directly.
+template <GTEST_TEMPLATE_ Tmpl>
+struct TemplateSel {
   template <typename T>
-  inline Message& operator <<(T* const& pointer) {  // NOLINT
-    if (pointer == NULL) {
-      *ss_ << "(null)";
-    } else {
-      *ss_ << pointer;
-    }
-    return *this;
-  }
-#endif  // GTEST_OS_SYMBIAN
+  struct Bind {
+    typedef Tmpl<T> type;
+  };
+};
 
-  // Since the basic IO manipulators are overloaded for both narrow
-  // and wide streams, we have to provide this specialized definition
-  // of operator <<, even though its body is the same as the
-  // templatized version above.  Without this definition, streaming
-  // endl or other basic IO manipulators to Message will confuse the
-  // compiler.
-  Message& operator <<(BasicNarrowIoManip val) {
-    *ss_ << val;
-    return *this;
-  }
+# define GTEST_BIND_(TmplSel, T) \
+  TmplSel::template Bind<T>::type
 
-  // Instead of 1/0, we want to see true/false for bool values.
-  Message& operator <<(bool b) {
-    return *this << (b ? "true" : "false");
-  }
+template <GTEST_TEMPLATE_ Head_, GTEST_TEMPLATE_... Tail_>
+struct Templates {
+  using Head = TemplateSel<Head_>;
+  using Tail = Templates<Tail_...>;
+};
 
-  // These two overloads allow streaming a wide C string to a Message
-  // using the UTF-8 encoding.
-  Message& operator <<(const wchar_t* wide_c_str);
-  Message& operator <<(wchar_t* wide_c_str);
+template <GTEST_TEMPLATE_ Head_>
+struct Templates<Head_> {
+  using Head = TemplateSel<Head_>;
+  using Tail = None;
+};
 
-#if GTEST_HAS_STD_WSTRING
-  // Converts the given wide string to a narrow string using the UTF-8
-  // encoding, and streams the result to this Message object.
-  Message& operator <<(const ::std::wstring& wstr);
-#endif  // GTEST_HAS_STD_WSTRING
+// Tuple-like type lists
+template <typename Head_, typename... Tail_>
+struct Types {
+  using Head = Head_;
+  using Tail = Types<Tail_...>;
+};
 
-#if GTEST_HAS_GLOBAL_WSTRING
-  // Converts the given wide string to a narrow string using the UTF-8
-  // encoding, and streams the result to this Message object.
-  Message& operator <<(const ::wstring& wstr);
-#endif  // GTEST_HAS_GLOBAL_WSTRING
+template <typename Head_>
+struct Types<Head_> {
+  using Head = Head_;
+  using Tail = None;
+};
 
-  // Gets the text streamed to this object so far as an std::string.
-  // Each '\0' character in the buffer is replaced with "\\0".
-  //
-  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-  std::string GetString() const;
+// Helper metafunctions to tell apart a single type from types
+// generated by ::testing::Types
+template <typename... Ts>
+struct ProxyTypeList {
+  using type = Types<Ts...>;
+};
 
+template <typename>
+struct is_proxy_type_list : std::false_type {};
+
+template <typename... Ts>
+struct is_proxy_type_list<ProxyTypeList<Ts...>> : std::true_type {};
+
+// Generator which conditionally creates type lists.
+// It recognizes if a requested type list should be created
+// and prevents creating a new type list nested within another one.
+template <typename T>
+struct GenerateTypeList {
  private:
+  using proxy = typename std::conditional<is_proxy_type_list<T>::value, T,
+                                          ProxyTypeList<T>>::type;
 
-#if GTEST_OS_SYMBIAN
-  // These are needed as the Nokia Symbian Compiler cannot decide between
-  // const T& and const T* in a function template. The Nokia compiler _can_
-  // decide between class template specializations for T and T*, so a
-  // tr1::type_traits-like is_pointer works, and we can overload on that.
-  template <typename T>
-  inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) {
-    if (pointer == NULL) {
-      *ss_ << "(null)";
-    } else {
-      *ss_ << pointer;
-    }
-  }
-  template <typename T>
-  inline void StreamHelper(internal::false_type /*is_pointer*/,
-                           const T& value) {
-    // See the comments in Message& operator <<(const T&) above for why
-    // we need this using statement.
-    using ::operator <<;
-    *ss_ << value;
-  }
-#endif  // GTEST_OS_SYMBIAN
-
-  // We'll hold the text streamed to this object here.
-  const internal::scoped_ptr< ::std::stringstream> ss_;
-
-  // We declare (but don't implement) this to prevent the compiler
-  // from implementing the assignment operator.
-  void operator=(const Message&);
+ public:
+  using type = typename proxy::type;
 };
 
-// Streams a Message to an ostream.
-inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
-  return os << sb.GetString();
-}
-
-namespace internal {
+}  // namespace internal
 
-// Converts a streamable value to an std::string.  A NULL pointer is
-// converted to "(null)".  When the input value is a ::string,
-// ::std::string, ::wstring, or ::std::wstring object, each NUL
-// character in it is replaced with "\\0".
-template <typename T>
-std::string StreamableToString(const T& streamable) {
-  return (Message() << streamable).GetString();
-}
+template <typename... Ts>
+using Types = internal::ProxyTypeList<Ts...>;
 
-}  // namespace internal
 }  // namespace testing
 
-#endif  // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+
+// Due to C++ preprocessor weirdness, we need double indirection to
+// concatenate two tokens when one of them is __LINE__.  Writing
 //
-// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
+//   foo ## __LINE__
 //
-// The Google C++ Testing Framework (Google Test)
+// will result in the token foo__LINE__, instead of foo followed by
+// the current line number.  For more details, see
+// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
+#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
+#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar
+
+// Stringifies its argument.
+// Work around a bug in visual studio which doesn't accept code like this:
 //
-// This header file declares the String class and functions used internally by
-// Google Test.  They are subject to change without notice. They should not used
-// by code external to Google Test.
+//   #define GTEST_STRINGIFY_(name) #name
+//   #define MACRO(a, b, c) ... GTEST_STRINGIFY_(a) ...
+//   MACRO(, x, y)
 //
-// This header file is #included by <gtest/internal/gtest-internal.h>.
-// It should not be #included by other files.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
-
-#ifdef __BORLANDC__
-// string.h is not guaranteed to provide strcpy on C++ Builder.
-# include <mem.h>
-#endif
-
-#include <string.h>
-#include <string>
+// Complaining about the argument to GTEST_STRINGIFY_ being empty.
+// This is allowed by the spec.
+#define GTEST_STRINGIFY_HELPER_(name, ...) #name
+#define GTEST_STRINGIFY_(...) GTEST_STRINGIFY_HELPER_(__VA_ARGS__, )
 
+namespace proto2 {
+class MessageLite;
+}
 
 namespace testing {
-namespace internal {
-
-// String - an abstract class holding static string utilities.
-class GTEST_API_ String {
- public:
-  // Static utility methods
-
-  // Clones a 0-terminated C string, allocating memory using new.  The
-  // caller is responsible for deleting the return value using
-  // delete[].  Returns the cloned string, or NULL if the input is
-  // NULL.
-  //
-  // This is different from strdup() in string.h, which allocates
-  // memory using malloc().
-  static const char* CloneCString(const char* c_str);
-
-#if GTEST_OS_WINDOWS_MOBILE
-  // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
-  // able to pass strings to Win32 APIs on CE we need to convert them
-  // to 'Unicode', UTF-16.
 
-  // Creates a UTF-16 wide string from the given ANSI string, allocating
-  // memory using new. The caller is responsible for deleting the return
-  // value using delete[]. Returns the wide string, or NULL if the
-  // input is NULL.
-  //
-  // The wide string is created using the ANSI codepage (CP_ACP) to
-  // match the behaviour of the ANSI versions of Win32 calls and the
-  // C runtime.
-  static LPCWSTR AnsiToUtf16(const char* c_str);
+// Forward declarations.
 
-  // Creates an ANSI string from the given wide string, allocating
-  // memory using new. The caller is responsible for deleting the return
-  // value using delete[]. Returns the ANSI string, or NULL if the
-  // input is NULL.
-  //
-  // The returned string is created using the ANSI codepage (CP_ACP) to
-  // match the behaviour of the ANSI versions of Win32 calls and the
-  // C runtime.
-  static const char* Utf16ToAnsi(LPCWSTR utf16_str);
-#endif
+class AssertionResult;                 // Result of an assertion.
+class Message;                         // Represents a failure message.
+class Test;                            // Represents a test.
+class TestInfo;                        // Information about a test.
+class TestPartResult;                  // Result of a test part.
+class UnitTest;                        // A collection of test suites.
 
-  // Compares two C strings.  Returns true iff they have the same content.
-  //
-  // Unlike strcmp(), this function can handle NULL argument(s).  A
-  // NULL C string is considered different to any non-NULL C string,
-  // including the empty string.
-  static bool CStringEquals(const char* lhs, const char* rhs);
+template <typename T>
+::std::string PrintToString(const T& value);
 
-  // Converts a wide C string to a String using the UTF-8 encoding.
-  // NULL will be converted to "(null)".  If an error occurred during
-  // the conversion, "(failed to convert from wide string)" is
-  // returned.
-  static std::string ShowWideCString(const wchar_t* wide_c_str);
+namespace internal {
 
-  // Compares two wide C strings.  Returns true iff they have the same
-  // content.
-  //
-  // Unlike wcscmp(), this function can handle NULL argument(s).  A
-  // NULL C string is considered different to any non-NULL C string,
-  // including the empty string.
-  static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
+struct TraceInfo;                      // Information about a trace point.
+class TestInfoImpl;                    // Opaque implementation of TestInfo
+class UnitTestImpl;                    // Opaque implementation of UnitTest
 
-  // Compares two C strings, ignoring case.  Returns true iff they
-  // have the same content.
-  //
-  // Unlike strcasecmp(), this function can handle NULL argument(s).
-  // A NULL C string is considered different to any non-NULL C string,
-  // including the empty string.
-  static bool CaseInsensitiveCStringEquals(const char* lhs,
-                                           const char* rhs);
+// The text used in failure messages to indicate the start of the
+// stack trace.
+GTEST_API_ extern const char kStackTraceMarker[];
 
-  // Compares two wide C strings, ignoring case.  Returns true iff they
-  // have the same content.
-  //
-  // Unlike wcscasecmp(), this function can handle NULL argument(s).
-  // A NULL C string is considered different to any non-NULL wide C string,
-  // including the empty string.
-  // NB: The implementations on different platforms slightly differ.
-  // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
-  // environment variable. On GNU platform this method uses wcscasecmp
-  // which compares according to LC_CTYPE category of the current locale.
-  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
-  // current locale.
-  static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
-                                               const wchar_t* rhs);
+// An IgnoredValue object can be implicitly constructed from ANY value.
+class IgnoredValue {
+  struct Sink {};
+ public:
+  // This constructor template allows any value to be implicitly
+  // converted to IgnoredValue.  The object has no data member and
+  // doesn't try to remember anything about the argument.  We
+  // deliberately omit the 'explicit' keyword in order to allow the
+  // conversion to be implicit.
+  // Disable the conversion if T already has a magical conversion operator.
+  // Otherwise we get ambiguity.
+  template <typename T,
+            typename std::enable_if<!std::is_convertible<T, Sink>::value,
+                                    int>::type = 0>
+  IgnoredValue(const T& /* ignored */) {}  // NOLINT(runtime/explicit)
+};
 
-  // Returns true iff the given string ends with the given suffix, ignoring
-  // case. Any string is considered to end with an empty suffix.
-  static bool EndsWithCaseInsensitive(
-      const std::string& str, const std::string& suffix);
+// Appends the user-supplied message to the Google-Test-generated message.
+GTEST_API_ std::string AppendUserMessage(
+    const std::string& gtest_msg, const Message& user_msg);
 
-  // Formats an int value as "%02d".
-  static std::string FormatIntWidth2(int value);  // "%02d" for width == 2
+#if GTEST_HAS_EXCEPTIONS
 
-  // Formats an int value as "%X".
-  static std::string FormatHexInt(int value);
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4275 \
+/* an exported class was derived from a class that was not exported */)
 
-  // Formats a byte as "%02X".
-  static std::string FormatByte(unsigned char value);
+// This exception is thrown by (and only by) a failed Google Test
+// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
+// are enabled).  We derive it from std::runtime_error, which is for
+// errors presumably detectable only at run time.  Since
+// std::runtime_error inherits from std::exception, many testing
+// frameworks know how to extract and print the message inside it.
+class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
+ public:
+  explicit GoogleTestFailureException(const TestPartResult& failure);
+};
 
- private:
-  String();  // Not meant to be instantiated.
-};  // class String
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4275
 
-// Gets the content of the stringstream's buffer as an std::string.  Each '\0'
-// character in the buffer is replaced with "\\0".
-GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
+#endif  // GTEST_HAS_EXCEPTIONS
 
-}  // namespace internal
-}  // namespace testing
+namespace edit_distance {
+// Returns the optimal edits to go from 'left' to 'right'.
+// All edits cost the same, with replace having lower priority than
+// add/remove.
+// Simple implementation of the Wagner-Fischer algorithm.
+// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
+enum EditType { kMatch, kAdd, kRemove, kReplace };
+GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
+    const std::vector<size_t>& left, const std::vector<size_t>& right);
+
+// Same as above, but the input is represented as strings.
+GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
+    const std::vector<std::string>& left,
+    const std::vector<std::string>& right);
+
+// Create a diff of the input strings in Unified diff format.
+GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+                                         const std::vector<std::string>& right,
+                                         size_t context = 2);
+
+}  // namespace edit_distance
+
+// Calculate the diff between 'left' and 'right' and return it in unified diff
+// format.
+// If not null, stores in 'total_line_count' the total number of lines found
+// in left + right.
+GTEST_API_ std::string DiffStrings(const std::string& left,
+                                   const std::string& right,
+                                   size_t* total_line_count);
 
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: keith.ray@gmail.com (Keith Ray)
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
 //
-// Google Test filepath utilities
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
 //
-// This header file declares classes and functions used internally by
-// Google Test.  They are subject to change without notice.
+//   expected_expression: "foo"
+//   actual_expression:   "bar"
+//   expected_value:      "5"
+//   actual_value:        "6"
 //
-// This file is #included in <gtest/internal/gtest-internal.h>.
-// Do not include this header file separately!
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+// The ignoring_case parameter is true if and only if the assertion is a
+// *_STRCASEEQ*.  When it's true, the string " (ignoring case)" will
+// be inserted into the message.
+GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
+                                     const char* actual_expression,
+                                     const std::string& expected_value,
+                                     const std::string& actual_value,
+                                     bool ignoring_case);
 
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+GTEST_API_ std::string GetBoolAssertionFailureMessage(
+    const AssertionResult& assertion_result,
+    const char* expression_text,
+    const char* actual_predicate_value,
+    const char* expected_predicate_value);
 
-namespace testing {
-namespace internal {
-
-// FilePath - a class for file and directory pathname manipulation which
-// handles platform-specific conventions (like the pathname separator).
-// Used for helper functions for naming files in a directory for xml output.
-// Except for Set methods, all methods are const or static, which provides an
-// "immutable value object" -- useful for peace of mind.
-// A FilePath with a value ending in a path separator ("like/this/") represents
-// a directory, otherwise it is assumed to represent a file. In either case,
-// it may or may not represent an actual file or directory in the file system.
-// Names are NOT checked for syntax correctness -- no checking for illegal
-// characters, malformed paths, etc.
-
-class GTEST_API_ FilePath {
+// This template class represents an IEEE floating-point number
+// (either single-precision or double-precision, depending on the
+// template parameters).
+//
+// The purpose of this class is to do more sophisticated number
+// comparison.  (Due to round-off error, etc, it's very unlikely that
+// two floating-points will be equal exactly.  Hence a naive
+// comparison by the == operation often doesn't work.)
+//
+// Format of IEEE floating-point:
+//
+//   The most-significant bit being the leftmost, an IEEE
+//   floating-point looks like
+//
+//     sign_bit exponent_bits fraction_bits
+//
+//   Here, sign_bit is a single bit that designates the sign of the
+//   number.
+//
+//   For float, there are 8 exponent bits and 23 fraction bits.
+//
+//   For double, there are 11 exponent bits and 52 fraction bits.
+//
+//   More details can be found at
+//   http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
+//
+// Template parameter:
+//
+//   RawType: the raw floating-point type (either float or double)
+template <typename RawType>
+class FloatingPoint {
  public:
-  FilePath() : pathname_("") { }
-  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }
-
-  explicit FilePath(const std::string& pathname) : pathname_(pathname) {
-    Normalize();
-  }
+  // Defines the unsigned integer type that has the same size as the
+  // floating point number.
+  typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
 
-  FilePath& operator=(const FilePath& rhs) {
-    Set(rhs);
-    return *this;
-  }
+  // Constants.
 
-  void Set(const FilePath& rhs) {
-    pathname_ = rhs.pathname_;
-  }
+  // # of bits in a number.
+  static const size_t kBitCount = 8*sizeof(RawType);
 
-  const std::string& string() const { return pathname_; }
-  const char* c_str() const { return pathname_.c_str(); }
+  // # of fraction bits in a number.
+  static const size_t kFractionBitCount =
+    std::numeric_limits<RawType>::digits - 1;
 
-  // Returns the current working directory, or "" if unsuccessful.
-  static FilePath GetCurrentDir();
+  // # of exponent bits in a number.
+  static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
 
-  // Given directory = "dir", base_name = "test", number = 0,
-  // extension = "xml", returns "dir/test.xml". If number is greater
-  // than zero (e.g., 12), returns "dir/test_12.xml".
-  // On Windows platform, uses \ as the separator rather than /.
-  static FilePath MakeFileName(const FilePath& directory,
-                               const FilePath& base_name,
-                               int number,
-                               const char* extension);
+  // The mask for the sign bit.
+  static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
 
-  // Given directory = "dir", relative_path = "test.xml",
-  // returns "dir/test.xml".
-  // On Windows, uses \ as the separator rather than /.
-  static FilePath ConcatPaths(const FilePath& directory,
-                              const FilePath& relative_path);
+  // The mask for the fraction bits.
+  static const Bits kFractionBitMask =
+    ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
 
-  // Returns a pathname for a file that does not currently exist. The pathname
-  // will be directory/base_name.extension or
-  // directory/base_name_<number>.extension if directory/base_name.extension
-  // already exists. The number will be incremented until a pathname is found
-  // that does not already exist.
-  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
-  // There could be a race condition if two or more processes are calling this
-  // function at the same time -- they could both pick the same filename.
-  static FilePath GenerateUniqueFileName(const FilePath& directory,
-                                         const FilePath& base_name,
-                                         const char* extension);
+  // The mask for the exponent bits.
+  static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
 
-  // Returns true iff the path is "".
-  bool IsEmpty() const { return pathname_.empty(); }
+  // How many ULP's (Units in the Last Place) we want to tolerate when
+  // comparing two numbers.  The larger the value, the more error we
+  // allow.  A 0 value means that two numbers must be exactly the same
+  // to be considered equal.
+  //
+  // The maximum error of a single floating-point operation is 0.5
+  // units in the last place.  On Intel CPU's, all floating-point
+  // calculations are done with 80-bit precision, while double has 64
+  // bits.  Therefore, 4 should be enough for ordinary use.
+  //
+  // See the following article for more details on ULP:
+  // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+  static const uint32_t kMaxUlps = 4;
 
-  // If input name has a trailing separator character, removes it and returns
-  // the name, otherwise return the name string unmodified.
-  // On Windows platform, uses \ as the separator, other platforms use /.
-  FilePath RemoveTrailingPathSeparator() const;
+  // Constructs a FloatingPoint from a raw floating-point number.
+  //
+  // On an Intel CPU, passing a non-normalized NAN (Not a Number)
+  // around may change its bits, although the new value is guaranteed
+  // to be also a NAN.  Therefore, don't expect this constructor to
+  // preserve the bits in x when x is a NAN.
+  explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
 
-  // Returns a copy of the FilePath with the directory part removed.
-  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
-  // FilePath("file"). If there is no directory part ("just_a_file"), it returns
-  // the FilePath unmodified. If there is no file part ("just_a_dir/") it
-  // returns an empty FilePath ("").
-  // On Windows platform, '\' is the path separator, otherwise it is '/'.
-  FilePath RemoveDirectoryName() const;
+  // Static methods
 
-  // RemoveFileName returns the directory path with the filename removed.
-  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
-  // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
-  // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
-  // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
-  // On Windows platform, '\' is the path separator, otherwise it is '/'.
-  FilePath RemoveFileName() const;
+  // Reinterprets a bit pattern as a floating-point number.
+  //
+  // This function is needed to test the AlmostEquals() method.
+  static RawType ReinterpretBits(const Bits bits) {
+    FloatingPoint fp(0);
+    fp.u_.bits_ = bits;
+    return fp.u_.value_;
+  }
 
-  // Returns a copy of the FilePath with the case-insensitive extension removed.
-  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
-  // FilePath("dir/file"). If a case-insensitive extension is not
-  // found, returns a copy of the original FilePath.
-  FilePath RemoveExtension(const char* extension) const;
+  // Returns the floating-point number that represent positive infinity.
+  static RawType Infinity() {
+    return ReinterpretBits(kExponentBitMask);
+  }
 
-  // Creates directories so that path exists. Returns true if successful or if
-  // the directories already exist; returns false if unable to create
-  // directories for any reason. Will also return false if the FilePath does
-  // not represent a directory (that is, it doesn't end with a path separator).
-  bool CreateDirectoriesRecursively() const;
+  // Returns the maximum representable finite floating-point number.
+  static RawType Max();
 
-  // Create the directory so that path exists. Returns true if successful or
-  // if the directory already exists; returns false if unable to create the
-  // directory for any reason, including if the parent directory does not
-  // exist. Not named "CreateDirectory" because that's a macro on Windows.
-  bool CreateFolder() const;
+  // Non-static methods
 
-  // Returns true if FilePath describes something in the file-system,
-  // either a file, directory, or whatever, and that something exists.
-  bool FileOrDirectoryExists() const;
+  // Returns the bits that represents this number.
+  const Bits &bits() const { return u_.bits_; }
 
-  // Returns true if pathname describes a directory in the file-system
-  // that exists.
-  bool DirectoryExists() const;
+  // Returns the exponent bits of this number.
+  Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
 
-  // Returns true if FilePath ends with a path separator, which indicates that
-  // it is intended to represent a directory. Returns false otherwise.
-  // This does NOT check that a directory (or file) actually exists.
-  bool IsDirectory() const;
+  // Returns the fraction bits of this number.
+  Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }
 
-  // Returns true if pathname describes a root directory. (Windows has one
-  // root directory per disk drive.)
-  bool IsRootDirectory() const;
+  // Returns the sign bit of this number.
+  Bits sign_bit() const { return kSignBitMask & u_.bits_; }
 
-  // Returns true if pathname describes an absolute path.
-  bool IsAbsolutePath() const;
+  // Returns true if and only if this is NAN (not a number).
+  bool is_nan() const {
+    // It's a NAN if the exponent bits are all ones and the fraction
+    // bits are not entirely zeros.
+    return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
+  }
 
- private:
-  // Replaces multiple consecutive separators with a single separator.
-  // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
-  // redundancies that might be in a pathname involving "." or "..".
-  //
-  // A pathname with multiple consecutive separators may occur either through
-  // user error or as a result of some scripts or APIs that generate a pathname
-  // with a trailing separator. On other platforms the same API or script
-  // may NOT generate a pathname with a trailing "/". Then elsewhere that
-  // pathname may have another "/" and pathname components added to it,
-  // without checking for the separator already being there.
-  // The script language and operating system may allow paths like "foo//bar"
-  // but some of the functions in FilePath will not handle that correctly. In
-  // particular, RemoveTrailingPathSeparator() only removes one separator, and
-  // it is called in CreateDirectoriesRecursively() assuming that it will change
-  // a pathname from directory syntax (trailing separator) to filename syntax.
+  // Returns true if and only if this number is at most kMaxUlps ULP's away
+  // from rhs.  In particular, this function:
   //
-  // On Windows this method also replaces the alternate path separator '/' with
-  // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
-  // "bar\\foo".
-
-  void Normalize();
-
-  // Returns a pointer to the last occurrence of a valid path separator in
-  // the FilePath. On Windows, for example, both '/' and '\' are valid path
-  // separators. Returns NULL if no path separator was found.
-  const char* FindLastPathSeparator() const;
-
-  std::string pathname_;
-};  // class FilePath
+  //   - returns false if either number is (or both are) NAN.
+  //   - treats really large numbers as almost equal to infinity.
+  //   - thinks +0.0 and -0.0 are 0 DLP's apart.
+  bool AlmostEquals(const FloatingPoint& rhs) const {
+    // The IEEE standard says that any comparison operation involving
+    // a NAN must return false.
+    if (is_nan() || rhs.is_nan()) return false;
 
-}  // namespace internal
-}  // namespace testing
+    return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
+        <= kMaxUlps;
+  }
 
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
-// This file was GENERATED by command:
-//     pump.py gtest-type-util.h.pump
-// DO NOT EDIT BY HAND!!!
+ private:
+  // The data type used to store the actual floating-point number.
+  union FloatingPointUnion {
+    RawType value_;  // The raw floating-point number.
+    Bits bits_;      // The bits that represent the number.
+  };
 
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Type utilities needed for implementing typed and type-parameterized
-// tests.  This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
-//
-// Currently we support at most 50 types in a list, and at most 50
-// type-parameterized tests in one type-parameterized test case.
-// Please contact googletestframework@googlegroups.com if you need
-// more.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
-
-
-// #ifdef __GNUC__ is too general here.  It is possible to use gcc without using
-// libstdc++ (which is where cxxabi.h comes from).
-# if GTEST_HAS_CXXABI_H_
-#  include <cxxabi.h>
-# elif defined(__HP_aCC)
-#  include <acxx_demangle.h>
-# endif  // GTEST_HASH_CXXABI_H_
-
-namespace testing {
-namespace internal {
-
-// GetTypeName<T>() returns a human-readable name of type T.
-// NB: This function is also used in Google Mock, so don't move it inside of
-// the typed-test-only section below.
-template <typename T>
-std::string GetTypeName() {
-# if GTEST_HAS_RTTI
-
-  const char* const name = typeid(T).name();
-#  if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
-  int status = 0;
-  // gcc's implementation of typeid(T).name() mangles the type name,
-  // so we have to demangle it.
-#   if GTEST_HAS_CXXABI_H_
-  using abi::__cxa_demangle;
-#   endif  // GTEST_HAS_CXXABI_H_
-  char* const readable_name = __cxa_demangle(name, 0, 0, &status);
-  const std::string name_str(status == 0 ? readable_name : name);
-  free(readable_name);
-  return name_str;
-#  else
-  return name;
-#  endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC
-
-# else
+  // Converts an integer from the sign-and-magnitude representation to
+  // the biased representation.  More precisely, let N be 2 to the
+  // power of (kBitCount - 1), an integer x is represented by the
+  // unsigned number x + N.
+  //
+  // For instance,
+  //
+  //   -N + 1 (the most negative number representable using
+  //          sign-and-magnitude) is represented by 1;
+  //   0      is represented by N; and
+  //   N - 1  (the biggest number representable using
+  //          sign-and-magnitude) is represented by 2N - 1.
+  //
+  // Read http://en.wikipedia.org/wiki/Signed_number_representations
+  // for more details on signed number representations.
+  static Bits SignAndMagnitudeToBiased(const Bits &sam) {
+    if (kSignBitMask & sam) {
+      // sam represents a negative number.
+      return ~sam + 1;
+    } else {
+      // sam represents a positive number.
+      return kSignBitMask | sam;
+    }
+  }
 
-  return "<type>";
+  // Given two numbers in the sign-and-magnitude representation,
+  // returns the distance between them as an unsigned number.
+  static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
+                                                     const Bits &sam2) {
+    const Bits biased1 = SignAndMagnitudeToBiased(sam1);
+    const Bits biased2 = SignAndMagnitudeToBiased(sam2);
+    return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
+  }
 
-# endif  // GTEST_HAS_RTTI
-}
+  FloatingPointUnion u_;
+};
 
-#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+// We cannot use std::numeric_limits<T>::max() as it clashes with the max()
+// macro defined by <windows.h>.
+template <>
+inline float FloatingPoint<float>::Max() { return FLT_MAX; }
+template <>
+inline double FloatingPoint<double>::Max() { return DBL_MAX; }
 
-// AssertyTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
-// type.  This can be used as a compile-time assertion to ensure that
-// two types are equal.
+// Typedefs the instances of the FloatingPoint template class that we
+// care to use.
+typedef FloatingPoint<float> Float;
+typedef FloatingPoint<double> Double;
 
-template <typename T1, typename T2>
-struct AssertTypeEq;
+// In order to catch the mistake of putting tests that use different
+// test fixture classes in the same test suite, we need to assign
+// unique IDs to fixture classes and compare them.  The TypeId type is
+// used to hold such IDs.  The user should treat TypeId as an opaque
+// type: the only operation allowed on TypeId values is to compare
+// them for equality using the == operator.
+typedef const void* TypeId;
 
 template <typename T>
-struct AssertTypeEq<T, T> {
-  typedef bool type;
+class TypeIdHelper {
+ public:
+  // dummy_ must not have a const type.  Otherwise an overly eager
+  // compiler (e.g. MSVC 7.1 & 8.0) may try to merge
+  // TypeIdHelper<T>::dummy_ for different Ts as an "optimization".
+  static bool dummy_;
 };
 
-// A unique type used as the default value for the arguments of class
-// template Types.  This allows us to simulate variadic templates
-// (e.g. Types<int>, Type<int, double>, and etc), which C++ doesn't
-// support directly.
-struct None {};
-
-// The following family of struct and struct templates are used to
-// represent type lists.  In particular, TypesN<T1, T2, ..., TN>
-// represents a type list with N types (T1, T2, ..., and TN) in it.
-// Except for Types0, every struct in the family has two member types:
-// Head for the first type in the list, and Tail for the rest of the
-// list.
+template <typename T>
+bool TypeIdHelper<T>::dummy_ = false;
 
-// The empty type list.
-struct Types0 {};
+// GetTypeId<T>() returns the ID of type T.  Different values will be
+// returned for different types.  Calling the function twice with the
+// same type argument is guaranteed to return the same ID.
+template <typename T>
+TypeId GetTypeId() {
+  // The compiler is required to allocate a different
+  // TypeIdHelper<T>::dummy_ variable for each T used to instantiate
+  // the template.  Therefore, the address of dummy_ is guaranteed to
+  // be unique.
+  return &(TypeIdHelper<T>::dummy_);
+}
 
-// Type lists of length 1, 2, 3, and so on.
+// Returns the type ID of ::testing::Test.  Always call this instead
+// of GetTypeId< ::testing::Test>() to get the type ID of
+// ::testing::Test, as the latter may give the wrong result due to a
+// suspected linker bug when compiling Google Test as a Mac OS X
+// framework.
+GTEST_API_ TypeId GetTestTypeId();
 
-template <typename T1>
-struct Types1 {
-  typedef T1 Head;
-  typedef Types0 Tail;
-};
-template <typename T1, typename T2>
-struct Types2 {
-  typedef T1 Head;
-  typedef Types1<T2> Tail;
-};
+// Defines the abstract factory interface that creates instances
+// of a Test object.
+class TestFactoryBase {
+ public:
+  virtual ~TestFactoryBase() {}
 
-template <typename T1, typename T2, typename T3>
-struct Types3 {
-  typedef T1 Head;
-  typedef Types2<T2, T3> Tail;
-};
+  // Creates a test instance to run. The instance is both created and destroyed
+  // within TestInfoImpl::Run()
+  virtual Test* CreateTest() = 0;
 
-template <typename T1, typename T2, typename T3, typename T4>
-struct Types4 {
-  typedef T1 Head;
-  typedef Types3<T2, T3, T4> Tail;
-};
+ protected:
+  TestFactoryBase() {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-struct Types5 {
-  typedef T1 Head;
-  typedef Types4<T2, T3, T4, T5> Tail;
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6>
-struct Types6 {
-  typedef T1 Head;
-  typedef Types5<T2, T3, T4, T5, T6> Tail;
+// This class provides implementation of TeastFactoryBase interface.
+// It is used in TEST and TEST_F macros.
+template <class TestClass>
+class TestFactoryImpl : public TestFactoryBase {
+ public:
+  Test* CreateTest() override { return new TestClass; }
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7>
-struct Types7 {
-  typedef T1 Head;
-  typedef Types6<T2, T3, T4, T5, T6, T7> Tail;
-};
+#if GTEST_OS_WINDOWS
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8>
-struct Types8 {
-  typedef T1 Head;
-  typedef Types7<T2, T3, T4, T5, T6, T7, T8> Tail;
-};
+// Predicate-formatters for implementing the HRESULT checking macros
+// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
+// We pass a long instead of HRESULT to avoid causing an
+// include dependency for the HRESULT type.
+GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
+                                            long hr);  // NOLINT
+GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
+                                            long hr);  // NOLINT
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9>
-struct Types9 {
-  typedef T1 Head;
-  typedef Types8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
-};
+#endif  // GTEST_OS_WINDOWS
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10>
-struct Types10 {
-  typedef T1 Head;
-  typedef Types9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
-};
+// Types of SetUpTestSuite() and TearDownTestSuite() functions.
+using SetUpTestSuiteFunc = void (*)();
+using TearDownTestSuiteFunc = void (*)();
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11>
-struct Types11 {
-  typedef T1 Head;
-  typedef Types10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
-};
+struct CodeLocation {
+  CodeLocation(const std::string& a_file, int a_line)
+      : file(a_file), line(a_line) {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12>
-struct Types12 {
-  typedef T1 Head;
-  typedef Types11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
+  std::string file;
+  int line;
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13>
-struct Types13 {
-  typedef T1 Head;
-  typedef Types12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
-};
+//  Helper to identify which setup function for TestCase / TestSuite to call.
+//  Only one function is allowed, either TestCase or TestSute but not both.
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14>
-struct Types14 {
-  typedef T1 Head;
-  typedef Types13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> Tail;
-};
+// Utility functions to help SuiteApiResolver
+using SetUpTearDownSuiteFuncType = void (*)();
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15>
-struct Types15 {
-  typedef T1 Head;
-  typedef Types14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15> Tail;
-};
+inline SetUpTearDownSuiteFuncType GetNotDefaultOrNull(
+    SetUpTearDownSuiteFuncType a, SetUpTearDownSuiteFuncType def) {
+  return a == def ? nullptr : a;
+}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16>
-struct Types16 {
-  typedef T1 Head;
-  typedef Types15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16> Tail;
-};
+template <typename T>
+//  Note that SuiteApiResolver inherits from T because
+//  SetUpTestSuite()/TearDownTestSuite() could be protected. Ths way
+//  SuiteApiResolver can access them.
+struct SuiteApiResolver : T {
+  // testing::Test is only forward declared at this point. So we make it a
+  // dependend class for the compiler to be OK with it.
+  using Test =
+      typename std::conditional<sizeof(T) != 0, ::testing::Test, void>::type;
+
+  static SetUpTearDownSuiteFuncType GetSetUpCaseOrSuite(const char* filename,
+                                                        int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+    SetUpTearDownSuiteFuncType test_case_fp =
+        GetNotDefaultOrNull(&T::SetUpTestCase, &Test::SetUpTestCase);
+    SetUpTearDownSuiteFuncType test_suite_fp =
+        GetNotDefaultOrNull(&T::SetUpTestSuite, &Test::SetUpTestSuite);
+
+    GTEST_CHECK_(!test_case_fp || !test_suite_fp)
+        << "Test can not provide both SetUpTestSuite and SetUpTestCase, please "
+           "make sure there is only one present at "
+        << filename << ":" << line_num;
+
+    return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+    (void)(filename);
+    (void)(line_num);
+    return &T::SetUpTestSuite;
+#endif
+  }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17>
-struct Types17 {
-  typedef T1 Head;
-  typedef Types16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17> Tail;
-};
+  static SetUpTearDownSuiteFuncType GetTearDownCaseOrSuite(const char* filename,
+                                                           int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+    SetUpTearDownSuiteFuncType test_case_fp =
+        GetNotDefaultOrNull(&T::TearDownTestCase, &Test::TearDownTestCase);
+    SetUpTearDownSuiteFuncType test_suite_fp =
+        GetNotDefaultOrNull(&T::TearDownTestSuite, &Test::TearDownTestSuite);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18>
-struct Types18 {
-  typedef T1 Head;
-  typedef Types17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18> Tail;
-};
+    GTEST_CHECK_(!test_case_fp || !test_suite_fp)
+        << "Test can not provide both TearDownTestSuite and TearDownTestCase,"
+           " please make sure there is only one present at"
+        << filename << ":" << line_num;
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19>
-struct Types19 {
-  typedef T1 Head;
-  typedef Types18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19> Tail;
+    return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+    (void)(filename);
+    (void)(line_num);
+    return &T::TearDownTestSuite;
+#endif
+  }
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20>
-struct Types20 {
-  typedef T1 Head;
-  typedef Types19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20> Tail;
-};
+// Creates a new TestInfo object and registers it with Google Test;
+// returns the created object.
+//
+// Arguments:
+//
+//   test_suite_name:  name of the test suite
+//   name:             name of the test
+//   type_param:       the name of the test's type parameter, or NULL if
+//                     this is not a typed or a type-parameterized test.
+//   value_param:      text representation of the test's value parameter,
+//                     or NULL if this is not a type-parameterized test.
+//   code_location:    code location where the test is defined
+//   fixture_class_id: ID of the test fixture class
+//   set_up_tc:        pointer to the function that sets up the test suite
+//   tear_down_tc:     pointer to the function that tears down the test suite
+//   factory:          pointer to the factory that creates a test object.
+//                     The newly created TestInfo instance will assume
+//                     ownership of the factory object.
+GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
+    const char* test_suite_name, const char* name, const char* type_param,
+    const char* value_param, CodeLocation code_location,
+    TypeId fixture_class_id, SetUpTestSuiteFunc set_up_tc,
+    TearDownTestSuiteFunc tear_down_tc, TestFactoryBase* factory);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21>
-struct Types21 {
-  typedef T1 Head;
-  typedef Types20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21> Tail;
-};
+// If *pstr starts with the given prefix, modifies *pstr to be right
+// past the prefix and returns true; otherwise leaves *pstr unchanged
+// and returns false.  None of pstr, *pstr, and prefix can be NULL.
+GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22>
-struct Types22 {
-  typedef T1 Head;
-  typedef Types21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22> Tail;
-};
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23>
-struct Types23 {
-  typedef T1 Head;
-  typedef Types22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23> Tail;
-};
+// State of the definition of a type-parameterized test suite.
+class GTEST_API_ TypedTestSuitePState {
+ public:
+  TypedTestSuitePState() : registered_(false) {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24>
-struct Types24 {
-  typedef T1 Head;
-  typedef Types23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
-};
+  // Adds the given test name to defined_test_names_ and return true
+  // if the test suite hasn't been registered; otherwise aborts the
+  // program.
+  bool AddTestName(const char* file, int line, const char* case_name,
+                   const char* test_name) {
+    if (registered_) {
+      fprintf(stderr,
+              "%s Test %s must be defined before "
+              "REGISTER_TYPED_TEST_SUITE_P(%s, ...).\n",
+              FormatFileLocation(file, line).c_str(), test_name, case_name);
+      fflush(stderr);
+      posix::Abort();
+    }
+    registered_tests_.insert(
+        ::std::make_pair(test_name, CodeLocation(file, line)));
+    return true;
+  }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25>
-struct Types25 {
-  typedef T1 Head;
-  typedef Types24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
-};
+  bool TestExists(const std::string& test_name) const {
+    return registered_tests_.count(test_name) > 0;
+  }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26>
-struct Types26 {
-  typedef T1 Head;
-  typedef Types25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
-};
+  const CodeLocation& GetCodeLocation(const std::string& test_name) const {
+    RegisteredTestsMap::const_iterator it = registered_tests_.find(test_name);
+    GTEST_CHECK_(it != registered_tests_.end());
+    return it->second;
+  }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27>
-struct Types27 {
-  typedef T1 Head;
-  typedef Types26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
-};
+  // Verifies that registered_tests match the test names in
+  // defined_test_names_; returns registered_tests if successful, or
+  // aborts the program otherwise.
+  const char* VerifyRegisteredTestNames(const char* test_suite_name,
+                                        const char* file, int line,
+                                        const char* registered_tests);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28>
-struct Types28 {
-  typedef T1 Head;
-  typedef Types27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28> Tail;
-};
+ private:
+  typedef ::std::map<std::string, CodeLocation> RegisteredTestsMap;
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29>
-struct Types29 {
-  typedef T1 Head;
-  typedef Types28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29> Tail;
+  bool registered_;
+  RegisteredTestsMap registered_tests_;
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30>
-struct Types30 {
-  typedef T1 Head;
-  typedef Types29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30> Tail;
-};
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+using TypedTestCasePState = TypedTestSuitePState;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31>
-struct Types31 {
-  typedef T1 Head;
-  typedef Types30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31> Tail;
-};
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32>
-struct Types32 {
-  typedef T1 Head;
-  typedef Types31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32> Tail;
-};
+// Skips to the first non-space char after the first comma in 'str';
+// returns NULL if no comma is found in 'str'.
+inline const char* SkipComma(const char* str) {
+  const char* comma = strchr(str, ',');
+  if (comma == nullptr) {
+    return nullptr;
+  }
+  while (IsSpace(*(++comma))) {}
+  return comma;
+}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33>
-struct Types33 {
-  typedef T1 Head;
-  typedef Types32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33> Tail;
-};
+// Returns the prefix of 'str' before the first comma in it; returns
+// the entire string if it contains no comma.
+inline std::string GetPrefixUntilComma(const char* str) {
+  const char* comma = strchr(str, ',');
+  return comma == nullptr ? str : std::string(str, comma);
+}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34>
-struct Types34 {
-  typedef T1 Head;
-  typedef Types33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34> Tail;
-};
+// Splits a given string on a given delimiter, populating a given
+// vector with the fields.
+void SplitString(const ::std::string& str, char delimiter,
+                 ::std::vector< ::std::string>* dest);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35>
-struct Types35 {
-  typedef T1 Head;
-  typedef Types34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35> Tail;
+// The default argument to the template below for the case when the user does
+// not provide a name generator.
+struct DefaultNameGenerator {
+  template <typename T>
+  static std::string GetName(int i) {
+    return StreamableToString(i);
+  }
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36>
-struct Types36 {
-  typedef T1 Head;
-  typedef Types35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36> Tail;
+template <typename Provided = DefaultNameGenerator>
+struct NameGeneratorSelector {
+  typedef Provided type;
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37>
-struct Types37 {
-  typedef T1 Head;
-  typedef Types36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37> Tail;
-};
+template <typename NameGenerator>
+void GenerateNamesRecursively(internal::None, std::vector<std::string>*, int) {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38>
-struct Types38 {
-  typedef T1 Head;
-  typedef Types37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
-};
+template <typename NameGenerator, typename Types>
+void GenerateNamesRecursively(Types, std::vector<std::string>* result, int i) {
+  result->push_back(NameGenerator::template GetName<typename Types::Head>(i));
+  GenerateNamesRecursively<NameGenerator>(typename Types::Tail(), result,
+                                          i + 1);
+}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39>
-struct Types39 {
-  typedef T1 Head;
-  typedef Types38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
-};
+template <typename NameGenerator, typename Types>
+std::vector<std::string> GenerateNames() {
+  std::vector<std::string> result;
+  GenerateNamesRecursively<NameGenerator>(Types(), &result, 0);
+  return result;
+}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40>
-struct Types40 {
-  typedef T1 Head;
-  typedef Types39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41>
-struct Types41 {
-  typedef T1 Head;
-  typedef Types40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42>
-struct Types42 {
-  typedef T1 Head;
-  typedef Types41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43>
-struct Types43 {
-  typedef T1 Head;
-  typedef Types42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44>
-struct Types44 {
-  typedef T1 Head;
-  typedef Types43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45>
-struct Types45 {
-  typedef T1 Head;
-  typedef Types44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45> Tail;
-};
+// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
+// registers a list of type-parameterized tests with Google Test.  The
+// return value is insignificant - we just need to return something
+// such that we can call this function in a namespace scope.
+//
+// Implementation note: The GTEST_TEMPLATE_ macro declares a template
+// template parameter.  It's defined in gtest-type-util.h.
+template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
+class TypeParameterizedTest {
+ public:
+  // 'index' is the index of the test in the type list 'Types'
+  // specified in INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, TestSuite,
+  // Types).  Valid values for 'index' are [0, N - 1] where N is the
+  // length of Types.
+  static bool Register(const char* prefix, const CodeLocation& code_location,
+                       const char* case_name, const char* test_names, int index,
+                       const std::vector<std::string>& type_names =
+                           GenerateNames<DefaultNameGenerator, Types>()) {
+    typedef typename Types::Head Type;
+    typedef Fixture<Type> FixtureClass;
+    typedef typename GTEST_BIND_(TestSel, Type) TestClass;
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46>
-struct Types46 {
-  typedef T1 Head;
-  typedef Types45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46> Tail;
-};
+    // First, registers the first type-parameterized test in the type
+    // list.
+    MakeAndRegisterTestInfo(
+        (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name +
+         "/" + type_names[static_cast<size_t>(index)])
+            .c_str(),
+        StripTrailingSpaces(GetPrefixUntilComma(test_names)).c_str(),
+        GetTypeName<Type>().c_str(),
+        nullptr,  // No value parameter.
+        code_location, GetTypeId<FixtureClass>(),
+        SuiteApiResolver<TestClass>::GetSetUpCaseOrSuite(
+            code_location.file.c_str(), code_location.line),
+        SuiteApiResolver<TestClass>::GetTearDownCaseOrSuite(
+            code_location.file.c_str(), code_location.line),
+        new TestFactoryImpl<TestClass>);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47>
-struct Types47 {
-  typedef T1 Head;
-  typedef Types46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47> Tail;
+    // Next, recurses (at compile time) with the tail of the type list.
+    return TypeParameterizedTest<Fixture, TestSel,
+                                 typename Types::Tail>::Register(prefix,
+                                                                 code_location,
+                                                                 case_name,
+                                                                 test_names,
+                                                                 index + 1,
+                                                                 type_names);
+  }
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48>
-struct Types48 {
-  typedef T1 Head;
-  typedef Types47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47, T48> Tail;
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, class TestSel>
+class TypeParameterizedTest<Fixture, TestSel, internal::None> {
+ public:
+  static bool Register(const char* /*prefix*/, const CodeLocation&,
+                       const char* /*case_name*/, const char* /*test_names*/,
+                       int /*index*/,
+                       const std::vector<std::string>& =
+                           std::vector<std::string>() /*type_names*/) {
+    return true;
+  }
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49>
-struct Types49 {
-  typedef T1 Head;
-  typedef Types48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47, T48, T49> Tail;
-};
+GTEST_API_ void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
+                                                   CodeLocation code_location);
+GTEST_API_ void RegisterTypeParameterizedTestSuiteInstantiation(
+    const char* case_name);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49, typename T50>
-struct Types50 {
-  typedef T1 Head;
-  typedef Types49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47, T48, T49, T50> Tail;
-};
+// TypeParameterizedTestSuite<Fixture, Tests, Types>::Register()
+// registers *all combinations* of 'Tests' and 'Types' with Google
+// Test.  The return value is insignificant - we just need to return
+// something such that we can call this function in a namespace scope.
+template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
+class TypeParameterizedTestSuite {
+ public:
+  static bool Register(const char* prefix, CodeLocation code_location,
+                       const TypedTestSuitePState* state, const char* case_name,
+                       const char* test_names,
+                       const std::vector<std::string>& type_names =
+                           GenerateNames<DefaultNameGenerator, Types>()) {
+    RegisterTypeParameterizedTestSuiteInstantiation(case_name);
+    std::string test_name = StripTrailingSpaces(
+        GetPrefixUntilComma(test_names));
+    if (!state->TestExists(test_name)) {
+      fprintf(stderr, "Failed to get code location for test %s.%s at %s.",
+              case_name, test_name.c_str(),
+              FormatFileLocation(code_location.file.c_str(),
+                                 code_location.line).c_str());
+      fflush(stderr);
+      posix::Abort();
+    }
+    const CodeLocation& test_location = state->GetCodeLocation(test_name);
 
+    typedef typename Tests::Head Head;
 
-}  // namespace internal
+    // First, register the first test in 'Test' for each type in 'Types'.
+    TypeParameterizedTest<Fixture, Head, Types>::Register(
+        prefix, test_location, case_name, test_names, 0, type_names);
 
-// We don't want to require the users to write TypesN<...> directly,
-// as that would require them to count the length.  Types<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Types<int>
-// will appear as Types<int, None, None, ..., None> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Types<T1, ..., TN>, and Google Test will translate
-// that to TypesN<T1, ..., TN> internally to make error messages
-// readable.  The translation is done by the 'type' member of the
-// Types template.
-template <typename T1 = internal::None, typename T2 = internal::None,
-    typename T3 = internal::None, typename T4 = internal::None,
-    typename T5 = internal::None, typename T6 = internal::None,
-    typename T7 = internal::None, typename T8 = internal::None,
-    typename T9 = internal::None, typename T10 = internal::None,
-    typename T11 = internal::None, typename T12 = internal::None,
-    typename T13 = internal::None, typename T14 = internal::None,
-    typename T15 = internal::None, typename T16 = internal::None,
-    typename T17 = internal::None, typename T18 = internal::None,
-    typename T19 = internal::None, typename T20 = internal::None,
-    typename T21 = internal::None, typename T22 = internal::None,
-    typename T23 = internal::None, typename T24 = internal::None,
-    typename T25 = internal::None, typename T26 = internal::None,
-    typename T27 = internal::None, typename T28 = internal::None,
-    typename T29 = internal::None, typename T30 = internal::None,
-    typename T31 = internal::None, typename T32 = internal::None,
-    typename T33 = internal::None, typename T34 = internal::None,
-    typename T35 = internal::None, typename T36 = internal::None,
-    typename T37 = internal::None, typename T38 = internal::None,
-    typename T39 = internal::None, typename T40 = internal::None,
-    typename T41 = internal::None, typename T42 = internal::None,
-    typename T43 = internal::None, typename T44 = internal::None,
-    typename T45 = internal::None, typename T46 = internal::None,
-    typename T47 = internal::None, typename T48 = internal::None,
-    typename T49 = internal::None, typename T50 = internal::None>
-struct Types {
-  typedef internal::Types50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
+    // Next, recurses (at compile time) with the tail of the test list.
+    return TypeParameterizedTestSuite<Fixture, typename Tests::Tail,
+                                      Types>::Register(prefix, code_location,
+                                                       state, case_name,
+                                                       SkipComma(test_names),
+                                                       type_names);
+  }
 };
 
-template <>
-struct Types<internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types0 type;
-};
-template <typename T1>
-struct Types<T1, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types1<T1> type;
-};
-template <typename T1, typename T2>
-struct Types<T1, T2, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types2<T1, T2> type;
-};
-template <typename T1, typename T2, typename T3>
-struct Types<T1, T2, T3, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types3<T1, T2, T3> type;
-};
-template <typename T1, typename T2, typename T3, typename T4>
-struct Types<T1, T2, T3, T4, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types4<T1, T2, T3, T4> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-struct Types<T1, T2, T3, T4, T5, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types5<T1, T2, T3, T4, T5> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6>
-struct Types<T1, T2, T3, T4, T5, T6, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types6<T1, T2, T3, T4, T5, T6> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7>
-struct Types<T1, T2, T3, T4, T5, T6, T7, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types7<T1, T2, T3, T4, T5, T6, T7> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types8<T1, T2, T3, T4, T5, T6, T7, T8> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, T47, internal::None, internal::None, internal::None> {
-  typedef internal::Types47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, T47, T48, internal::None, internal::None> {
-  typedef internal::Types48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, T47, T48, T49, internal::None> {
-  typedef internal::Types49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48, T49> type;
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, typename Types>
+class TypeParameterizedTestSuite<Fixture, internal::None, Types> {
+ public:
+  static bool Register(const char* /*prefix*/, const CodeLocation&,
+                       const TypedTestSuitePState* /*state*/,
+                       const char* /*case_name*/, const char* /*test_names*/,
+                       const std::vector<std::string>& =
+                           std::vector<std::string>() /*type_names*/) {
+    return true;
+  }
 };
 
-namespace internal {
-
-# define GTEST_TEMPLATE_ template <typename T> class
-
-// The template "selector" struct TemplateSel<Tmpl> is used to
-// represent Tmpl, which must be a class template with one type
-// parameter, as a type.  TemplateSel<Tmpl>::Bind<T>::type is defined
-// as the type Tmpl<T>.  This allows us to actually instantiate the
-// template "selected" by TemplateSel<Tmpl>.
+// Returns the current OS stack trace as an std::string.
 //
-// This trick is necessary for simulating typedef for class templates,
-// which C++ doesn't support directly.
-template <GTEST_TEMPLATE_ Tmpl>
-struct TemplateSel {
-  template <typename T>
-  struct Bind {
-    typedef Tmpl<T> type;
-  };
-};
-
-# define GTEST_BIND_(TmplSel, T) \
-  TmplSel::template Bind<T>::type
-
-// A unique struct template used as the default value for the
-// arguments of class template Templates.  This allows us to simulate
-// variadic templates (e.g. Templates<int>, Templates<int, double>,
-// and etc), which C++ doesn't support directly.
-template <typename T>
-struct NoneT {};
-
-// The following family of struct and struct templates are used to
-// represent template lists.  In particular, TemplatesN<T1, T2, ...,
-// TN> represents a list of N templates (T1, T2, ..., and TN).  Except
-// for Templates0, every struct in the family has two member types:
-// Head for the selector of the first template in the list, and Tail
-// for the rest of the list.
-
-// The empty template list.
-struct Templates0 {};
-
-// Template lists of length 1, 2, 3, and so on.
-
-template <GTEST_TEMPLATE_ T1>
-struct Templates1 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates0 Tail;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
-struct Templates2 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates1<T2> Tail;
-};
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag.  The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
+// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
+GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(
+    UnitTest* unit_test, int skip_count);
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
-struct Templates3 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates2<T2, T3> Tail;
-};
+// Helpers for suppressing warnings on unreachable code or constant
+// condition.
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4>
-struct Templates4 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates3<T2, T3, T4> Tail;
-};
+// Always returns true.
+GTEST_API_ bool AlwaysTrue();
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
-struct Templates5 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates4<T2, T3, T4, T5> Tail;
-};
+// Always returns false.
+inline bool AlwaysFalse() { return !AlwaysTrue(); }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
-struct Templates6 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates5<T2, T3, T4, T5, T6> Tail;
+// Helper for suppressing false warning from Clang on a const char*
+// variable declared in a conditional expression always being NULL in
+// the else branch.
+struct GTEST_API_ ConstCharPtr {
+  ConstCharPtr(const char* str) : value(str) {}
+  operator bool() const { return true; }
+  const char* value;
 };
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7>
-struct Templates7 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates6<T2, T3, T4, T5, T6, T7> Tail;
+// Helper for declaring std::string within 'if' statement
+// in pre C++17 build environment.
+struct TrueWithString {
+  TrueWithString() = default;
+  explicit TrueWithString(const char* str) : value(str) {}
+  explicit TrueWithString(const std::string& str) : value(str) {}
+  explicit operator bool() const { return true; }
+  std::string value;
 };
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
-struct Templates8 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates7<T2, T3, T4, T5, T6, T7, T8> Tail;
-};
+// A simple Linear Congruential Generator for generating random
+// numbers with a uniform distribution.  Unlike rand() and srand(), it
+// doesn't use global state (and therefore can't interfere with user
+// code).  Unlike rand_r(), it's portable.  An LCG isn't very random,
+// but it's good enough for our purposes.
+class GTEST_API_ Random {
+ public:
+  static const uint32_t kMaxRange = 1u << 31;
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
-struct Templates9 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
-};
+  explicit Random(uint32_t seed) : state_(seed) {}
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10>
-struct Templates10 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
-};
+  void Reseed(uint32_t seed) { state_ = seed; }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
-struct Templates11 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
-};
+  // Generates a random number from [0, range).  Crashes if 'range' is
+  // 0 or greater than kMaxRange.
+  uint32_t Generate(uint32_t range);
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
-struct Templates12 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
+ private:
+  uint32_t state_;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
 };
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13>
-struct Templates13 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
-};
+// Turns const U&, U&, const U, and U all into U.
+#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
+  typename std::remove_const<typename std::remove_reference<T>::type>::type
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
-struct Templates14 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14> Tail;
-};
+// HasDebugStringAndShortDebugString<T>::value is a compile-time bool constant
+// that's true if and only if T has methods DebugString() and ShortDebugString()
+// that return std::string.
+template <typename T>
+class HasDebugStringAndShortDebugString {
+ private:
+  template <typename C>
+  static auto CheckDebugString(C*) -> typename std::is_same<
+      std::string, decltype(std::declval<const C>().DebugString())>::type;
+  template <typename>
+  static std::false_type CheckDebugString(...);
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
-struct Templates15 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15> Tail;
-};
+  template <typename C>
+  static auto CheckShortDebugString(C*) -> typename std::is_same<
+      std::string, decltype(std::declval<const C>().ShortDebugString())>::type;
+  template <typename>
+  static std::false_type CheckShortDebugString(...);
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16>
-struct Templates16 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16> Tail;
-};
+  using HasDebugStringType = decltype(CheckDebugString<T>(nullptr));
+  using HasShortDebugStringType = decltype(CheckShortDebugString<T>(nullptr));
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
-struct Templates17 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17> Tail;
+ public:
+  static constexpr bool value =
+      HasDebugStringType::value && HasShortDebugStringType::value;
 };
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
-struct Templates18 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18> Tail;
-};
+template <typename T>
+constexpr bool HasDebugStringAndShortDebugString<T>::value;
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19>
-struct Templates19 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19> Tail;
-};
+// When the compiler sees expression IsContainerTest<C>(0), if C is an
+// STL-style container class, the first overload of IsContainerTest
+// will be viable (since both C::iterator* and C::const_iterator* are
+// valid types and NULL can be implicitly converted to them).  It will
+// be picked over the second overload as 'int' is a perfect match for
+// the type of argument 0.  If C::iterator or C::const_iterator is not
+// a valid type, the first overload is not viable, and the second
+// overload will be picked.  Therefore, we can determine whether C is
+// a container class by checking the type of IsContainerTest<C>(0).
+// The value of the expression is insignificant.
+//
+// In C++11 mode we check the existence of a const_iterator and that an
+// iterator is properly implemented for the container.
+//
+// For pre-C++11 that we look for both C::iterator and C::const_iterator.
+// The reason is that C++ injects the name of a class as a member of the
+// class itself (e.g. you can refer to class iterator as either
+// 'iterator' or 'iterator::iterator').  If we look for C::iterator
+// only, for example, we would mistakenly think that a class named
+// iterator is an STL container.
+//
+// Also note that the simpler approach of overloading
+// IsContainerTest(typename C::const_iterator*) and
+// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
+typedef int IsContainer;
+template <class C,
+          class Iterator = decltype(::std::declval<const C&>().begin()),
+          class = decltype(::std::declval<const C&>().end()),
+          class = decltype(++::std::declval<Iterator&>()),
+          class = decltype(*::std::declval<Iterator>()),
+          class = typename C::const_iterator>
+IsContainer IsContainerTest(int /* dummy */) {
+  return 0;
+}
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
-struct Templates20 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20> Tail;
-};
+typedef char IsNotContainer;
+template <class C>
+IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
-struct Templates21 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21> Tail;
-};
+// Trait to detect whether a type T is a hash table.
+// The heuristic used is that the type contains an inner type `hasher` and does
+// not contain an inner type `reverse_iterator`.
+// If the container is iterable in reverse, then order might actually matter.
+template <typename T>
+struct IsHashTable {
+ private:
+  template <typename U>
+  static char test(typename U::hasher*, typename U::reverse_iterator*);
+  template <typename U>
+  static int test(typename U::hasher*, ...);
+  template <typename U>
+  static char test(...);
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22>
-struct Templates22 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22> Tail;
+ public:
+  static const bool value = sizeof(test<T>(nullptr, nullptr)) == sizeof(int);
 };
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
-struct Templates23 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23> Tail;
-};
+template <typename T>
+const bool IsHashTable<T>::value;
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
-struct Templates24 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
-};
+template <typename C,
+          bool = sizeof(IsContainerTest<C>(0)) == sizeof(IsContainer)>
+struct IsRecursiveContainerImpl;
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25>
-struct Templates25 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
-};
+template <typename C>
+struct IsRecursiveContainerImpl<C, false> : public std::false_type {};
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
-struct Templates26 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
-};
+// Since the IsRecursiveContainerImpl depends on the IsContainerTest we need to
+// obey the same inconsistencies as the IsContainerTest, namely check if
+// something is a container is relying on only const_iterator in C++11 and
+// is relying on both const_iterator and iterator otherwise
+template <typename C>
+struct IsRecursiveContainerImpl<C, true> {
+  using value_type = decltype(*std::declval<typename C::const_iterator>());
+  using type =
+      std::is_same<typename std::remove_const<
+                       typename std::remove_reference<value_type>::type>::type,
+                   C>;
+};
+
+// IsRecursiveContainer<Type> is a unary compile-time predicate that
+// evaluates whether C is a recursive container type. A recursive container
+// type is a container type whose value_type is equal to the container type
+// itself. An example for a recursive container type is
+// boost::filesystem::path, whose iterator has a value_type that is equal to
+// boost::filesystem::path.
+template <typename C>
+struct IsRecursiveContainer : public IsRecursiveContainerImpl<C>::type {};
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
-struct Templates27 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
-};
+// Utilities for native arrays.
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28>
-struct Templates28 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28> Tail;
-};
+// ArrayEq() compares two k-dimensional native arrays using the
+// elements' operator==, where k can be any integer >= 0.  When k is
+// 0, ArrayEq() degenerates into comparing a single pair of values.
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
-struct Templates29 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29> Tail;
-};
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs);
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
-struct Templates30 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30> Tail;
-};
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31>
-struct Templates31 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31> Tail;
-};
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) {
+  return internal::ArrayEq(lhs, N, rhs);
+}
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
-struct Templates32 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32> Tail;
-};
+// This helper reduces code bloat.  If we instead put its logic inside
+// the previous ArrayEq() function, arrays with different sizes would
+// lead to different copies of the template code.
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
+  for (size_t i = 0; i != size; i++) {
+    if (!internal::ArrayEq(lhs[i], rhs[i]))
+      return false;
+  }
+  return true;
+}
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
-struct Templates33 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33> Tail;
-};
+// Finds the first element in the iterator range [begin, end) that
+// equals elem.  Element may be a native array type itself.
+template <typename Iter, typename Element>
+Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
+  for (Iter it = begin; it != end; ++it) {
+    if (internal::ArrayEq(*it, elem))
+      return it;
+  }
+  return end;
+}
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34>
-struct Templates34 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34> Tail;
-};
+// CopyArray() copies a k-dimensional native array using the elements'
+// operator=, where k can be any integer >= 0.  When k is 0,
+// CopyArray() degenerates into copying a single value.
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
-struct Templates35 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35> Tail;
-};
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to);
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
-struct Templates36 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36> Tail;
-};
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline void CopyArray(const T& from, U* to) { *to = from; }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37>
-struct Templates37 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37> Tail;
-};
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline void CopyArray(const T(&from)[N], U(*to)[N]) {
+  internal::CopyArray(from, N, *to);
+}
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
-struct Templates38 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
-};
+// This helper reduces code bloat.  If we instead put its logic inside
+// the previous CopyArray() function, arrays with different sizes
+// would lead to different copies of the template code.
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to) {
+  for (size_t i = 0; i != size; i++) {
+    internal::CopyArray(from[i], to + i);
+  }
+}
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
-struct Templates39 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
-};
+// The relation between an NativeArray object (see below) and the
+// native array it represents.
+// We use 2 different structs to allow non-copyable types to be used, as long
+// as RelationToSourceReference() is passed.
+struct RelationToSourceReference {};
+struct RelationToSourceCopy {};
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40>
-struct Templates40 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
-};
+// Adapts a native array to a read-only STL-style container.  Instead
+// of the complete STL container concept, this adaptor only implements
+// members useful for Google Mock's container matchers.  New members
+// should be added as needed.  To simplify the implementation, we only
+// support Element being a raw type (i.e. having no top-level const or
+// reference modifier).  It's the client's responsibility to satisfy
+// this requirement.  Element can be an array type itself (hence
+// multi-dimensional arrays are supported).
+template <typename Element>
+class NativeArray {
+ public:
+  // STL-style container typedefs.
+  typedef Element value_type;
+  typedef Element* iterator;
+  typedef const Element* const_iterator;
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
-struct Templates41 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
-};
+  // Constructs from a native array. References the source.
+  NativeArray(const Element* array, size_t count, RelationToSourceReference) {
+    InitRef(array, count);
+  }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
-struct Templates42 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42> Tail;
-};
+  // Constructs from a native array. Copies the source.
+  NativeArray(const Element* array, size_t count, RelationToSourceCopy) {
+    InitCopy(array, count);
+  }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43>
-struct Templates43 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43> Tail;
-};
+  // Copy constructor.
+  NativeArray(const NativeArray& rhs) {
+    (this->*rhs.clone_)(rhs.array_, rhs.size_);
+  }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
-struct Templates44 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44> Tail;
-};
+  ~NativeArray() {
+    if (clone_ != &NativeArray::InitRef)
+      delete[] array_;
+  }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
-struct Templates45 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45> Tail;
-};
+  // STL-style container methods.
+  size_t size() const { return size_; }
+  const_iterator begin() const { return array_; }
+  const_iterator end() const { return array_ + size_; }
+  bool operator==(const NativeArray& rhs) const {
+    return size() == rhs.size() &&
+        ArrayEq(begin(), size(), rhs.begin());
+  }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46>
-struct Templates46 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46> Tail;
-};
+ private:
+  static_assert(!std::is_const<Element>::value, "Type must not be const");
+  static_assert(!std::is_reference<Element>::value,
+                "Type must not be a reference");
+
+  // Initializes this object with a copy of the input.
+  void InitCopy(const Element* array, size_t a_size) {
+    Element* const copy = new Element[a_size];
+    CopyArray(array, a_size, copy);
+    array_ = copy;
+    size_ = a_size;
+    clone_ = &NativeArray::InitCopy;
+  }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
-struct Templates47 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47> Tail;
-};
+  // Initializes this object with a reference of the input.
+  void InitRef(const Element* array, size_t a_size) {
+    array_ = array;
+    size_ = a_size;
+    clone_ = &NativeArray::InitRef;
+  }
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
-struct Templates48 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47, T48> Tail;
+  const Element* array_;
+  size_t size_;
+  void (NativeArray::*clone_)(const Element*, size_t);
 };
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
-    GTEST_TEMPLATE_ T49>
-struct Templates49 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47, T48, T49> Tail;
+// Backport of std::index_sequence.
+template <size_t... Is>
+struct IndexSequence {
+  using type = IndexSequence;
 };
 
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
-    GTEST_TEMPLATE_ T49, GTEST_TEMPLATE_ T50>
-struct Templates50 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47, T48, T49, T50> Tail;
+// Double the IndexSequence, and one if plus_one is true.
+template <bool plus_one, typename T, size_t sizeofT>
+struct DoubleSequence;
+template <size_t... I, size_t sizeofT>
+struct DoubleSequence<true, IndexSequence<I...>, sizeofT> {
+  using type = IndexSequence<I..., (sizeofT + I)..., 2 * sizeofT>;
 };
-
-
-// We don't want to require the users to write TemplatesN<...> directly,
-// as that would require them to count the length.  Templates<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Templates<list>
-// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Templates<T1, ..., TN>, and Google Test will translate
-// that to TemplatesN<T1, ..., TN> internally to make error messages
-// readable.  The translation is done by the 'type' member of the
-// Templates template.
-template <GTEST_TEMPLATE_ T1 = NoneT, GTEST_TEMPLATE_ T2 = NoneT,
-    GTEST_TEMPLATE_ T3 = NoneT, GTEST_TEMPLATE_ T4 = NoneT,
-    GTEST_TEMPLATE_ T5 = NoneT, GTEST_TEMPLATE_ T6 = NoneT,
-    GTEST_TEMPLATE_ T7 = NoneT, GTEST_TEMPLATE_ T8 = NoneT,
-    GTEST_TEMPLATE_ T9 = NoneT, GTEST_TEMPLATE_ T10 = NoneT,
-    GTEST_TEMPLATE_ T11 = NoneT, GTEST_TEMPLATE_ T12 = NoneT,
-    GTEST_TEMPLATE_ T13 = NoneT, GTEST_TEMPLATE_ T14 = NoneT,
-    GTEST_TEMPLATE_ T15 = NoneT, GTEST_TEMPLATE_ T16 = NoneT,
-    GTEST_TEMPLATE_ T17 = NoneT, GTEST_TEMPLATE_ T18 = NoneT,
-    GTEST_TEMPLATE_ T19 = NoneT, GTEST_TEMPLATE_ T20 = NoneT,
-    GTEST_TEMPLATE_ T21 = NoneT, GTEST_TEMPLATE_ T22 = NoneT,
-    GTEST_TEMPLATE_ T23 = NoneT, GTEST_TEMPLATE_ T24 = NoneT,
-    GTEST_TEMPLATE_ T25 = NoneT, GTEST_TEMPLATE_ T26 = NoneT,
-    GTEST_TEMPLATE_ T27 = NoneT, GTEST_TEMPLATE_ T28 = NoneT,
-    GTEST_TEMPLATE_ T29 = NoneT, GTEST_TEMPLATE_ T30 = NoneT,
-    GTEST_TEMPLATE_ T31 = NoneT, GTEST_TEMPLATE_ T32 = NoneT,
-    GTEST_TEMPLATE_ T33 = NoneT, GTEST_TEMPLATE_ T34 = NoneT,
-    GTEST_TEMPLATE_ T35 = NoneT, GTEST_TEMPLATE_ T36 = NoneT,
-    GTEST_TEMPLATE_ T37 = NoneT, GTEST_TEMPLATE_ T38 = NoneT,
-    GTEST_TEMPLATE_ T39 = NoneT, GTEST_TEMPLATE_ T40 = NoneT,
-    GTEST_TEMPLATE_ T41 = NoneT, GTEST_TEMPLATE_ T42 = NoneT,
-    GTEST_TEMPLATE_ T43 = NoneT, GTEST_TEMPLATE_ T44 = NoneT,
-    GTEST_TEMPLATE_ T45 = NoneT, GTEST_TEMPLATE_ T46 = NoneT,
-    GTEST_TEMPLATE_ T47 = NoneT, GTEST_TEMPLATE_ T48 = NoneT,
-    GTEST_TEMPLATE_ T49 = NoneT, GTEST_TEMPLATE_ T50 = NoneT>
-struct Templates {
-  typedef Templates50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
+template <size_t... I, size_t sizeofT>
+struct DoubleSequence<false, IndexSequence<I...>, sizeofT> {
+  using type = IndexSequence<I..., (sizeofT + I)...>;
 };
 
+// Backport of std::make_index_sequence.
+// It uses O(ln(N)) instantiation depth.
+template <size_t N>
+struct MakeIndexSequenceImpl
+    : DoubleSequence<N % 2 == 1, typename MakeIndexSequenceImpl<N / 2>::type,
+                     N / 2>::type {};
+
 template <>
-struct Templates<NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates0 type;
-};
-template <GTEST_TEMPLATE_ T1>
-struct Templates<T1, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates1<T1> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
-struct Templates<T1, T2, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates2<T1, T2> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
-struct Templates<T1, T2, T3, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates3<T1, T2, T3> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4>
-struct Templates<T1, T2, T3, T4, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates4<T1, T2, T3, T4> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
-struct Templates<T1, T2, T3, T4, T5, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates5<T1, T2, T3, T4, T5> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
-struct Templates<T1, T2, T3, T4, T5, T6, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates6<T1, T2, T3, T4, T5, T6> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates7<T1, T2, T3, T4, T5, T6, T7> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates8<T1, T2, T3, T4, T5, T6, T7, T8> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT> {
-  typedef Templates22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT> {
-  typedef Templates23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT> {
-  typedef Templates24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, T47, NoneT, NoneT, NoneT> {
-  typedef Templates47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47> type;
+struct MakeIndexSequenceImpl<0> : IndexSequence<> {};
+
+template <size_t N>
+using MakeIndexSequence = typename MakeIndexSequenceImpl<N>::type;
+
+template <typename... T>
+using IndexSequenceFor = typename MakeIndexSequence<sizeof...(T)>::type;
+
+template <size_t>
+struct Ignore {
+  Ignore(...);  // NOLINT
 };
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, T47, T48, NoneT, NoneT> {
-  typedef Templates48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47, T48> type;
+
+template <typename>
+struct ElemFromListImpl;
+template <size_t... I>
+struct ElemFromListImpl<IndexSequence<I...>> {
+  // We make Ignore a template to solve a problem with MSVC.
+  // A non-template Ignore would work fine with `decltype(Ignore(I))...`, but
+  // MSVC doesn't understand how to deal with that pack expansion.
+  // Use `0 * I` to have a single instantiation of Ignore.
+  template <typename R>
+  static R Apply(Ignore<0 * I>..., R (*)(), ...);
 };
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
-    GTEST_TEMPLATE_ T49>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, T47, T48, T49, NoneT> {
-  typedef Templates49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47, T48, T49> type;
+
+template <size_t N, typename... T>
+struct ElemFromList {
+  using type =
+      decltype(ElemFromListImpl<typename MakeIndexSequence<N>::type>::Apply(
+          static_cast<T (*)()>(nullptr)...));
 };
 
-// The TypeList template makes it possible to use either a single type
-// or a Types<...> list in TYPED_TEST_CASE() and
-// INSTANTIATE_TYPED_TEST_CASE_P().
+struct FlatTupleConstructTag {};
 
-template <typename T>
-struct TypeList {
-  typedef Types1<T> type;
+template <typename... T>
+class FlatTuple;
+
+template <typename Derived, size_t I>
+struct FlatTupleElemBase;
+
+template <typename... T, size_t I>
+struct FlatTupleElemBase<FlatTuple<T...>, I> {
+  using value_type = typename ElemFromList<I, T...>::type;
+  FlatTupleElemBase() = default;
+  template <typename Arg>
+  explicit FlatTupleElemBase(FlatTupleConstructTag, Arg&& t)
+      : value(std::forward<Arg>(t)) {}
+  value_type value;
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49, typename T50>
-struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45, T46, T47, T48, T49, T50> > {
-  typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type;
+template <typename Derived, typename Idx>
+struct FlatTupleBase;
+
+template <size_t... Idx, typename... T>
+struct FlatTupleBase<FlatTuple<T...>, IndexSequence<Idx...>>
+    : FlatTupleElemBase<FlatTuple<T...>, Idx>... {
+  using Indices = IndexSequence<Idx...>;
+  FlatTupleBase() = default;
+  template <typename... Args>
+  explicit FlatTupleBase(FlatTupleConstructTag, Args&&... args)
+      : FlatTupleElemBase<FlatTuple<T...>, Idx>(FlatTupleConstructTag{},
+                                                std::forward<Args>(args))... {}
+
+  template <size_t I>
+  const typename ElemFromList<I, T...>::type& Get() const {
+    return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+  }
+
+  template <size_t I>
+  typename ElemFromList<I, T...>::type& Get() {
+    return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+  }
+
+  template <typename F>
+  auto Apply(F&& f) -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+    return std::forward<F>(f)(Get<Idx>()...);
+  }
+
+  template <typename F>
+  auto Apply(F&& f) const -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+    return std::forward<F>(f)(Get<Idx>()...);
+  }
 };
 
-#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+// Analog to std::tuple but with different tradeoffs.
+// This class minimizes the template instantiation depth, thus allowing more
+// elements than std::tuple would. std::tuple has been seen to require an
+// instantiation depth of more than 10x the number of elements in some
+// implementations.
+// FlatTuple and ElemFromList are not recursive and have a fixed depth
+// regardless of T...
+// MakeIndexSequence, on the other hand, it is recursive but with an
+// instantiation depth of O(ln(N)).
+template <typename... T>
+class FlatTuple
+    : private FlatTupleBase<FlatTuple<T...>,
+                            typename MakeIndexSequence<sizeof...(T)>::type> {
+  using Indices = typename FlatTupleBase<
+      FlatTuple<T...>, typename MakeIndexSequence<sizeof...(T)>::type>::Indices;
 
-}  // namespace internal
-}  // namespace testing
+ public:
+  FlatTuple() = default;
+  template <typename... Args>
+  explicit FlatTuple(FlatTupleConstructTag tag, Args&&... args)
+      : FlatTuple::FlatTupleBase(tag, std::forward<Args>(args)...) {}
 
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+  using FlatTuple::FlatTupleBase::Apply;
+  using FlatTuple::FlatTupleBase::Get;
+};
 
-// Due to C++ preprocessor weirdness, we need double indirection to
-// concatenate two tokens when one of them is __LINE__.  Writing
-//
-//   foo ## __LINE__
-//
-// will result in the token foo__LINE__, instead of foo followed by
-// the current line number.  For more details, see
-// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
-#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
-#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar
+// Utility functions to be called with static_assert to induce deprecation
+// warnings.
+GTEST_INTERNAL_DEPRECATED(
+    "INSTANTIATE_TEST_CASE_P is deprecated, please use "
+    "INSTANTIATE_TEST_SUITE_P")
+constexpr bool InstantiateTestCase_P_IsDeprecated() { return true; }
 
-class ProtocolMessage;
-namespace proto2 { class Message; }
+GTEST_INTERNAL_DEPRECATED(
+    "TYPED_TEST_CASE_P is deprecated, please use "
+    "TYPED_TEST_SUITE_P")
+constexpr bool TypedTestCase_P_IsDeprecated() { return true; }
 
-namespace testing {
+GTEST_INTERNAL_DEPRECATED(
+    "TYPED_TEST_CASE is deprecated, please use "
+    "TYPED_TEST_SUITE")
+constexpr bool TypedTestCaseIsDeprecated() { return true; }
 
-// Forward declarations.
+GTEST_INTERNAL_DEPRECATED(
+    "REGISTER_TYPED_TEST_CASE_P is deprecated, please use "
+    "REGISTER_TYPED_TEST_SUITE_P")
+constexpr bool RegisterTypedTestCase_P_IsDeprecated() { return true; }
 
-class AssertionResult;                 // Result of an assertion.
-class Message;                         // Represents a failure message.
-class Test;                            // Represents a test.
-class TestInfo;                        // Information about a test.
-class TestPartResult;                  // Result of a test part.
-class UnitTest;                        // A collection of test cases.
+GTEST_INTERNAL_DEPRECATED(
+    "INSTANTIATE_TYPED_TEST_CASE_P is deprecated, please use "
+    "INSTANTIATE_TYPED_TEST_SUITE_P")
+constexpr bool InstantiateTypedTestCase_P_IsDeprecated() { return true; }
 
-template <typename T>
-::std::string PrintToString(const T& value);
+}  // namespace internal
+}  // namespace testing
 
-namespace internal {
+namespace std {
+// Some standard library implementations use `struct tuple_size` and some use
+// `class tuple_size`. Clang warns about the mismatch.
+// https://reviews.llvm.org/D55466
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmismatched-tags"
+#endif
+template <typename... Ts>
+struct tuple_size<testing::internal::FlatTuple<Ts...>>
+    : std::integral_constant<size_t, sizeof...(Ts)> {};
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+}  // namespace std
 
-struct TraceInfo;                      // Information about a trace point.
-class ScopedTrace;                     // Implements scoped trace.
-class TestInfoImpl;                    // Opaque implementation of TestInfo
-class UnitTestImpl;                    // Opaque implementation of UnitTest
+#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
+  ::testing::internal::AssertHelper(result_type, file, line, message) \
+    = ::testing::Message()
 
-// How many times InitGoogleTest() has been called.
-GTEST_API_ extern int g_init_gtest_count;
+#define GTEST_MESSAGE_(message, result_type) \
+  GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)
 
-// The text used in failure messages to indicate the start of the
-// stack trace.
-GTEST_API_ extern const char kStackTraceMarker[];
+#define GTEST_FATAL_FAILURE_(message) \
+  return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)
 
-// Two overloaded helpers for checking at compile time whether an
-// expression is a null pointer literal (i.e. NULL or any 0-valued
-// compile-time integral constant).  Their return values have
-// different sizes, so we can use sizeof() to test which version is
-// picked by the compiler.  These helpers have no implementations, as
-// we only need their signatures.
-//
-// Given IsNullLiteralHelper(x), the compiler will pick the first
-// version if x can be implicitly converted to Secret*, and pick the
-// second version otherwise.  Since Secret is a secret and incomplete
-// type, the only expression a user can write that has type Secret* is
-// a null pointer literal.  Therefore, we know that x is a null
-// pointer literal if and only if the first version is picked by the
-// compiler.
-char IsNullLiteralHelper(Secret* p);
-char (&IsNullLiteralHelper(...))[2];  // NOLINT
-
-// A compile-time bool constant that is true if and only if x is a
-// null pointer literal (i.e. NULL or any 0-valued compile-time
-// integral constant).
-#ifdef GTEST_ELLIPSIS_NEEDS_POD_
-// We lose support for NULL detection where the compiler doesn't like
-// passing non-POD classes through ellipsis (...).
-# define GTEST_IS_NULL_LITERAL_(x) false
-#else
-# define GTEST_IS_NULL_LITERAL_(x) \
-    (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1)
-#endif  // GTEST_ELLIPSIS_NEEDS_POD_
+#define GTEST_NONFATAL_FAILURE_(message) \
+  GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)
 
-// Appends the user-supplied message to the Google-Test-generated message.
-GTEST_API_ std::string AppendUserMessage(
-    const std::string& gtest_msg, const Message& user_msg);
+#define GTEST_SUCCESS_(message) \
+  GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)
+
+#define GTEST_SKIP_(message) \
+  return GTEST_MESSAGE_(message, ::testing::TestPartResult::kSkip)
+
+// Suppress MSVC warning 4072 (unreachable code) for the code following
+// statement if it returns or throws (or doesn't return or throw in some
+// situations).
+// NOTE: The "else" is important to keep this expansion to prevent a top-level
+// "else" from attaching to our "if".
+#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
+  if (::testing::internal::AlwaysTrue()) {                        \
+    statement;                                                    \
+  } else                     /* NOLINT */                         \
+    static_assert(true, "")  // User must have a semicolon after expansion.
 
 #if GTEST_HAS_EXCEPTIONS
 
-// This exception is thrown by (and only by) a failed Google Test
-// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
-// are enabled).  We derive it from std::runtime_error, which is for
-// errors presumably detectable only at run time.  Since
-// std::runtime_error inherits from std::exception, many testing
-// frameworks know how to extract and print the message inside it.
-class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
+namespace testing {
+namespace internal {
+
+class NeverThrown {
  public:
-  explicit GoogleTestFailureException(const TestPartResult& failure);
+  const char* what() const noexcept {
+    return "this exception should never be thrown";
+  }
 };
 
-#endif  // GTEST_HAS_EXCEPTIONS
+}  // namespace internal
+}  // namespace testing
 
-// A helper class for creating scoped traces in user programs.
-class GTEST_API_ ScopedTrace {
- public:
-  // The c'tor pushes the given source file location and message onto
-  // a trace stack maintained by Google Test.
-  ScopedTrace(const char* file, int line, const Message& message);
+#if GTEST_HAS_RTTI
 
-  // The d'tor pops the info pushed by the c'tor.
-  //
-  // Note that the d'tor is not virtual in order to be efficient.
-  // Don't inherit from ScopedTrace!
-  ~ScopedTrace();
+#define GTEST_EXCEPTION_TYPE_(e) ::testing::internal::GetTypeName(typeid(e))
 
- private:
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
-} GTEST_ATTRIBUTE_UNUSED_;  // A ScopedTrace object does its job in its
-                            // c'tor and d'tor.  Therefore it doesn't
-                            // need to be used otherwise.
+#else  // GTEST_HAS_RTTI
 
-// Constructs and returns the message for an equality assertion
-// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
-//
-// The first four parameters are the expressions used in the assertion
-// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
-// where foo is 5 and bar is 6, we have:
-//
-//   expected_expression: "foo"
-//   actual_expression:   "bar"
-//   expected_value:      "5"
-//   actual_value:        "6"
-//
-// The ignoring_case parameter is true iff the assertion is a
-// *_STRCASEEQ*.  When it's true, the string " (ignoring case)" will
-// be inserted into the message.
-GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
-                                     const char* actual_expression,
-                                     const std::string& expected_value,
-                                     const std::string& actual_value,
-                                     bool ignoring_case);
+#define GTEST_EXCEPTION_TYPE_(e) \
+  std::string { "an std::exception-derived error" }
 
-// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
-GTEST_API_ std::string GetBoolAssertionFailureMessage(
-    const AssertionResult& assertion_result,
-    const char* expression_text,
-    const char* actual_predicate_value,
-    const char* expected_predicate_value);
+#endif  // GTEST_HAS_RTTI
 
-// This template class represents an IEEE floating-point number
-// (either single-precision or double-precision, depending on the
-// template parameters).
-//
-// The purpose of this class is to do more sophisticated number
-// comparison.  (Due to round-off error, etc, it's very unlikely that
-// two floating-points will be equal exactly.  Hence a naive
-// comparison by the == operation often doesn't work.)
-//
-// Format of IEEE floating-point:
-//
-//   The most-significant bit being the leftmost, an IEEE
-//   floating-point looks like
-//
-//     sign_bit exponent_bits fraction_bits
-//
-//   Here, sign_bit is a single bit that designates the sign of the
-//   number.
-//
-//   For float, there are 8 exponent bits and 23 fraction bits.
-//
-//   For double, there are 11 exponent bits and 52 fraction bits.
-//
-//   More details can be found at
-//   http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
-//
-// Template parameter:
-//
-//   RawType: the raw floating-point type (either float or double)
-template <typename RawType>
-class FloatingPoint {
- public:
-  // Defines the unsigned integer type that has the same size as the
-  // floating point number.
-  typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)   \
+  catch (typename std::conditional<                                            \
+         std::is_same<typename std::remove_cv<typename std::remove_reference<  \
+                          expected_exception>::type>::type,                    \
+                      std::exception>::value,                                  \
+         const ::testing::internal::NeverThrown&, const std::exception&>::type \
+             e) {                                                              \
+    gtest_msg.value = "Expected: " #statement                                  \
+                      " throws an exception of type " #expected_exception      \
+                      ".\n  Actual: it throws ";                               \
+    gtest_msg.value += GTEST_EXCEPTION_TYPE_(e);                               \
+    gtest_msg.value += " with description \"";                                 \
+    gtest_msg.value += e.what();                                               \
+    gtest_msg.value += "\".";                                                  \
+    goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__);                \
+  }
 
-  // Constants.
+#else  // GTEST_HAS_EXCEPTIONS
 
-  // # of bits in a number.
-  static const size_t kBitCount = 8*sizeof(RawType);
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)
 
-  // # of fraction bits in a number.
-  static const size_t kFractionBitCount =
-    std::numeric_limits<RawType>::digits - 1;
+#endif  // GTEST_HAS_EXCEPTIONS
 
-  // # of exponent bits in a number.
-  static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
+#define GTEST_TEST_THROW_(statement, expected_exception, fail)              \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                             \
+  if (::testing::internal::TrueWithString gtest_msg{}) {                    \
+    bool gtest_caught_expected = false;                                     \
+    try {                                                                   \
+      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement);            \
+    } catch (expected_exception const&) {                                   \
+      gtest_caught_expected = true;                                         \
+    }                                                                       \
+    GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)    \
+    catch (...) {                                                           \
+      gtest_msg.value = "Expected: " #statement                             \
+                        " throws an exception of type " #expected_exception \
+                        ".\n  Actual: it throws a different type.";         \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__);           \
+    }                                                                       \
+    if (!gtest_caught_expected) {                                           \
+      gtest_msg.value = "Expected: " #statement                             \
+                        " throws an exception of type " #expected_exception \
+                        ".\n  Actual: it throws nothing.";                  \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__);           \
+    }                                                                       \
+  } else /*NOLINT*/                                                         \
+    GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__)                   \
+        : fail(gtest_msg.value.c_str())
 
-  // The mask for the sign bit.
-  static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
+#if GTEST_HAS_EXCEPTIONS
 
-  // The mask for the fraction bits.
-  static const Bits kFractionBitMask =
-    ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
+#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_()                \
+  catch (std::exception const& e) {                               \
+    gtest_msg.value = "it throws ";                               \
+    gtest_msg.value += GTEST_EXCEPTION_TYPE_(e);                  \
+    gtest_msg.value += " with description \"";                    \
+    gtest_msg.value += e.what();                                  \
+    gtest_msg.value += "\".";                                     \
+    goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
+  }
 
-  // The mask for the exponent bits.
-  static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
+#else  // GTEST_HAS_EXCEPTIONS
 
-  // How many ULP's (Units in the Last Place) we want to tolerate when
-  // comparing two numbers.  The larger the value, the more error we
-  // allow.  A 0 value means that two numbers must be exactly the same
-  // to be considered equal.
-  //
-  // The maximum error of a single floating-point operation is 0.5
-  // units in the last place.  On Intel CPU's, all floating-point
-  // calculations are done with 80-bit precision, while double has 64
-  // bits.  Therefore, 4 should be enough for ordinary use.
-  //
-  // See the following article for more details on ULP:
-  // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
-  static const size_t kMaxUlps = 4;
+#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_()
 
-  // Constructs a FloatingPoint from a raw floating-point number.
-  //
-  // On an Intel CPU, passing a non-normalized NAN (Not a Number)
-  // around may change its bits, although the new value is guaranteed
-  // to be also a NAN.  Therefore, don't expect this constructor to
-  // preserve the bits in x when x is a NAN.
-  explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
+#endif  // GTEST_HAS_EXCEPTIONS
 
-  // Static methods
+#define GTEST_TEST_NO_THROW_(statement, fail) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::TrueWithString gtest_msg{}) { \
+    try { \
+      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+    } \
+    GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_() \
+    catch (...) { \
+      gtest_msg.value = "it throws."; \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
+    } \
+  } else \
+    GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
+      fail(("Expected: " #statement " doesn't throw an exception.\n" \
+            "  Actual: " + gtest_msg.value).c_str())
 
-  // Reinterprets a bit pattern as a floating-point number.
-  //
-  // This function is needed to test the AlmostEquals() method.
-  static RawType ReinterpretBits(const Bits bits) {
-    FloatingPoint fp(0);
-    fp.u_.bits_ = bits;
-    return fp.u_.value_;
-  }
+#define GTEST_TEST_ANY_THROW_(statement, fail) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::AlwaysTrue()) { \
+    bool gtest_caught_any = false; \
+    try { \
+      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+    } \
+    catch (...) { \
+      gtest_caught_any = true; \
+    } \
+    if (!gtest_caught_any) { \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
+    } \
+  } else \
+    GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \
+      fail("Expected: " #statement " throws an exception.\n" \
+           "  Actual: it doesn't.")
 
-  // Returns the floating-point number that represent positive infinity.
-  static RawType Infinity() {
-    return ReinterpretBits(kExponentBitMask);
-  }
 
-  // Returns the maximum representable finite floating-point number.
-  static RawType Max();
+// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
+// either a boolean expression or an AssertionResult. text is a textual
+// representation of expression as it was passed into the EXPECT_TRUE.
+#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (const ::testing::AssertionResult gtest_ar_ = \
+      ::testing::AssertionResult(expression)) \
+    ; \
+  else \
+    fail(::testing::internal::GetBoolAssertionFailureMessage(\
+        gtest_ar_, text, #actual, #expected).c_str())
 
-  // Non-static methods
+#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::AlwaysTrue()) { \
+    ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+    if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
+    } \
+  } else \
+    GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
+      fail("Expected: " #statement " doesn't generate new fatal " \
+           "failures in the current thread.\n" \
+           "  Actual: it does.")
 
-  // Returns the bits that represents this number.
-  const Bits &bits() const { return u_.bits_; }
+// Expands to the name of the class that implements the given test.
+#define GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+  test_suite_name##_##test_name##_Test
 
-  // Returns the exponent bits of this number.
-  Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
+// Helper macro for defining tests.
+#define GTEST_TEST_(test_suite_name, test_name, parent_class, parent_id)      \
+  static_assert(sizeof(GTEST_STRINGIFY_(test_suite_name)) > 1,                \
+                "test_suite_name must not be empty");                         \
+  static_assert(sizeof(GTEST_STRINGIFY_(test_name)) > 1,                      \
+                "test_name must not be empty");                               \
+  class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)                    \
+      : public parent_class {                                                 \
+   public:                                                                    \
+    GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() = default;           \
+    ~GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() override = default; \
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name,   \
+                                                           test_name));       \
+    GTEST_DISALLOW_MOVE_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name,   \
+                                                           test_name));       \
+                                                                              \
+   private:                                                                   \
+    void TestBody() override;                                                 \
+    static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;     \
+  };                                                                          \
+                                                                              \
+  ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_suite_name,          \
+                                                    test_name)::test_info_ =  \
+      ::testing::internal::MakeAndRegisterTestInfo(                           \
+          #test_suite_name, #test_name, nullptr, nullptr,                     \
+          ::testing::internal::CodeLocation(__FILE__, __LINE__), (parent_id), \
+          ::testing::internal::SuiteApiResolver<                              \
+              parent_class>::GetSetUpCaseOrSuite(__FILE__, __LINE__),         \
+          ::testing::internal::SuiteApiResolver<                              \
+              parent_class>::GetTearDownCaseOrSuite(__FILE__, __LINE__),      \
+          new ::testing::internal::TestFactoryImpl<GTEST_TEST_CLASS_NAME_(    \
+              test_suite_name, test_name)>);                                  \
+  void GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::TestBody()
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-  // Returns the fraction bits of this number.
-  Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the public API for death tests.  It is
+// #included by gtest.h so a user doesn't need to include this
+// directly.
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-  // Returns the sign bit of this number.
-  Bits sign_bit() const { return kSignBitMask & u_.bits_; }
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
 
-  // Returns true iff this is NAN (not a number).
-  bool is_nan() const {
-    // It's a NAN if the exponent bits are all ones and the fraction
-    // bits are not entirely zeros.
-    return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
-  }
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines internal utilities needed for implementing
+// death tests.  They are subject to change without notice.
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-  // Returns true iff this number is at most kMaxUlps ULP's away from
-  // rhs.  In particular, this function:
-  //
-  //   - returns false if either number is (or both are) NAN.
-  //   - treats really large numbers as almost equal to infinity.
-  //   - thinks +0.0 and -0.0 are 0 DLP's apart.
-  bool AlmostEquals(const FloatingPoint& rhs) const {
-    // The IEEE standard says that any comparison operation involving
-    // a NAN must return false.
-    if (is_nan() || rhs.is_nan()) return false;
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
 
-    return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
-        <= kMaxUlps;
-  }
-
- private:
-  // The data type used to store the actual floating-point number.
-  union FloatingPointUnion {
-    RawType value_;  // The raw floating-point number.
-    Bits bits_;      // The bits that represent the number.
-  };
-
-  // Converts an integer from the sign-and-magnitude representation to
-  // the biased representation.  More precisely, let N be 2 to the
-  // power of (kBitCount - 1), an integer x is represented by the
-  // unsigned number x + N.
-  //
-  // For instance,
-  //
-  //   -N + 1 (the most negative number representable using
-  //          sign-and-magnitude) is represented by 1;
-  //   0      is represented by N; and
-  //   N - 1  (the biggest number representable using
-  //          sign-and-magnitude) is represented by 2N - 1.
-  //
-  // Read http://en.wikipedia.org/wiki/Signed_number_representations
-  // for more details on signed number representations.
-  static Bits SignAndMagnitudeToBiased(const Bits &sam) {
-    if (kSignBitMask & sam) {
-      // sam represents a negative number.
-      return ~sam + 1;
-    } else {
-      // sam represents a positive number.
-      return kSignBitMask | sam;
-    }
-  }
-
-  // Given two numbers in the sign-and-magnitude representation,
-  // returns the distance between them as an unsigned number.
-  static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
-                                                     const Bits &sam2) {
-    const Bits biased1 = SignAndMagnitudeToBiased(sam1);
-    const Bits biased2 = SignAndMagnitudeToBiased(sam2);
-    return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
-  }
-
-  FloatingPointUnion u_;
-};
-
-// We cannot use std::numeric_limits<T>::max() as it clashes with the max()
-// macro defined by <windows.h>.
-template <>
-inline float FloatingPoint<float>::Max() { return FLT_MAX; }
-template <>
-inline double FloatingPoint<double>::Max() { return DBL_MAX; }
-
-// Typedefs the instances of the FloatingPoint template class that we
-// care to use.
-typedef FloatingPoint<float> Float;
-typedef FloatingPoint<double> Double;
-
-// In order to catch the mistake of putting tests that use different
-// test fixture classes in the same test case, we need to assign
-// unique IDs to fixture classes and compare them.  The TypeId type is
-// used to hold such IDs.  The user should treat TypeId as an opaque
-// type: the only operation allowed on TypeId values is to compare
-// them for equality using the == operator.
-typedef const void* TypeId;
-
-template <typename T>
-class TypeIdHelper {
- public:
-  // dummy_ must not have a const type.  Otherwise an overly eager
-  // compiler (e.g. MSVC 7.1 & 8.0) may try to merge
-  // TypeIdHelper<T>::dummy_ for different Ts as an "optimization".
-  static bool dummy_;
-};
-
-template <typename T>
-bool TypeIdHelper<T>::dummy_ = false;
-
-// GetTypeId<T>() returns the ID of type T.  Different values will be
-// returned for different types.  Calling the function twice with the
-// same type argument is guaranteed to return the same ID.
-template <typename T>
-TypeId GetTypeId() {
-  // The compiler is required to allocate a different
-  // TypeIdHelper<T>::dummy_ variable for each T used to instantiate
-  // the template.  Therefore, the address of dummy_ is guaranteed to
-  // be unique.
-  return &(TypeIdHelper<T>::dummy_);
-}
-
-// Returns the type ID of ::testing::Test.  Always call this instead
-// of GetTypeId< ::testing::Test>() to get the type ID of
-// ::testing::Test, as the latter may give the wrong result due to a
-// suspected linker bug when compiling Google Test as a Mac OS X
-// framework.
-GTEST_API_ TypeId GetTestTypeId();
-
-// Defines the abstract factory interface that creates instances
-// of a Test object.
-class TestFactoryBase {
- public:
-  virtual ~TestFactoryBase() {}
-
-  // Creates a test instance to run. The instance is both created and destroyed
-  // within TestInfoImpl::Run()
-  virtual Test* CreateTest() = 0;
-
- protected:
-  TestFactoryBase() {}
-
- private:
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
-};
-
-// This class provides implementation of TeastFactoryBase interface.
-// It is used in TEST and TEST_F macros.
-template <class TestClass>
-class TestFactoryImpl : public TestFactoryBase {
- public:
-  virtual Test* CreateTest() { return new TestClass; }
-};
-
-#if GTEST_OS_WINDOWS
-
-// Predicate-formatters for implementing the HRESULT checking macros
-// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
-// We pass a long instead of HRESULT to avoid causing an
-// include dependency for the HRESULT type.
-GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
-                                            long hr);  // NOLINT
-GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
-                                            long hr);  // NOLINT
-
-#endif  // GTEST_OS_WINDOWS
-
-// Types of SetUpTestCase() and TearDownTestCase() functions.
-typedef void (*SetUpTestCaseFunc)();
-typedef void (*TearDownTestCaseFunc)();
-
-// Creates a new TestInfo object and registers it with Google Test;
-// returns the created object.
-//
-// Arguments:
-//
-//   test_case_name:   name of the test case
-//   name:             name of the test
-//   type_param        the name of the test's type parameter, or NULL if
-//                     this is not a typed or a type-parameterized test.
-//   value_param       text representation of the test's value parameter,
-//                     or NULL if this is not a type-parameterized test.
-//   fixture_class_id: ID of the test fixture class
-//   set_up_tc:        pointer to the function that sets up the test case
-//   tear_down_tc:     pointer to the function that tears down the test case
-//   factory:          pointer to the factory that creates a test object.
-//                     The newly created TestInfo instance will assume
-//                     ownership of the factory object.
-GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
-    const char* test_case_name,
-    const char* name,
-    const char* type_param,
-    const char* value_param,
-    TypeId fixture_class_id,
-    SetUpTestCaseFunc set_up_tc,
-    TearDownTestCaseFunc tear_down_tc,
-    TestFactoryBase* factory);
-
-// If *pstr starts with the given prefix, modifies *pstr to be right
-// past the prefix and returns true; otherwise leaves *pstr unchanged
-// and returns false.  None of pstr, *pstr, and prefix can be NULL.
-GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);
-
-#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-// State of the definition of a type-parameterized test case.
-class GTEST_API_ TypedTestCasePState {
- public:
-  TypedTestCasePState() : registered_(false) {}
-
-  // Adds the given test name to defined_test_names_ and return true
-  // if the test case hasn't been registered; otherwise aborts the
-  // program.
-  bool AddTestName(const char* file, int line, const char* case_name,
-                   const char* test_name) {
-    if (registered_) {
-      fprintf(stderr, "%s Test %s must be defined before "
-              "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n",
-              FormatFileLocation(file, line).c_str(), test_name, case_name);
-      fflush(stderr);
-      posix::Abort();
-    }
-    defined_test_names_.insert(test_name);
-    return true;
-  }
-
-  // Verifies that registered_tests match the test names in
-  // defined_test_names_; returns registered_tests if successful, or
-  // aborts the program otherwise.
-  const char* VerifyRegisteredTestNames(
-      const char* file, int line, const char* registered_tests);
-
- private:
-  bool registered_;
-  ::std::set<const char*> defined_test_names_;
-};
-
-// Skips to the first non-space char after the first comma in 'str';
-// returns NULL if no comma is found in 'str'.
-inline const char* SkipComma(const char* str) {
-  const char* comma = strchr(str, ',');
-  if (comma == NULL) {
-    return NULL;
-  }
-  while (IsSpace(*(++comma))) {}
-  return comma;
-}
-
-// Returns the prefix of 'str' before the first comma in it; returns
-// the entire string if it contains no comma.
-inline std::string GetPrefixUntilComma(const char* str) {
-  const char* comma = strchr(str, ',');
-  return comma == NULL ? str : std::string(str, comma);
-}
-
-// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
-// registers a list of type-parameterized tests with Google Test.  The
-// return value is insignificant - we just need to return something
-// such that we can call this function in a namespace scope.
-//
-// Implementation note: The GTEST_TEMPLATE_ macro declares a template
-// template parameter.  It's defined in gtest-type-util.h.
-template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
-class TypeParameterizedTest {
- public:
-  // 'index' is the index of the test in the type list 'Types'
-  // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase,
-  // Types).  Valid values for 'index' are [0, N - 1] where N is the
-  // length of Types.
-  static bool Register(const char* prefix, const char* case_name,
-                       const char* test_names, int index) {
-    typedef typename Types::Head Type;
-    typedef Fixture<Type> FixtureClass;
-    typedef typename GTEST_BIND_(TestSel, Type) TestClass;
-
-    // First, registers the first type-parameterized test in the type
-    // list.
-    MakeAndRegisterTestInfo(
-        (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + "/"
-         + StreamableToString(index)).c_str(),
-        GetPrefixUntilComma(test_names).c_str(),
-        GetTypeName<Type>().c_str(),
-        NULL,  // No value parameter.
-        GetTypeId<FixtureClass>(),
-        TestClass::SetUpTestCase,
-        TestClass::TearDownTestCase,
-        new TestFactoryImpl<TestClass>);
-
-    // Next, recurses (at compile time) with the tail of the type list.
-    return TypeParameterizedTest<Fixture, TestSel, typename Types::Tail>
-        ::Register(prefix, case_name, test_names, index + 1);
-  }
-};
-
-// The base case for the compile time recursion.
-template <GTEST_TEMPLATE_ Fixture, class TestSel>
-class TypeParameterizedTest<Fixture, TestSel, Types0> {
- public:
-  static bool Register(const char* /*prefix*/, const char* /*case_name*/,
-                       const char* /*test_names*/, int /*index*/) {
-    return true;
-  }
-};
-
-// TypeParameterizedTestCase<Fixture, Tests, Types>::Register()
-// registers *all combinations* of 'Tests' and 'Types' with Google
-// Test.  The return value is insignificant - we just need to return
-// something such that we can call this function in a namespace scope.
-template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
-class TypeParameterizedTestCase {
- public:
-  static bool Register(const char* prefix, const char* case_name,
-                       const char* test_names) {
-    typedef typename Tests::Head Head;
-
-    // First, register the first test in 'Test' for each type in 'Types'.
-    TypeParameterizedTest<Fixture, Head, Types>::Register(
-        prefix, case_name, test_names, 0);
-
-    // Next, recurses (at compile time) with the tail of the test list.
-    return TypeParameterizedTestCase<Fixture, typename Tests::Tail, Types>
-        ::Register(prefix, case_name, SkipComma(test_names));
-  }
-};
-
-// The base case for the compile time recursion.
-template <GTEST_TEMPLATE_ Fixture, typename Types>
-class TypeParameterizedTestCase<Fixture, Templates0, Types> {
- public:
-  static bool Register(const char* /*prefix*/, const char* /*case_name*/,
-                       const char* /*test_names*/) {
-    return true;
-  }
-};
-
-#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-// Returns the current OS stack trace as an std::string.
-//
-// The maximum number of stack frames to be included is specified by
-// the gtest_stack_trace_depth flag.  The skip_count parameter
-// specifies the number of top frames to be skipped, which doesn't
-// count against the number of frames to be included.
-//
-// For example, if Foo() calls Bar(), which in turn calls
-// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
-// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
-GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(
-    UnitTest* unit_test, int skip_count);
-
-// Helpers for suppressing warnings on unreachable code or constant
-// condition.
-
-// Always returns true.
-GTEST_API_ bool AlwaysTrue();
-
-// Always returns false.
-inline bool AlwaysFalse() { return !AlwaysTrue(); }
-
-// Helper for suppressing false warning from Clang on a const char*
-// variable declared in a conditional expression always being NULL in
-// the else branch.
-struct GTEST_API_ ConstCharPtr {
-  ConstCharPtr(const char* str) : value(str) {}
-  operator bool() const { return true; }
-  const char* value;
-};
-
-// A simple Linear Congruential Generator for generating random
-// numbers with a uniform distribution.  Unlike rand() and srand(), it
-// doesn't use global state (and therefore can't interfere with user
-// code).  Unlike rand_r(), it's portable.  An LCG isn't very random,
-// but it's good enough for our purposes.
-class GTEST_API_ Random {
- public:
-  static const UInt32 kMaxRange = 1u << 31;
-
-  explicit Random(UInt32 seed) : state_(seed) {}
-
-  void Reseed(UInt32 seed) { state_ = seed; }
-
-  // Generates a random number from [0, range).  Crashes if 'range' is
-  // 0 or greater than kMaxRange.
-  UInt32 Generate(UInt32 range);
-
- private:
-  UInt32 state_;
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
-};
-
-// Defining a variable of type CompileAssertTypesEqual<T1, T2> will cause a
-// compiler error iff T1 and T2 are different types.
-template <typename T1, typename T2>
-struct CompileAssertTypesEqual;
-
-template <typename T>
-struct CompileAssertTypesEqual<T, T> {
-};
-
-// Removes the reference from a type if it is a reference type,
-// otherwise leaves it unchanged.  This is the same as
-// tr1::remove_reference, which is not widely available yet.
-template <typename T>
-struct RemoveReference { typedef T type; };  // NOLINT
-template <typename T>
-struct RemoveReference<T&> { typedef T type; };  // NOLINT
-
-// A handy wrapper around RemoveReference that works when the argument
-// T depends on template parameters.
-#define GTEST_REMOVE_REFERENCE_(T) \
-    typename ::testing::internal::RemoveReference<T>::type
-
-// Removes const from a type if it is a const type, otherwise leaves
-// it unchanged.  This is the same as tr1::remove_const, which is not
-// widely available yet.
-template <typename T>
-struct RemoveConst { typedef T type; };  // NOLINT
-template <typename T>
-struct RemoveConst<const T> { typedef T type; };  // NOLINT
-
-// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above
-// definition to fail to remove the const in 'const int[3]' and 'const
-// char[3][4]'.  The following specialization works around the bug.
-template <typename T, size_t N>
-struct RemoveConst<const T[N]> {
-  typedef typename RemoveConst<T>::type type[N];
-};
-
-#if defined(_MSC_VER) && _MSC_VER < 1400
-// This is the only specialization that allows VC++ 7.1 to remove const in
-// 'const int[3] and 'const int[3][4]'.  However, it causes trouble with GCC
-// and thus needs to be conditionally compiled.
-template <typename T, size_t N>
-struct RemoveConst<T[N]> {
-  typedef typename RemoveConst<T>::type type[N];
-};
-#endif
-
-// A handy wrapper around RemoveConst that works when the argument
-// T depends on template parameters.
-#define GTEST_REMOVE_CONST_(T) \
-    typename ::testing::internal::RemoveConst<T>::type
-
-// Turns const U&, U&, const U, and U all into U.
-#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
-    GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))
-
-// Adds reference to a type if it is not a reference type,
-// otherwise leaves it unchanged.  This is the same as
-// tr1::add_reference, which is not widely available yet.
-template <typename T>
-struct AddReference { typedef T& type; };  // NOLINT
-template <typename T>
-struct AddReference<T&> { typedef T& type; };  // NOLINT
-
-// A handy wrapper around AddReference that works when the argument T
-// depends on template parameters.
-#define GTEST_ADD_REFERENCE_(T) \
-    typename ::testing::internal::AddReference<T>::type
-
-// Adds a reference to const on top of T as necessary.  For example,
-// it transforms
-//
-//   char         ==> const char&
-//   const char   ==> const char&
-//   char&        ==> const char&
-//   const char&  ==> const char&
-//
-// The argument T must depend on some template parameters.
-#define GTEST_REFERENCE_TO_CONST_(T) \
-    GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T))
-
-// ImplicitlyConvertible<From, To>::value is a compile-time bool
-// constant that's true iff type From can be implicitly converted to
-// type To.
-template <typename From, typename To>
-class ImplicitlyConvertible {
- private:
-  // We need the following helper functions only for their types.
-  // They have no implementations.
-
-  // MakeFrom() is an expression whose type is From.  We cannot simply
-  // use From(), as the type From may not have a public default
-  // constructor.
-  static From MakeFrom();
-
-  // These two functions are overloaded.  Given an expression
-  // Helper(x), the compiler will pick the first version if x can be
-  // implicitly converted to type To; otherwise it will pick the
-  // second version.
-  //
-  // The first version returns a value of size 1, and the second
-  // version returns a value of size 2.  Therefore, by checking the
-  // size of Helper(x), which can be done at compile time, we can tell
-  // which version of Helper() is used, and hence whether x can be
-  // implicitly converted to type To.
-  static char Helper(To);
-  static char (&Helper(...))[2];  // NOLINT
-
-  // We have to put the 'public' section after the 'private' section,
-  // or MSVC refuses to compile the code.
- public:
-  // MSVC warns about implicitly converting from double to int for
-  // possible loss of data, so we need to temporarily disable the
-  // warning.
-#ifdef _MSC_VER
-# pragma warning(push)          // Saves the current warning state.
-# pragma warning(disable:4244)  // Temporarily disables warning 4244.
-
-  static const bool value =
-      sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
-# pragma warning(pop)           // Restores the warning state.
-#elif defined(__BORLANDC__)
-  // C++Builder cannot use member overload resolution during template
-  // instantiation.  The simplest workaround is to use its C++0x type traits
-  // functions (C++Builder 2009 and above only).
-  static const bool value = __is_convertible(From, To);
-#else
-  static const bool value =
-      sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
-#endif  // _MSV_VER
-};
-template <typename From, typename To>
-const bool ImplicitlyConvertible<From, To>::value;
-
-// IsAProtocolMessage<T>::value is a compile-time bool constant that's
-// true iff T is type ProtocolMessage, proto2::Message, or a subclass
-// of those.
-template <typename T>
-struct IsAProtocolMessage
-    : public bool_constant<
-  ImplicitlyConvertible<const T*, const ::ProtocolMessage*>::value ||
-  ImplicitlyConvertible<const T*, const ::proto2::Message*>::value> {
-};
-
-// When the compiler sees expression IsContainerTest<C>(0), if C is an
-// STL-style container class, the first overload of IsContainerTest
-// will be viable (since both C::iterator* and C::const_iterator* are
-// valid types and NULL can be implicitly converted to them).  It will
-// be picked over the second overload as 'int' is a perfect match for
-// the type of argument 0.  If C::iterator or C::const_iterator is not
-// a valid type, the first overload is not viable, and the second
-// overload will be picked.  Therefore, we can determine whether C is
-// a container class by checking the type of IsContainerTest<C>(0).
-// The value of the expression is insignificant.
-//
-// Note that we look for both C::iterator and C::const_iterator.  The
-// reason is that C++ injects the name of a class as a member of the
-// class itself (e.g. you can refer to class iterator as either
-// 'iterator' or 'iterator::iterator').  If we look for C::iterator
-// only, for example, we would mistakenly think that a class named
-// iterator is an STL container.
-//
-// Also note that the simpler approach of overloading
-// IsContainerTest(typename C::const_iterator*) and
-// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
-typedef int IsContainer;
-template <class C>
-IsContainer IsContainerTest(int /* dummy */,
-                            typename C::iterator* /* it */ = NULL,
-                            typename C::const_iterator* /* const_it */ = NULL) {
-  return 0;
-}
-
-typedef char IsNotContainer;
-template <class C>
-IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; }
-
-// EnableIf<condition>::type is void when 'Cond' is true, and
-// undefined when 'Cond' is false.  To use SFINAE to make a function
-// overload only apply when a particular expression is true, add
-// "typename EnableIf<expression>::type* = 0" as the last parameter.
-template<bool> struct EnableIf;
-template<> struct EnableIf<true> { typedef void type; };  // NOLINT
-
-// Utilities for native arrays.
-
-// ArrayEq() compares two k-dimensional native arrays using the
-// elements' operator==, where k can be any integer >= 0.  When k is
-// 0, ArrayEq() degenerates into comparing a single pair of values.
-
-template <typename T, typename U>
-bool ArrayEq(const T* lhs, size_t size, const U* rhs);
-
-// This generic version is used when k is 0.
-template <typename T, typename U>
-inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; }
-
-// This overload is used when k >= 1.
-template <typename T, typename U, size_t N>
-inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) {
-  return internal::ArrayEq(lhs, N, rhs);
-}
-
-// This helper reduces code bloat.  If we instead put its logic inside
-// the previous ArrayEq() function, arrays with different sizes would
-// lead to different copies of the template code.
-template <typename T, typename U>
-bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
-  for (size_t i = 0; i != size; i++) {
-    if (!internal::ArrayEq(lhs[i], rhs[i]))
-      return false;
-  }
-  return true;
-}
-
-// Finds the first element in the iterator range [begin, end) that
-// equals elem.  Element may be a native array type itself.
-template <typename Iter, typename Element>
-Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
-  for (Iter it = begin; it != end; ++it) {
-    if (internal::ArrayEq(*it, elem))
-      return it;
-  }
-  return end;
-}
-
-// CopyArray() copies a k-dimensional native array using the elements'
-// operator=, where k can be any integer >= 0.  When k is 0,
-// CopyArray() degenerates into copying a single value.
-
-template <typename T, typename U>
-void CopyArray(const T* from, size_t size, U* to);
-
-// This generic version is used when k is 0.
-template <typename T, typename U>
-inline void CopyArray(const T& from, U* to) { *to = from; }
-
-// This overload is used when k >= 1.
-template <typename T, typename U, size_t N>
-inline void CopyArray(const T(&from)[N], U(*to)[N]) {
-  internal::CopyArray(from, N, *to);
-}
-
-// This helper reduces code bloat.  If we instead put its logic inside
-// the previous CopyArray() function, arrays with different sizes
-// would lead to different copies of the template code.
-template <typename T, typename U>
-void CopyArray(const T* from, size_t size, U* to) {
-  for (size_t i = 0; i != size; i++) {
-    internal::CopyArray(from[i], to + i);
-  }
-}
-
-// The relation between an NativeArray object (see below) and the
-// native array it represents.
-enum RelationToSource {
-  kReference,  // The NativeArray references the native array.
-  kCopy        // The NativeArray makes a copy of the native array and
-               // owns the copy.
-};
-
-// Adapts a native array to a read-only STL-style container.  Instead
-// of the complete STL container concept, this adaptor only implements
-// members useful for Google Mock's container matchers.  New members
-// should be added as needed.  To simplify the implementation, we only
-// support Element being a raw type (i.e. having no top-level const or
-// reference modifier).  It's the client's responsibility to satisfy
-// this requirement.  Element can be an array type itself (hence
-// multi-dimensional arrays are supported).
-template <typename Element>
-class NativeArray {
- public:
-  // STL-style container typedefs.
-  typedef Element value_type;
-  typedef Element* iterator;
-  typedef const Element* const_iterator;
-
-  // Constructs from a native array.
-  NativeArray(const Element* array, size_t count, RelationToSource relation) {
-    Init(array, count, relation);
-  }
-
-  // Copy constructor.
-  NativeArray(const NativeArray& rhs) {
-    Init(rhs.array_, rhs.size_, rhs.relation_to_source_);
-  }
-
-  ~NativeArray() {
-    // Ensures that the user doesn't instantiate NativeArray with a
-    // const or reference type.
-    static_cast<void>(StaticAssertTypeEqHelper<Element,
-        GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>());
-    if (relation_to_source_ == kCopy)
-      delete[] array_;
-  }
-
-  // STL-style container methods.
-  size_t size() const { return size_; }
-  const_iterator begin() const { return array_; }
-  const_iterator end() const { return array_ + size_; }
-  bool operator==(const NativeArray& rhs) const {
-    return size() == rhs.size() &&
-        ArrayEq(begin(), size(), rhs.begin());
-  }
-
- private:
-  // Initializes this object; makes a copy of the input array if
-  // 'relation' is kCopy.
-  void Init(const Element* array, size_t a_size, RelationToSource relation) {
-    if (relation == kReference) {
-      array_ = array;
-    } else {
-      Element* const copy = new Element[a_size];
-      CopyArray(array, a_size, copy);
-      array_ = copy;
-    }
-    size_ = a_size;
-    relation_to_source_ = relation;
-  }
-
-  const Element* array_;
-  size_t size_;
-  RelationToSource relation_to_source_;
-
-  GTEST_DISALLOW_ASSIGN_(NativeArray);
-};
-
-}  // namespace internal
-}  // namespace testing
-
-#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
-  ::testing::internal::AssertHelper(result_type, file, line, message) \
-    = ::testing::Message()
-
-#define GTEST_MESSAGE_(message, result_type) \
-  GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)
-
-#define GTEST_FATAL_FAILURE_(message) \
-  return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)
-
-#define GTEST_NONFATAL_FAILURE_(message) \
-  GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)
-
-#define GTEST_SUCCESS_(message) \
-  GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)
-
-// Suppresses MSVC warnings 4072 (unreachable code) for the code following
-// statement if it returns or throws (or doesn't return or throw in some
-// situations).
-#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
-  if (::testing::internal::AlwaysTrue()) { statement; }
-
-#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::ConstCharPtr gtest_msg = "") { \
-    bool gtest_caught_expected = false; \
-    try { \
-      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-    } \
-    catch (expected_exception const&) { \
-      gtest_caught_expected = true; \
-    } \
-    catch (...) { \
-      gtest_msg.value = \
-          "Expected: " #statement " throws an exception of type " \
-          #expected_exception ".\n  Actual: it throws a different type."; \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
-    } \
-    if (!gtest_caught_expected) { \
-      gtest_msg.value = \
-          "Expected: " #statement " throws an exception of type " \
-          #expected_exception ".\n  Actual: it throws nothing."; \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
-    } \
-  } else \
-    GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \
-      fail(gtest_msg.value)
-
-#define GTEST_TEST_NO_THROW_(statement, fail) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::AlwaysTrue()) { \
-    try { \
-      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-    } \
-    catch (...) { \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
-    } \
-  } else \
-    GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
-      fail("Expected: " #statement " doesn't throw an exception.\n" \
-           "  Actual: it throws.")
-
-#define GTEST_TEST_ANY_THROW_(statement, fail) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::AlwaysTrue()) { \
-    bool gtest_caught_any = false; \
-    try { \
-      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-    } \
-    catch (...) { \
-      gtest_caught_any = true; \
-    } \
-    if (!gtest_caught_any) { \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
-    } \
-  } else \
-    GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \
-      fail("Expected: " #statement " throws an exception.\n" \
-           "  Actual: it doesn't.")
-
-
-// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
-// either a boolean expression or an AssertionResult. text is a textual
-// represenation of expression as it was passed into the EXPECT_TRUE.
-#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (const ::testing::AssertionResult gtest_ar_ = \
-      ::testing::AssertionResult(expression)) \
-    ; \
-  else \
-    fail(::testing::internal::GetBoolAssertionFailureMessage(\
-        gtest_ar_, text, #actual, #expected).c_str())
-
-#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::AlwaysTrue()) { \
-    ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
-    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-    if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
-    } \
-  } else \
-    GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
-      fail("Expected: " #statement " doesn't generate new fatal " \
-           "failures in the current thread.\n" \
-           "  Actual: it does.")
-
-// Expands to the name of the class that implements the given test.
-#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
-  test_case_name##_##test_name##_Test
-
-// Helper macro for defining tests.
-#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\
-class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
- public:\
-  GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
- private:\
-  virtual void TestBody();\
-  static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(\
-      GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
-};\
-\
-::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\
-  ::test_info_ =\
-    ::testing::internal::MakeAndRegisterTestInfo(\
-        #test_case_name, #test_name, NULL, NULL, \
-        (parent_id), \
-        parent_class::SetUpTestCase, \
-        parent_class::TearDownTestCase, \
-        new ::testing::internal::TestFactoryImpl<\
-            GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
-void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
-
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
-// Copyright 2005, Google Inc.
+// Copyright 2007, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -8148,19 +5042,22 @@ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
 //
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the public API for death tests.  It is
-// #included by gtest.h so a user doesn't need to include this
-// directly.
+// This file implements just enough of the matcher interface to allow
+// EXPECT_DEATH and friends to accept a matcher argument.
 
-#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
 
-// Copyright 2005, Google Inc.
+#include <atomic>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <type_traits>
+
+// Copyright 2007, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -8188,1014 +5085,1003 @@ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Google Test - The Google C++ Testing and Mocking Framework
 //
-// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
+// This file implements a universal value printer that can print a
+// value of any type T:
 //
-// The Google C++ Testing Framework (Google Test)
+//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
 //
-// This header file defines internal utilities needed for implementing
-// death tests.  They are subject to change without notice.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
-
-
-#include <stdio.h>
-
-namespace testing {
-namespace internal {
-
-GTEST_DECLARE_string_(internal_run_death_test);
-
-// Names of the flags (needed for parsing Google Test flags).
-const char kDeathTestStyleFlag[] = "death_test_style";
-const char kDeathTestUseFork[] = "death_test_use_fork";
-const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
-
-#if GTEST_HAS_DEATH_TEST
-
-// DeathTest is a class that hides much of the complexity of the
-// GTEST_DEATH_TEST_ macro.  It is abstract; its static Create method
-// returns a concrete class that depends on the prevailing death test
-// style, as defined by the --gtest_death_test_style and/or
-// --gtest_internal_run_death_test flags.
-
-// In describing the results of death tests, these terms are used with
-// the corresponding definitions:
+// A user can teach this function how to print a class type T by
+// defining either operator<<() or PrintTo() in the namespace that
+// defines T.  More specifically, the FIRST defined function in the
+// following list will be used (assuming T is defined in namespace
+// foo):
 //
-// exit status:  The integer exit information in the format specified
-//               by wait(2)
-// exit code:    The integer code passed to exit(3), _exit(2), or
-//               returned from main()
-class GTEST_API_ DeathTest {
- public:
-  // Create returns false if there was an error determining the
-  // appropriate action to take for the current death test; for example,
-  // if the gtest_death_test_style flag is set to an invalid value.
-  // The LastMessage method will return a more detailed message in that
-  // case.  Otherwise, the DeathTest pointer pointed to by the "test"
-  // argument is set.  If the death test should be skipped, the pointer
-  // is set to NULL; otherwise, it is set to the address of a new concrete
-  // DeathTest object that controls the execution of the current test.
-  static bool Create(const char* statement, const RE* regex,
-                     const char* file, int line, DeathTest** test);
-  DeathTest();
-  virtual ~DeathTest() { }
-
-  // A helper class that aborts a death test when it's deleted.
-  class ReturnSentinel {
-   public:
-    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
-    ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
-   private:
-    DeathTest* const test_;
-    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
-  } GTEST_ATTRIBUTE_UNUSED_;
-
-  // An enumeration of possible roles that may be taken when a death
-  // test is encountered.  EXECUTE means that the death test logic should
-  // be executed immediately.  OVERSEE means that the program should prepare
-  // the appropriate environment for a child process to execute the death
-  // test, then wait for it to complete.
-  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
-
-  // An enumeration of the three reasons that a test might be aborted.
-  enum AbortReason {
-    TEST_ENCOUNTERED_RETURN_STATEMENT,
-    TEST_THREW_EXCEPTION,
-    TEST_DID_NOT_DIE
-  };
-
-  // Assumes one of the above roles.
-  virtual TestRole AssumeRole() = 0;
-
-  // Waits for the death test to finish and returns its status.
-  virtual int Wait() = 0;
-
-  // Returns true if the death test passed; that is, the test process
-  // exited during the test, its exit status matches a user-supplied
-  // predicate, and its stderr output matches a user-supplied regular
-  // expression.
-  // The user-supplied predicate may be a macro expression rather
-  // than a function pointer or functor, or else Wait and Passed could
-  // be combined.
-  virtual bool Passed(bool exit_status_ok) = 0;
-
-  // Signals that the death test did not die as expected.
-  virtual void Abort(AbortReason reason) = 0;
-
-  // Returns a human-readable outcome message regarding the outcome of
-  // the last death test.
-  static const char* LastMessage();
-
-  static void set_last_death_test_message(const std::string& message);
-
- private:
-  // A string containing a description of the outcome of the last death test.
-  static std::string last_death_test_message_;
-
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
-};
-
-// Factory interface for death tests.  May be mocked out for testing.
-class DeathTestFactory {
- public:
-  virtual ~DeathTestFactory() { }
-  virtual bool Create(const char* statement, const RE* regex,
-                      const char* file, int line, DeathTest** test) = 0;
-};
-
-// A concrete DeathTestFactory implementation for normal use.
-class DefaultDeathTestFactory : public DeathTestFactory {
- public:
-  virtual bool Create(const char* statement, const RE* regex,
-                      const char* file, int line, DeathTest** test);
-};
+//   1. foo::PrintTo(const T&, ostream*)
+//   2. operator<<(ostream&, const T&) defined in either foo or the
+//      global namespace.
+//
+// However if T is an STL-style container then it is printed element-wise
+// unless foo::PrintTo(const T&, ostream*) is defined. Note that
+// operator<<() is ignored for container types.
+//
+// If none of the above is defined, it will print the debug string of
+// the value if it is a protocol buffer, or print the raw bytes in the
+// value otherwise.
+//
+// To aid debugging: when T is a reference type, the address of the
+// value is also printed; when T is a (const) char pointer, both the
+// pointer value and the NUL-terminated string it points to are
+// printed.
+//
+// We also provide some convenient wrappers:
+//
+//   // Prints a value to a string.  For a (const or not) char
+//   // pointer, the NUL-terminated string (but not the pointer) is
+//   // printed.
+//   std::string ::testing::PrintToString(const T& value);
+//
+//   // Prints a value tersely: for a reference type, the referenced
+//   // value (but not the address) is printed; for a (const or not) char
+//   // pointer, the NUL-terminated string (but not the pointer) is
+//   // printed.
+//   void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
+//
+//   // Prints value using the type inferred by the compiler.  The difference
+//   // from UniversalTersePrint() is that this function prints both the
+//   // pointer and the NUL-terminated string for a (const or not) char pointer.
+//   void ::testing::internal::UniversalPrint(const T& value, ostream*);
+//
+//   // Prints the fields of a tuple tersely to a string vector, one
+//   // element for each field. Tuple support must be enabled in
+//   // gtest-port.h.
+//   std::vector<string> UniversalTersePrintTupleFieldsToStrings(
+//       const Tuple& value);
+//
+// Known limitation:
+//
+// The print primitives print the elements of an STL-style container
+// using the compiler-inferred type of *iter where iter is a
+// const_iterator of the container.  When const_iterator is an input
+// iterator but not a forward iterator, this inferred type may not
+// match value_type, and the print output may be incorrect.  In
+// practice, this is rarely a problem as for most containers
+// const_iterator is a forward iterator.  We'll fix this if there's an
+// actual need for it.  Note that this fix cannot rely on value_type
+// being defined as many user-defined container types don't have
+// value_type.
 
-// Returns true if exit_status describes a process that was terminated
-// by a signal, or exited normally with a nonzero exit code.
-GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-// Traps C++ exceptions escaping statement and reports them as test
-// failures. Note that trapping SEH exceptions is not implemented here.
-# if GTEST_HAS_EXCEPTIONS
-#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
-  try { \
-    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-  } catch (const ::std::exception& gtest_exception) { \
-    fprintf(\
-        stderr, \
-        "\n%s: Caught std::exception-derived exception escaping the " \
-        "death test statement. Exception message: %s\n", \
-        ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
-        gtest_exception.what()); \
-    fflush(stderr); \
-    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
-  } catch (...) { \
-    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
-  }
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
 
-# else
-#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
-  GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+#include <functional>
+#include <memory>
+#include <ostream>  // NOLINT
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
 
-# endif
 
-// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
-// ASSERT_EXIT*, and EXPECT_EXIT*.
-# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::AlwaysTrue()) { \
-    const ::testing::internal::RE& gtest_regex = (regex); \
-    ::testing::internal::DeathTest* gtest_dt; \
-    if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
-        __FILE__, __LINE__, &gtest_dt)) { \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
-    } \
-    if (gtest_dt != NULL) { \
-      ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
-          gtest_dt_ptr(gtest_dt); \
-      switch (gtest_dt->AssumeRole()) { \
-        case ::testing::internal::DeathTest::OVERSEE_TEST: \
-          if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
-            goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
-          } \
-          break; \
-        case ::testing::internal::DeathTest::EXECUTE_TEST: { \
-          ::testing::internal::DeathTest::ReturnSentinel \
-              gtest_sentinel(gtest_dt); \
-          GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
-          gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
-          break; \
-        } \
-        default: \
-          break; \
-      } \
-    } \
-  } else \
-    GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
-      fail(::testing::internal::DeathTest::LastMessage())
-// The symbol "fail" here expands to something into which a message
-// can be streamed.
+namespace testing {
 
-// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
-// NDEBUG mode. In this case we need the statements to be executed, the regex is
-// ignored, and the macro must accept a streamed message even though the message
-// is never printed.
-# define GTEST_EXECUTE_STATEMENT_(statement, regex) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::AlwaysTrue()) { \
-     GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-  } else \
-    ::testing::Message()
+// Definitions in the internal* namespaces are subject to change without notice.
+// DO NOT USE THEM IN USER CODE!
+namespace internal {
 
-// A class representing the parsed contents of the
-// --gtest_internal_run_death_test flag, as it existed when
-// RUN_ALL_TESTS was called.
-class InternalRunDeathTestFlag {
- public:
-  InternalRunDeathTestFlag(const std::string& a_file,
-                           int a_line,
-                           int an_index,
-                           int a_write_fd)
-      : file_(a_file), line_(a_line), index_(an_index),
-        write_fd_(a_write_fd) {}
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os);
 
-  ~InternalRunDeathTestFlag() {
-    if (write_fd_ >= 0)
-      posix::Close(write_fd_);
-  }
+// Used to print an STL-style container when the user doesn't define
+// a PrintTo() for it.
+struct ContainerPrinter {
+  template <typename T,
+            typename = typename std::enable_if<
+                (sizeof(IsContainerTest<T>(0)) == sizeof(IsContainer)) &&
+                !IsRecursiveContainer<T>::value>::type>
+  static void PrintValue(const T& container, std::ostream* os) {
+    const size_t kMaxCount = 32;  // The maximum number of elements to print.
+    *os << '{';
+    size_t count = 0;
+    for (auto&& elem : container) {
+      if (count > 0) {
+        *os << ',';
+        if (count == kMaxCount) {  // Enough has been printed.
+          *os << " ...";
+          break;
+        }
+      }
+      *os << ' ';
+      // We cannot call PrintTo(elem, os) here as PrintTo() doesn't
+      // handle `elem` being a native array.
+      internal::UniversalPrint(elem, os);
+      ++count;
+    }
 
-  const std::string& file() const { return file_; }
-  int line() const { return line_; }
-  int index() const { return index_; }
-  int write_fd() const { return write_fd_; }
+    if (count > 0) {
+      *os << ' ';
+    }
+    *os << '}';
+  }
+};
 
- private:
-  std::string file_;
-  int line_;
-  int index_;
-  int write_fd_;
+// Used to print a pointer that is neither a char pointer nor a member
+// pointer, when the user doesn't define PrintTo() for it.  (A member
+// variable pointer or member function pointer doesn't really point to
+// a location in the address space.  Their representation is
+// implementation-defined.  Therefore they will be printed as raw
+// bytes.)
+struct FunctionPointerPrinter {
+  template <typename T, typename = typename std::enable_if<
+                            std::is_function<T>::value>::type>
+  static void PrintValue(T* p, ::std::ostream* os) {
+    if (p == nullptr) {
+      *os << "NULL";
+    } else {
+      // T is a function type, so '*os << p' doesn't do what we want
+      // (it just prints p as bool).  We want to print p as a const
+      // void*.
+      *os << reinterpret_cast<const void*>(p);
+    }
+  }
+};
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
+struct PointerPrinter {
+  template <typename T>
+  static void PrintValue(T* p, ::std::ostream* os) {
+    if (p == nullptr) {
+      *os << "NULL";
+    } else {
+      // T is not a function type.  We just call << to print p,
+      // relying on ADL to pick up user-defined << for their pointer
+      // types, if any.
+      *os << p;
+    }
+  }
 };
 
-// Returns a newly created InternalRunDeathTestFlag object with fields
-// initialized from the GTEST_FLAG(internal_run_death_test) flag if
-// the flag is specified; otherwise returns NULL.
-InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
+namespace internal_stream_operator_without_lexical_name_lookup {
 
-#else  // GTEST_HAS_DEATH_TEST
+// The presence of an operator<< here will terminate lexical scope lookup
+// straight away (even though it cannot be a match because of its argument
+// types). Thus, the two operator<< calls in StreamPrinter will find only ADL
+// candidates.
+struct LookupBlocker {};
+void operator<<(LookupBlocker, LookupBlocker);
 
-// This macro is used for implementing macros such as
-// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
-// death tests are not supported. Those macros must compile on such systems
-// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
-// systems that support death tests. This allows one to write such a macro
-// on a system that does not support death tests and be sure that it will
-// compile on a death-test supporting system.
-//
-// Parameters:
-//   statement -  A statement that a macro such as EXPECT_DEATH would test
-//                for program termination. This macro has to make sure this
-//                statement is compiled but not executed, to ensure that
-//                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
-//                parameter iff EXPECT_DEATH compiles with it.
-//   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
-//                the output of statement.  This parameter has to be
-//                compiled but not evaluated by this macro, to ensure that
-//                this macro only accepts expressions that a macro such as
-//                EXPECT_DEATH would accept.
-//   terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
-//                and a return statement for ASSERT_DEATH_IF_SUPPORTED.
-//                This ensures that ASSERT_DEATH_IF_SUPPORTED will not
-//                compile inside functions where ASSERT_DEATH doesn't
-//                compile.
-//
-//  The branch that has an always false condition is used to ensure that
-//  statement and regex are compiled (and thus syntactically correct) but
-//  never executed. The unreachable code macro protects the terminator
-//  statement from generating an 'unreachable code' warning in case
-//  statement unconditionally returns or throws. The Message constructor at
-//  the end allows the syntax of streaming additional messages into the
-//  macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH.
-# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \
-    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-    if (::testing::internal::AlwaysTrue()) { \
-      GTEST_LOG_(WARNING) \
-          << "Death tests are not supported on this platform.\n" \
-          << "Statement '" #statement "' cannot be verified."; \
-    } else if (::testing::internal::AlwaysFalse()) { \
-      ::testing::internal::RE::PartialMatch(".*", (regex)); \
-      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-      terminator; \
-    } else \
-      ::testing::Message()
+struct StreamPrinter {
+  template <typename T,
+            // Don't accept member pointers here. We'd print them via implicit
+            // conversion to bool, which isn't useful.
+            typename = typename std::enable_if<
+                !std::is_member_pointer<T>::value>::type,
+            // Only accept types for which we can find a streaming operator via
+            // ADL (possibly involving implicit conversions).
+            typename = decltype(std::declval<std::ostream&>()
+                                << std::declval<const T&>())>
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    // Call streaming operator found by ADL, possibly with implicit conversions
+    // of the arguments.
+    *os << value;
+  }
+};
 
-#endif  // GTEST_HAS_DEATH_TEST
+}  // namespace internal_stream_operator_without_lexical_name_lookup
 
-}  // namespace internal
-}  // namespace testing
+struct ProtobufPrinter {
+  // We print a protobuf using its ShortDebugString() when the string
+  // doesn't exceed this many characters; otherwise we print it using
+  // DebugString() for better readability.
+  static const size_t kProtobufOneLinerMaxLength = 50;
 
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+  template <typename T,
+            typename = typename std::enable_if<
+                internal::HasDebugStringAndShortDebugString<T>::value>::type>
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    std::string pretty_str = value.ShortDebugString();
+    if (pretty_str.length() > kProtobufOneLinerMaxLength) {
+      pretty_str = "\n" + value.DebugString();
+    }
+    *os << ("<" + pretty_str + ">");
+  }
+};
 
-namespace testing {
+struct ConvertibleToIntegerPrinter {
+  // Since T has no << operator or PrintTo() but can be implicitly
+  // converted to BiggestInt, we print it as a BiggestInt.
+  //
+  // Most likely T is an enum type (either named or unnamed), in which
+  // case printing it as an integer is the desired behavior.  In case
+  // T is not an enum, printing it as an integer is the best we can do
+  // given that it has no user-defined printer.
+  static void PrintValue(internal::BiggestInt value, ::std::ostream* os) {
+    *os << value;
+  }
+};
 
-// This flag controls the style of death tests.  Valid values are "threadsafe",
-// meaning that the death test child process will re-execute the test binary
-// from the start, running only a single death test, or "fast",
-// meaning that the child process will execute the test logic immediately
-// after forking.
-GTEST_DECLARE_string_(death_test_style);
+struct ConvertibleToStringViewPrinter {
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+  static void PrintValue(internal::StringView value, ::std::ostream* os) {
+    internal::UniversalPrint(value, os);
+  }
+#endif
+};
 
-#if GTEST_HAS_DEATH_TEST
 
-namespace internal {
+// Prints the given number of bytes in the given object to the given
+// ostream.
+GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
+                                     size_t count,
+                                     ::std::ostream* os);
+struct RawBytesPrinter {
+  // SFINAE on `sizeof` to make sure we have a complete type.
+  template <typename T, size_t = sizeof(T)>
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    PrintBytesInObjectTo(
+        static_cast<const unsigned char*>(
+            // Load bearing cast to void* to support iOS
+            reinterpret_cast<const void*>(std::addressof(value))),
+        sizeof(value), os);
+  }
+};
 
-// Returns a Boolean value indicating whether the caller is currently
-// executing in the context of the death test child process.  Tools such as
-// Valgrind heap checkers may need this to modify their behavior in death
-// tests.  IMPORTANT: This is an internal utility.  Using it may break the
-// implementation of death tests.  User code MUST NOT use it.
-GTEST_API_ bool InDeathTestChild();
+struct FallbackPrinter {
+  template <typename T>
+  static void PrintValue(const T&, ::std::ostream* os) {
+    *os << "(incomplete type)";
+  }
+};
 
-}  // namespace internal
+// Try every printer in order and return the first one that works.
+template <typename T, typename E, typename Printer, typename... Printers>
+struct FindFirstPrinter : FindFirstPrinter<T, E, Printers...> {};
 
-// The following macros are useful for writing death tests.
+template <typename T, typename Printer, typename... Printers>
+struct FindFirstPrinter<
+    T, decltype(Printer::PrintValue(std::declval<const T&>(), nullptr)),
+    Printer, Printers...> {
+  using type = Printer;
+};
 
-// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
-// executed:
-//
-//   1. It generates a warning if there is more than one active
-//   thread.  This is because it's safe to fork() or clone() only
-//   when there is a single thread.
-//
-//   2. The parent process clone()s a sub-process and runs the death
-//   test in it; the sub-process exits with code 0 at the end of the
-//   death test, if it hasn't exited already.
-//
-//   3. The parent process waits for the sub-process to terminate.
-//
-//   4. The parent process checks the exit code and error message of
-//   the sub-process.
-//
-// Examples:
-//
-//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
-//   for (int i = 0; i < 5; i++) {
-//     EXPECT_DEATH(server.ProcessRequest(i),
-//                  "Invalid request .* in ProcessRequest()")
-//                  << "Failed to die on request " << i;
-//   }
-//
-//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
-//
-//   bool KilledBySIGHUP(int exit_code) {
-//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
-//   }
-//
-//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
-//
-// On the regular expressions used in death tests:
-//
-//   On POSIX-compliant systems (*nix), we use the <regex.h> library,
-//   which uses the POSIX extended regex syntax.
-//
-//   On other platforms (e.g. Windows), we only support a simple regex
-//   syntax implemented as part of Google Test.  This limited
-//   implementation should be enough most of the time when writing
-//   death tests; though it lacks many features you can find in PCRE
-//   or POSIX extended regex syntax.  For example, we don't support
-//   union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
-//   repetition count ("x{5,7}"), among others.
-//
-//   Below is the syntax that we do support.  We chose it to be a
-//   subset of both PCRE and POSIX extended regex, so it's easy to
-//   learn wherever you come from.  In the following: 'A' denotes a
-//   literal character, period (.), or a single \\ escape sequence;
-//   'x' and 'y' denote regular expressions; 'm' and 'n' are for
-//   natural numbers.
-//
-//     c     matches any literal character c
-//     \\d   matches any decimal digit
-//     \\D   matches any character that's not a decimal digit
-//     \\f   matches \f
-//     \\n   matches \n
-//     \\r   matches \r
-//     \\s   matches any ASCII whitespace, including \n
-//     \\S   matches any character that's not a whitespace
-//     \\t   matches \t
-//     \\v   matches \v
-//     \\w   matches any letter, _, or decimal digit
-//     \\W   matches any character that \\w doesn't match
-//     \\c   matches any literal character c, which must be a punctuation
-//     .     matches any single character except \n
-//     A?    matches 0 or 1 occurrences of A
-//     A*    matches 0 or many occurrences of A
-//     A+    matches 1 or many occurrences of A
-//     ^     matches the beginning of a string (not that of each line)
-//     $     matches the end of a string (not that of each line)
-//     xy    matches x followed by y
-//
-//   If you accidentally use PCRE or POSIX extended regex features
-//   not implemented by us, you will get a run-time failure.  In that
-//   case, please try to rewrite your regular expression within the
-//   above syntax.
-//
-//   This implementation is *not* meant to be as highly tuned or robust
-//   as a compiled regex library, but should perform well enough for a
-//   death test, which already incurs significant overhead by launching
-//   a child process.
-//
-// Known caveats:
-//
-//   A "threadsafe" style death test obtains the path to the test
-//   program from argv[0] and re-executes it in the sub-process.  For
-//   simplicity, the current implementation doesn't search the PATH
-//   when launching the sub-process.  This means that the user must
-//   invoke the test program via a path that contains at least one
-//   path separator (e.g. path/to/foo_test and
-//   /absolute/path/to/bar_test are fine, but foo_test is not).  This
-//   is rarely a problem as people usually don't put the test binary
-//   directory in PATH.
+// Select the best printer in the following order:
+//  - Print containers (they have begin/end/etc).
+//  - Print function pointers.
+//  - Print object pointers.
+//  - Use the stream operator, if available.
+//  - Print protocol buffers.
+//  - Print types convertible to BiggestInt.
+//  - Print types convertible to StringView, if available.
+//  - Fallback to printing the raw bytes of the object.
+template <typename T>
+void PrintWithFallback(const T& value, ::std::ostream* os) {
+  using Printer = typename FindFirstPrinter<
+      T, void, ContainerPrinter, FunctionPointerPrinter, PointerPrinter,
+      internal_stream_operator_without_lexical_name_lookup::StreamPrinter,
+      ProtobufPrinter, ConvertibleToIntegerPrinter,
+      ConvertibleToStringViewPrinter, RawBytesPrinter, FallbackPrinter>::type;
+  Printer::PrintValue(value, os);
+}
+
+// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
+// value of type ToPrint that is an operand of a comparison assertion
+// (e.g. ASSERT_EQ).  OtherOperand is the type of the other operand in
+// the comparison, and is used to help determine the best way to
+// format the value.  In particular, when the value is a C string
+// (char pointer) and the other operand is an STL string object, we
+// want to format the C string as a string, since we know it is
+// compared by value with the string object.  If the value is a char
+// pointer but the other operand is not an STL string object, we don't
+// know whether the pointer is supposed to point to a NUL-terminated
+// string, and thus want to print it as a pointer to be safe.
 //
-// TODO(wan@google.com): make thread-safe death tests search the PATH.
-
-// Asserts that a given statement causes the program to exit, with an
-// integer exit status that satisfies predicate, and emitting error output
-// that matches regex.
-# define ASSERT_EXIT(statement, predicate, regex) \
-    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)
-
-// Like ASSERT_EXIT, but continues on to successive tests in the
-// test case, if any:
-# define EXPECT_EXIT(statement, predicate, regex) \
-    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)
-
-// Asserts that a given statement causes the program to exit, either by
-// explicitly exiting with a nonzero exit code or being killed by a
-// signal, and emitting error output that matches regex.
-# define ASSERT_DEATH(statement, regex) \
-    ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
-
-// Like ASSERT_DEATH, but continues on to successive tests in the
-// test case, if any:
-# define EXPECT_DEATH(statement, regex) \
-    EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
-
-// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
 
-// Tests that an exit code describes a normal exit with a given exit code.
-class GTEST_API_ ExitedWithCode {
+// The default case.
+template <typename ToPrint, typename OtherOperand>
+class FormatForComparison {
  public:
-  explicit ExitedWithCode(int exit_code);
-  bool operator()(int exit_status) const;
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ExitedWithCode& other);
-
-  const int exit_code_;
+  static ::std::string Format(const ToPrint& value) {
+    return ::testing::PrintToString(value);
+  }
 };
 
-# if !GTEST_OS_WINDOWS
-// Tests that an exit code describes an exit due to termination by a
-// given signal.
-class GTEST_API_ KilledBySignal {
+// Array.
+template <typename ToPrint, size_t N, typename OtherOperand>
+class FormatForComparison<ToPrint[N], OtherOperand> {
  public:
-  explicit KilledBySignal(int signum);
-  bool operator()(int exit_status) const;
- private:
-  const int signum_;
+  static ::std::string Format(const ToPrint* value) {
+    return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
+  }
 };
-# endif  // !GTEST_OS_WINDOWS
-
-// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
-// The death testing framework causes this to have interesting semantics,
-// since the sideeffects of the call are only visible in opt mode, and not
-// in debug mode.
-//
-// In practice, this can be used to test functions that utilize the
-// LOG(DFATAL) macro using the following style:
-//
-// int DieInDebugOr12(int* sideeffect) {
-//   if (sideeffect) {
-//     *sideeffect = 12;
-//   }
-//   LOG(DFATAL) << "death";
-//   return 12;
-// }
-//
-// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
-//   int sideeffect = 0;
-//   // Only asserts in dbg.
-//   EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
-//
-// #ifdef NDEBUG
-//   // opt-mode has sideeffect visible.
-//   EXPECT_EQ(12, sideeffect);
-// #else
-//   // dbg-mode no visible sideeffect.
-//   EXPECT_EQ(0, sideeffect);
-// #endif
-// }
-//
-// This will assert that DieInDebugReturn12InOpt() crashes in debug
-// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
-// appropriate fallback value (12 in this case) in opt mode. If you
-// need to test that a function has appropriate side-effects in opt
-// mode, include assertions against the side-effects.  A general
-// pattern for this is:
-//
-// EXPECT_DEBUG_DEATH({
-//   // Side-effects here will have an effect after this statement in
-//   // opt mode, but none in debug mode.
-//   EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
-// }, "death");
-//
-# ifdef NDEBUG
 
-#  define EXPECT_DEBUG_DEATH(statement, regex) \
-  GTEST_EXECUTE_STATEMENT_(statement, regex)
+// By default, print C string as pointers to be safe, as we don't know
+// whether they actually point to a NUL-terminated string.
 
-#  define ASSERT_DEBUG_DEATH(statement, regex) \
-  GTEST_EXECUTE_STATEMENT_(statement, regex)
+#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType)                \
+  template <typename OtherOperand>                                      \
+  class FormatForComparison<CharType*, OtherOperand> {                  \
+   public:                                                              \
+    static ::std::string Format(CharType* value) {                      \
+      return ::testing::PrintToString(static_cast<const void*>(value)); \
+    }                                                                   \
+  }
 
-# else
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
+#ifdef __cpp_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char8_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char8_t);
+#endif
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char32_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char32_t);
 
-#  define EXPECT_DEBUG_DEATH(statement, regex) \
-  EXPECT_DEATH(statement, regex)
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
 
-#  define ASSERT_DEBUG_DEATH(statement, regex) \
-  ASSERT_DEATH(statement, regex)
+// If a C string is compared with an STL string object, we know it's meant
+// to point to a NUL-terminated string, and thus can print it as a string.
 
-# endif  // NDEBUG for EXPECT_DEBUG_DEATH
-#endif  // GTEST_HAS_DEATH_TEST
+#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
+  template <>                                                           \
+  class FormatForComparison<CharType*, OtherStringType> {               \
+   public:                                                              \
+    static ::std::string Format(CharType* value) {                      \
+      return ::testing::PrintToString(value);                           \
+    }                                                                   \
+  }
 
-// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
-// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
-// death tests are supported; otherwise they just issue a warning.  This is
-// useful when you are combining death test assertions with normal test
-// assertions in one test.
-#if GTEST_HAS_DEATH_TEST
-# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
-    EXPECT_DEATH(statement, regex)
-# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
-    ASSERT_DEATH(statement, regex)
-#else
-# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
-    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
-# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
-    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
+#ifdef __cpp_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char8_t, ::std::u8string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char8_t, ::std::u8string);
 #endif
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char32_t, ::std::u32string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char32_t, ::std::u32string);
 
-}  // namespace testing
+#if GTEST_HAS_STD_WSTRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
+#endif
 
-#endif  // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
-// This file was GENERATED by command:
-//     pump.py gtest-param-test.h.pump
-// DO NOT EDIT BY HAND!!!
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_
 
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: vladl@google.com (Vlad Losev)
+// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc)
+// operand to be used in a failure message.  The type (but not value)
+// of the other operand may affect the format.  This allows us to
+// print a char* as a raw pointer when it is compared against another
+// char* or void*, and print it as a C string when it is compared
+// against an std::string object, for example.
 //
-// Macros and functions for implementing parameterized tests
-// in Google C++ Testing Framework (Google Test)
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename T1, typename T2>
+std::string FormatForComparisonFailureMessage(
+    const T1& value, const T2& /* other_operand */) {
+  return FormatForComparison<T1, T2>::Format(value);
+}
+
+// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
+// value to the given ostream.  The caller must ensure that
+// 'ostream_ptr' is not NULL, or the behavior is undefined.
 //
-// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
+// We define UniversalPrinter as a class template (as opposed to a
+// function template), as we need to partially specialize it for
+// reference types, which cannot be done with function templates.
+template <typename T>
+class UniversalPrinter;
+
+// Prints the given value using the << operator if it has one;
+// otherwise prints the bytes in it.  This is what
+// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
+// or overloaded for type T.
 //
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+// A user can override this behavior for a class type Foo by defining
+// an overload of PrintTo() in the namespace where Foo is defined.  We
+// give the user this option as sometimes defining a << operator for
+// Foo is not desirable (e.g. the coding style may prevent doing it,
+// or there is already a << operator but it doesn't do what the user
+// wants).
+template <typename T>
+void PrintTo(const T& value, ::std::ostream* os) {
+  internal::PrintWithFallback(value, os);
+}
 
+// The following list of PrintTo() overloads tells
+// UniversalPrinter<T>::Print() how to print standard types (built-in
+// types, strings, plain arrays, and pointers).
 
-// Value-parameterized tests allow you to test your code with different
-// parameters without writing multiple copies of the same test.
-//
-// Here is how you use value-parameterized tests:
+// Overloads for various char types.
+GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
+GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
+inline void PrintTo(char c, ::std::ostream* os) {
+  // When printing a plain char, we always treat it as unsigned.  This
+  // way, the output won't be affected by whether the compiler thinks
+  // char is signed or not.
+  PrintTo(static_cast<unsigned char>(c), os);
+}
 
-#if 0
+// Overloads for other simple built-in types.
+inline void PrintTo(bool x, ::std::ostream* os) {
+  *os << (x ? "true" : "false");
+}
 
-// To write value-parameterized tests, first you should define a fixture
-// class. It is usually derived from testing::TestWithParam<T> (see below for
-// another inheritance scheme that's sometimes useful in more complicated
-// class hierarchies), where the type of your parameter values.
-// TestWithParam<T> is itself derived from testing::Test. T can be any
-// copyable type. If it's a raw pointer, you are responsible for managing the
-// lifespan of the pointed values.
+// Overload for wchar_t type.
+// Prints a wchar_t as a symbol if it is printable or as its internal
+// code otherwise and also as its decimal code (except for L'\0').
+// The L'\0' char is printed as "L'\\0'". The decimal code is printed
+// as signed integer when wchar_t is implemented by the compiler
+// as a signed type and is printed as an unsigned integer when wchar_t
+// is implemented as an unsigned type.
+GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
 
-class FooTest : public ::testing::TestWithParam<const char*> {
-  // You can implement all the usual class fixture members here.
-};
+GTEST_API_ void PrintTo(char32_t c, ::std::ostream* os);
+inline void PrintTo(char16_t c, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#ifdef __cpp_char8_t
+inline void PrintTo(char8_t c, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#endif
+
+// Overloads for C strings.
+GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
+inline void PrintTo(char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const char*>(s), os);
+}
 
-// Then, use the TEST_P macro to define as many parameterized tests
-// for this fixture as you want. The _P suffix is for "parameterized"
-// or "pattern", whichever you prefer to think.
+// signed/unsigned char is often used for representing binary data, so
+// we print pointers to it as void* to be safe.
+inline void PrintTo(const signed char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(signed char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(unsigned char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+#ifdef __cpp_char8_t
+// Overloads for u8 strings.
+GTEST_API_ void PrintTo(const char8_t* s, ::std::ostream* os);
+inline void PrintTo(char8_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const char8_t*>(s), os);
+}
+#endif
+// Overloads for u16 strings.
+GTEST_API_ void PrintTo(const char16_t* s, ::std::ostream* os);
+inline void PrintTo(char16_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const char16_t*>(s), os);
+}
+// Overloads for u32 strings.
+GTEST_API_ void PrintTo(const char32_t* s, ::std::ostream* os);
+inline void PrintTo(char32_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const char32_t*>(s), os);
+}
 
-TEST_P(FooTest, DoesBlah) {
-  // Inside a test, access the test parameter with the GetParam() method
-  // of the TestWithParam<T> class:
-  EXPECT_TRUE(foo.Blah(GetParam()));
-  ...
+// MSVC can be configured to define wchar_t as a typedef of unsigned
+// short.  It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
+// type.  When wchar_t is a typedef, defining an overload for const
+// wchar_t* would cause unsigned short* be printed as a wide string,
+// possibly causing invalid memory accesses.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Overloads for wide C strings
+GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
+inline void PrintTo(wchar_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const wchar_t*>(s), os);
 }
+#endif
 
-TEST_P(FooTest, HasBlahBlah) {
-  ...
+// Overload for C arrays.  Multi-dimensional arrays are printed
+// properly.
+
+// Prints the given number of elements in an array, without printing
+// the curly braces.
+template <typename T>
+void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
+  UniversalPrint(a[0], os);
+  for (size_t i = 1; i != count; i++) {
+    *os << ", ";
+    UniversalPrint(a[i], os);
+  }
 }
 
-// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test
-// case with any set of parameters you want. Google Test defines a number
-// of functions for generating test parameters. They return what we call
-// (surprise!) parameter generators. Here is a  summary of them, which
-// are all in the testing namespace:
-//
-//
-//  Range(begin, end [, step]) - Yields values {begin, begin+step,
-//                               begin+step+step, ...}. The values do not
-//                               include end. step defaults to 1.
-//  Values(v1, v2, ..., vN)    - Yields values {v1, v2, ..., vN}.
-//  ValuesIn(container)        - Yields values from a C-style array, an STL
-//  ValuesIn(begin,end)          container, or an iterator range [begin, end).
-//  Bool()                     - Yields sequence {false, true}.
-//  Combine(g1, g2, ..., gN)   - Yields all combinations (the Cartesian product
-//                               for the math savvy) of the values generated
-//                               by the N generators.
-//
-// For more details, see comments at the definitions of these functions below
-// in this file.
-//
-// The following statement will instantiate tests from the FooTest test case
-// each with parameter values "meeny", "miny", and "moe".
+// Overloads for ::std::string.
+GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os);
+inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
+  PrintStringTo(s, os);
+}
 
-INSTANTIATE_TEST_CASE_P(InstantiationName,
-                        FooTest,
-                        Values("meeny", "miny", "moe"));
+// Overloads for ::std::u8string
+#ifdef __cpp_char8_t
+GTEST_API_ void PrintU8StringTo(const ::std::u8string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u8string& s, ::std::ostream* os) {
+  PrintU8StringTo(s, os);
+}
+#endif
 
-// To distinguish different instances of the pattern, (yes, you
-// can instantiate it more then once) the first argument to the
-// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
-// actual test case name. Remember to pick unique prefixes for different
-// instantiations. The tests from the instantiation above will have
-// these names:
-//
-//    * InstantiationName/FooTest.DoesBlah/0 for "meeny"
-//    * InstantiationName/FooTest.DoesBlah/1 for "miny"
-//    * InstantiationName/FooTest.DoesBlah/2 for "moe"
-//    * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
-//    * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
-//    * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
-//
-// You can use these names in --gtest_filter.
-//
-// This statement will instantiate all tests from FooTest again, each
-// with parameter values "cat" and "dog":
+// Overloads for ::std::u16string
+GTEST_API_ void PrintU16StringTo(const ::std::u16string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u16string& s, ::std::ostream* os) {
+  PrintU16StringTo(s, os);
+}
 
-const char* pets[] = {"cat", "dog"};
-INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
+// Overloads for ::std::u32string
+GTEST_API_ void PrintU32StringTo(const ::std::u32string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u32string& s, ::std::ostream* os) {
+  PrintU32StringTo(s, os);
+}
 
-// The tests from the instantiation above will have these names:
-//
-//    * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
-//    * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
-//    * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
-//    * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
-//
-// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
-// in the given test case, whether their definitions come before or
-// AFTER the INSTANTIATE_TEST_CASE_P statement.
-//
-// Please also note that generator expressions (including parameters to the
-// generators) are evaluated in InitGoogleTest(), after main() has started.
-// This allows the user on one hand, to adjust generator parameters in order
-// to dynamically determine a set of tests to run and on the other hand,
-// give the user a chance to inspect the generated tests with Google Test
-// reflection API before RUN_ALL_TESTS() is executed.
-//
-// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
-// for more examples.
-//
-// In the future, we plan to publish the API for defining new parameter
-// generators. But for now this interface remains part of the internal
-// implementation and is subject to change.
-//
-//
-// A parameterized test fixture must be derived from testing::Test and from
-// testing::WithParamInterface<T>, where T is the type of the parameter
-// values. Inheriting from TestWithParam<T> satisfies that requirement because
-// TestWithParam<T> inherits from both Test and WithParamInterface. In more
-// complicated hierarchies, however, it is occasionally useful to inherit
-// separately from Test and WithParamInterface. For example:
+// Overloads for ::std::wstring.
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os);
+inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
+  PrintWideStringTo(s, os);
+}
+#endif  // GTEST_HAS_STD_WSTRING
 
-class BaseTest : public ::testing::Test {
-  // You can inherit all the usual members for a non-parameterized test
-  // fixture here.
-};
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// Overload for internal::StringView.
+inline void PrintTo(internal::StringView sp, ::std::ostream* os) {
+  PrintTo(::std::string(sp), os);
+}
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
-class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
-  // The usual test fixture members go here too.
-};
+inline void PrintTo(std::nullptr_t, ::std::ostream* os) { *os << "(nullptr)"; }
 
-TEST_F(BaseTest, HasFoo) {
-  // This is an ordinary non-parameterized test.
+template <typename T>
+void PrintTo(std::reference_wrapper<T> ref, ::std::ostream* os) {
+  UniversalPrinter<T&>::Print(ref.get(), os);
 }
 
-TEST_P(DerivedTest, DoesBlah) {
-  // GetParam works just the same here as if you inherit from TestWithParam.
-  EXPECT_TRUE(foo.Blah(GetParam()));
+inline const void* VoidifyPointer(const void* p) { return p; }
+inline const void* VoidifyPointer(volatile const void* p) {
+  return const_cast<const void*>(p);
 }
 
-#endif  // 0
+template <typename T, typename Ptr>
+void PrintSmartPointer(const Ptr& ptr, std::ostream* os, char) {
+  if (ptr == nullptr) {
+    *os << "(nullptr)";
+  } else {
+    // We can't print the value. Just print the pointer..
+    *os << "(" << (VoidifyPointer)(ptr.get()) << ")";
+  }
+}
+template <typename T, typename Ptr,
+          typename = typename std::enable_if<!std::is_void<T>::value &&
+                                             !std::is_array<T>::value>::type>
+void PrintSmartPointer(const Ptr& ptr, std::ostream* os, int) {
+  if (ptr == nullptr) {
+    *os << "(nullptr)";
+  } else {
+    *os << "(ptr = " << (VoidifyPointer)(ptr.get()) << ", value = ";
+    UniversalPrinter<T>::Print(*ptr, os);
+    *os << ")";
+  }
+}
 
+template <typename T, typename D>
+void PrintTo(const std::unique_ptr<T, D>& ptr, std::ostream* os) {
+  (PrintSmartPointer<T>)(ptr, os, 0);
+}
 
-#if !GTEST_OS_SYMBIAN
-# include <utility>
-#endif
+template <typename T>
+void PrintTo(const std::shared_ptr<T>& ptr, std::ostream* os) {
+  (PrintSmartPointer<T>)(ptr, os, 0);
+}
 
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*.  Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: vladl@google.com (Vlad Losev)
+// Helper function for printing a tuple.  T must be instantiated with
+// a tuple type.
+template <typename T>
+void PrintTupleTo(const T&, std::integral_constant<size_t, 0>,
+                  ::std::ostream*) {}
+
+template <typename T, size_t I>
+void PrintTupleTo(const T& t, std::integral_constant<size_t, I>,
+                  ::std::ostream* os) {
+  PrintTupleTo(t, std::integral_constant<size_t, I - 1>(), os);
+  GTEST_INTENTIONAL_CONST_COND_PUSH_()
+  if (I > 1) {
+    GTEST_INTENTIONAL_CONST_COND_POP_()
+    *os << ", ";
+  }
+  UniversalPrinter<typename std::tuple_element<I - 1, T>::type>::Print(
+      std::get<I - 1>(t), os);
+}
 
-// Type and function utilities for implementing parameterized tests.
+template <typename... Types>
+void PrintTo(const ::std::tuple<Types...>& t, ::std::ostream* os) {
+  *os << "(";
+  PrintTupleTo(t, std::integral_constant<size_t, sizeof...(Types)>(), os);
+  *os << ")";
+}
 
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+// Overload for std::pair.
+template <typename T1, typename T2>
+void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
+  *os << '(';
+  // We cannot use UniversalPrint(value.first, os) here, as T1 may be
+  // a reference type.  The same for printing value.second.
+  UniversalPrinter<T1>::Print(value.first, os);
+  *os << ", ";
+  UniversalPrinter<T2>::Print(value.second, os);
+  *os << ')';
+}
 
-#include <iterator>
-#include <utility>
-#include <vector>
+// Implements printing a non-reference type T by letting the compiler
+// pick the right overload of PrintTo() for T.
+template <typename T>
+class UniversalPrinter {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning.
+  GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
 
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*.  Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-// Copyright 2003 Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: Dan Egnor (egnor@google.com)
-//
-// A "smart" pointer type with reference tracking.  Every pointer to a
-// particular object is kept on a circular linked list.  When the last pointer
-// to an object is destroyed or reassigned, the object is deleted.
-//
-// Used properly, this deletes the object when the last reference goes away.
-// There are several caveats:
-// - Like all reference counting schemes, cycles lead to leaks.
-// - Each smart pointer is actually two pointers (8 bytes instead of 4).
-// - Every time a pointer is assigned, the entire list of pointers to that
-//   object is traversed.  This class is therefore NOT SUITABLE when there
-//   will often be more than two or three pointers to a particular object.
-// - References are only tracked as long as linked_ptr<> objects are copied.
-//   If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
-//   will happen (double deletion).
-//
-// A good use of this class is storing object references in STL containers.
-// You can safely put linked_ptr<> in a vector<>.
-// Other uses may not be as good.
-//
-// Note: If you use an incomplete type with linked_ptr<>, the class
-// *containing* linked_ptr<> must have a constructor and destructor (even
-// if they do nothing!).
-//
-// Bill Gibbons suggested we use something like this.
-//
-// Thread Safety:
-//   Unlike other linked_ptr implementations, in this implementation
-//   a linked_ptr object is thread-safe in the sense that:
-//     - it's safe to copy linked_ptr objects concurrently,
-//     - it's safe to copy *from* a linked_ptr and read its underlying
-//       raw pointer (e.g. via get()) concurrently, and
-//     - it's safe to write to two linked_ptrs that point to the same
-//       shared object concurrently.
-// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
-// confusion with normal linked_ptr.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+  // Note: we deliberately don't call this PrintTo(), as that name
+  // conflicts with ::testing::internal::PrintTo in the body of the
+  // function.
+  static void Print(const T& value, ::std::ostream* os) {
+    // By default, ::testing::internal::PrintTo() is used for printing
+    // the value.
+    //
+    // Thanks to Koenig look-up, if T is a class and has its own
+    // PrintTo() function defined in its namespace, that function will
+    // be visible here.  Since it is more specific than the generic ones
+    // in ::testing::internal, it will be picked by the compiler in the
+    // following statement - exactly what we want.
+    PrintTo(value, os);
+  }
 
-#include <stdlib.h>
-#include <assert.h>
+  GTEST_DISABLE_MSC_WARNINGS_POP_()
+};
 
+// Remove any const-qualifiers before passing a type to UniversalPrinter.
+template <typename T>
+class UniversalPrinter<const T> : public UniversalPrinter<T> {};
 
-namespace testing {
-namespace internal {
+#if GTEST_INTERNAL_HAS_ANY
 
-// Protects copying of all linked_ptr objects.
-GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
+// Printer for std::any / absl::any
 
-// This is used internally by all instances of linked_ptr<>.  It needs to be
-// a non-template class because different types of linked_ptr<> can refer to
-// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
-// So, it needs to be possible for different types of linked_ptr to participate
-// in the same circular linked list, so we need a single class type here.
-//
-// DO NOT USE THIS CLASS DIRECTLY YOURSELF.  Use linked_ptr<T>.
-class linked_ptr_internal {
+template <>
+class UniversalPrinter<Any> {
  public:
-  // Create a new circle that includes only this instance.
-  void join_new() {
-    next_ = this;
+  static void Print(const Any& value, ::std::ostream* os) {
+    if (value.has_value()) {
+      *os << "value of type " << GetTypeName(value);
+    } else {
+      *os << "no value";
+    }
   }
 
-  // Many linked_ptr operations may change p.link_ for some linked_ptr
-  // variable p in the same circle as this object.  Therefore we need
-  // to prevent two such operations from occurring concurrently.
-  //
-  // Note that different types of linked_ptr objects can coexist in a
-  // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
-  // linked_ptr<Derived2>).  Therefore we must use a single mutex to
-  // protect all linked_ptr objects.  This can create serious
-  // contention in production code, but is acceptable in a testing
-  // framework.
-
-  // Join an existing circle.
-  void join(linked_ptr_internal const* ptr)
-      GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
-    MutexLock lock(&g_linked_ptr_mutex);
-
-    linked_ptr_internal const* p = ptr;
-    while (p->next_ != ptr) p = p->next_;
-    p->next_ = this;
-    next_ = ptr;
+ private:
+  static std::string GetTypeName(const Any& value) {
+#if GTEST_HAS_RTTI
+    return internal::GetTypeName(value.type());
+#else
+    static_cast<void>(value);  // possibly unused
+    return "<unknown_type>";
+#endif  // GTEST_HAS_RTTI
   }
+};
 
-  // Leave whatever circle we're part of.  Returns true if we were the
-  // last member of the circle.  Once this is done, you can join() another.
-  bool depart()
-      GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
-    MutexLock lock(&g_linked_ptr_mutex);
-
-    if (next_ == this) return true;
-    linked_ptr_internal const* p = next_;
-    while (p->next_ != this) p = p->next_;
-    p->next_ = next_;
-    return false;
-  }
+#endif  // GTEST_INTERNAL_HAS_ANY
 
- private:
-  mutable linked_ptr_internal const* next_;
-};
+#if GTEST_INTERNAL_HAS_OPTIONAL
+
+// Printer for std::optional / absl::optional
 
 template <typename T>
-class linked_ptr {
+class UniversalPrinter<Optional<T>> {
  public:
-  typedef T element_type;
-
-  // Take over ownership of a raw pointer.  This should happen as soon as
-  // possible after the object is created.
-  explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
-  ~linked_ptr() { depart(); }
-
-  // Copy an existing linked_ptr<>, adding ourselves to the list of references.
-  template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
-  linked_ptr(linked_ptr const& ptr) {  // NOLINT
-    assert(&ptr != this);
-    copy(&ptr);
+  static void Print(const Optional<T>& value, ::std::ostream* os) {
+    *os << '(';
+    if (!value) {
+      *os << "nullopt";
+    } else {
+      UniversalPrint(*value, os);
+    }
+    *os << ')';
   }
+};
 
-  // Assignment releases the old value and acquires the new.
-  template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
-    depart();
-    copy(&ptr);
-    return *this;
+#endif  // GTEST_INTERNAL_HAS_OPTIONAL
+
+#if GTEST_INTERNAL_HAS_VARIANT
+
+// Printer for std::variant / absl::variant
+
+template <typename... T>
+class UniversalPrinter<Variant<T...>> {
+ public:
+  static void Print(const Variant<T...>& value, ::std::ostream* os) {
+    *os << '(';
+#if GTEST_HAS_ABSL
+    absl::visit(Visitor{os, value.index()}, value);
+#else
+    std::visit(Visitor{os, value.index()}, value);
+#endif  // GTEST_HAS_ABSL
+    *os << ')';
   }
 
-  linked_ptr& operator=(linked_ptr const& ptr) {
-    if (&ptr != this) {
-      depart();
-      copy(&ptr);
+ private:
+  struct Visitor {
+    template <typename U>
+    void operator()(const U& u) const {
+      *os << "'" << GetTypeName<U>() << "(index = " << index
+          << ")' with value ";
+      UniversalPrint(u, os);
     }
-    return *this;
+    ::std::ostream* os;
+    std::size_t index;
+  };
+};
+
+#endif  // GTEST_INTERNAL_HAS_VARIANT
+
+// UniversalPrintArray(begin, len, os) prints an array of 'len'
+// elements, starting at address 'begin'.
+template <typename T>
+void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
+  if (len == 0) {
+    *os << "{}";
+  } else {
+    *os << "{ ";
+    const size_t kThreshold = 18;
+    const size_t kChunkSize = 8;
+    // If the array has more than kThreshold elements, we'll have to
+    // omit some details by printing only the first and the last
+    // kChunkSize elements.
+    if (len <= kThreshold) {
+      PrintRawArrayTo(begin, len, os);
+    } else {
+      PrintRawArrayTo(begin, kChunkSize, os);
+      *os << ", ..., ";
+      PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
+    }
+    *os << " }";
+  }
+}
+// This overload prints a (const) char array compactly.
+GTEST_API_ void UniversalPrintArray(
+    const char* begin, size_t len, ::std::ostream* os);
+
+#ifdef __cpp_char8_t
+// This overload prints a (const) char8_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char8_t* begin, size_t len,
+                                    ::std::ostream* os);
+#endif
+
+// This overload prints a (const) char16_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char16_t* begin, size_t len,
+                                    ::std::ostream* os);
+
+// This overload prints a (const) char32_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char32_t* begin, size_t len,
+                                    ::std::ostream* os);
+
+// This overload prints a (const) wchar_t array compactly.
+GTEST_API_ void UniversalPrintArray(
+    const wchar_t* begin, size_t len, ::std::ostream* os);
+
+// Implements printing an array type T[N].
+template <typename T, size_t N>
+class UniversalPrinter<T[N]> {
+ public:
+  // Prints the given array, omitting some elements when there are too
+  // many.
+  static void Print(const T (&a)[N], ::std::ostream* os) {
+    UniversalPrintArray(a, N, os);
   }
+};
+
+// Implements printing a reference type T&.
+template <typename T>
+class UniversalPrinter<T&> {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning.
+  GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
+
+  static void Print(const T& value, ::std::ostream* os) {
+    // Prints the address of the value.  We use reinterpret_cast here
+    // as static_cast doesn't compile when T is a function type.
+    *os << "@" << reinterpret_cast<const void*>(&value) << " ";
 
-  // Smart pointer members.
-  void reset(T* ptr = NULL) {
-    depart();
-    capture(ptr);
+    // Then prints the value itself.
+    UniversalPrint(value, os);
   }
-  T* get() const { return value_; }
-  T* operator->() const { return value_; }
-  T& operator*() const { return *value_; }
 
-  bool operator==(T* p) const { return value_ == p; }
-  bool operator!=(T* p) const { return value_ != p; }
-  template <typename U>
-  bool operator==(linked_ptr<U> const& ptr) const {
-    return value_ == ptr.get();
+  GTEST_DISABLE_MSC_WARNINGS_POP_()
+};
+
+// Prints a value tersely: for a reference type, the referenced value
+// (but not the address) is printed; for a (const) char pointer, the
+// NUL-terminated string (but not the pointer) is printed.
+
+template <typename T>
+class UniversalTersePrinter {
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);
   }
-  template <typename U>
-  bool operator!=(linked_ptr<U> const& ptr) const {
-    return value_ != ptr.get();
+};
+template <typename T>
+class UniversalTersePrinter<T&> {
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);
+  }
+};
+template <typename T, size_t N>
+class UniversalTersePrinter<T[N]> {
+ public:
+  static void Print(const T (&value)[N], ::std::ostream* os) {
+    UniversalPrinter<T[N]>::Print(value, os);
+  }
+};
+template <>
+class UniversalTersePrinter<const char*> {
+ public:
+  static void Print(const char* str, ::std::ostream* os) {
+    if (str == nullptr) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(std::string(str), os);
+    }
   }
+};
+template <>
+class UniversalTersePrinter<char*> : public UniversalTersePrinter<const char*> {
+};
 
- private:
-  template <typename U>
-  friend class linked_ptr;
+#ifdef __cpp_char8_t
+template <>
+class UniversalTersePrinter<const char8_t*> {
+ public:
+  static void Print(const char8_t* str, ::std::ostream* os) {
+    if (str == nullptr) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(::std::u8string(str), os);
+    }
+  }
+};
+template <>
+class UniversalTersePrinter<char8_t*>
+    : public UniversalTersePrinter<const char8_t*> {};
+#endif
 
-  T* value_;
-  linked_ptr_internal link_;
+template <>
+class UniversalTersePrinter<const char16_t*> {
+ public:
+  static void Print(const char16_t* str, ::std::ostream* os) {
+    if (str == nullptr) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(::std::u16string(str), os);
+    }
+  }
+};
+template <>
+class UniversalTersePrinter<char16_t*>
+    : public UniversalTersePrinter<const char16_t*> {};
 
-  void depart() {
-    if (link_.depart()) delete value_;
+template <>
+class UniversalTersePrinter<const char32_t*> {
+ public:
+  static void Print(const char32_t* str, ::std::ostream* os) {
+    if (str == nullptr) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(::std::u32string(str), os);
+    }
   }
+};
+template <>
+class UniversalTersePrinter<char32_t*>
+    : public UniversalTersePrinter<const char32_t*> {};
 
-  void capture(T* ptr) {
-    value_ = ptr;
-    link_.join_new();
+#if GTEST_HAS_STD_WSTRING
+template <>
+class UniversalTersePrinter<const wchar_t*> {
+ public:
+  static void Print(const wchar_t* str, ::std::ostream* os) {
+    if (str == nullptr) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(::std::wstring(str), os);
+    }
   }
+};
+#endif
 
-  template <typename U> void copy(linked_ptr<U> const* ptr) {
-    value_ = ptr->get();
-    if (value_)
-      link_.join(&ptr->link_);
-    else
-      link_.join_new();
+template <>
+class UniversalTersePrinter<wchar_t*> {
+ public:
+  static void Print(wchar_t* str, ::std::ostream* os) {
+    UniversalTersePrinter<const wchar_t*>::Print(str, os);
   }
 };
 
-template<typename T> inline
-bool operator==(T* ptr, const linked_ptr<T>& x) {
-  return ptr == x.get();
+template <typename T>
+void UniversalTersePrint(const T& value, ::std::ostream* os) {
+  UniversalTersePrinter<T>::Print(value, os);
 }
 
-template<typename T> inline
-bool operator!=(T* ptr, const linked_ptr<T>& x) {
-  return ptr != x.get();
+// Prints a value using the type inferred by the compiler.  The
+// difference between this and UniversalTersePrint() is that for a
+// (const) char pointer, this prints both the pointer and the
+// NUL-terminated string.
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os) {
+  // A workarond for the bug in VC++ 7.1 that prevents us from instantiating
+  // UniversalPrinter with T directly.
+  typedef T T1;
+  UniversalPrinter<T1>::Print(value, os);
 }
 
-// A function to convert T* into linked_ptr<T>
-// Doing e.g. make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation
-// for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg))
-template <typename T>
-linked_ptr<T> make_linked_ptr(T* ptr) {
-  return linked_ptr<T>(ptr);
+typedef ::std::vector< ::std::string> Strings;
+
+  // Tersely prints the first N fields of a tuple to a string vector,
+  // one element for each field.
+template <typename Tuple>
+void TersePrintPrefixToStrings(const Tuple&, std::integral_constant<size_t, 0>,
+                               Strings*) {}
+template <typename Tuple, size_t I>
+void TersePrintPrefixToStrings(const Tuple& t,
+                               std::integral_constant<size_t, I>,
+                               Strings* strings) {
+  TersePrintPrefixToStrings(t, std::integral_constant<size_t, I - 1>(),
+                            strings);
+  ::std::stringstream ss;
+  UniversalTersePrint(std::get<I - 1>(t), &ss);
+  strings->push_back(ss.str());
+}
+
+// Prints the fields of a tuple tersely to a string vector, one
+// element for each field.  See the comment before
+// UniversalTersePrint() for how we define "tersely".
+template <typename Tuple>
+Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
+  Strings result;
+  TersePrintPrefixToStrings(
+      value, std::integral_constant<size_t, std::tuple_size<Tuple>::value>(),
+      &result);
+  return result;
 }
 
 }  // namespace internal
+
+template <typename T>
+::std::string PrintToString(const T& value) {
+  ::std::stringstream ss;
+  internal::UniversalTersePrinter<T>::Print(value, &ss);
+  return ss.str();
+}
+
 }  // namespace testing
 
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
-// Copyright 2007, Google Inc.
+// Include any custom printer added by the local installation.
+// We must include this header at the end to make sure it can use the
+// declarations from this file.
+// Copyright 2015, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -9220,1412 +6106,1480 @@ linked_ptr<T> make_linked_ptr(T* ptr) {
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Google Test - The Google C++ Testing Framework
-//
-// This file implements a universal value printer that can print a
-// value of any type T:
-//
-//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
-//
-// A user can teach this function how to print a class type T by
-// defining either operator<<() or PrintTo() in the namespace that
-// defines T.  More specifically, the FIRST defined function in the
-// following list will be used (assuming T is defined in namespace
-// foo):
-//
-//   1. foo::PrintTo(const T&, ostream*)
-//   2. operator<<(ostream&, const T&) defined in either foo or the
-//      global namespace.
-//
-// If none of the above is defined, it will print the debug string of
-// the value if it is a protocol buffer, or print the raw bytes in the
-// value otherwise.
-//
-// To aid debugging: when T is a reference type, the address of the
-// value is also printed; when T is a (const) char pointer, both the
-// pointer value and the NUL-terminated string it points to are
-// printed.
-//
-// We also provide some convenient wrappers:
-//
-//   // Prints a value to a string.  For a (const or not) char
-//   // pointer, the NUL-terminated string (but not the pointer) is
-//   // printed.
-//   std::string ::testing::PrintToString(const T& value);
-//
-//   // Prints a value tersely: for a reference type, the referenced
-//   // value (but not the address) is printed; for a (const or not) char
-//   // pointer, the NUL-terminated string (but not the pointer) is
-//   // printed.
-//   void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
-//
-//   // Prints value using the type inferred by the compiler.  The difference
-//   // from UniversalTersePrint() is that this function prints both the
-//   // pointer and the NUL-terminated string for a (const or not) char pointer.
-//   void ::testing::internal::UniversalPrint(const T& value, ostream*);
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-//   // Prints the fields of a tuple tersely to a string vector, one
-//   // element for each field. Tuple support must be enabled in
-//   // gtest-port.h.
-//   std::vector<string> UniversalTersePrintTupleFieldsToStrings(
-//       const Tuple& value);
+// This file provides an injection point for custom printers in a local
+// installation of gTest.
+// It will be included from gtest-printers.h and the overrides in this file
+// will be visible to everyone.
 //
-// Known limitation:
+// Injection point for custom user configurations. See README for details
 //
-// The print primitives print the elements of an STL-style container
-// using the compiler-inferred type of *iter where iter is a
-// const_iterator of the container.  When const_iterator is an input
-// iterator but not a forward iterator, this inferred type may not
-// match value_type, and the print output may be incorrect.  In
-// practice, this is rarely a problem as for most containers
-// const_iterator is a forward iterator.  We'll fix this if there's an
-// actual need for it.  Note that this fix cannot rely on value_type
-// being defined as many user-defined container types don't have
-// value_type.
+// ** Custom implementation starts here **
 
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
 
-#include <ostream>  // NOLINT
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
 
-namespace testing {
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
 
-// Definitions in the 'internal' and 'internal2' name spaces are
-// subject to change without notice.  DO NOT USE THEM IN USER CODE!
-namespace internal2 {
+// MSVC warning C5046 is new as of VS2017 version 15.8.
+#if defined(_MSC_VER) && _MSC_VER >= 1915
+#define GTEST_MAYBE_5046_ 5046
+#else
+#define GTEST_MAYBE_5046_
+#endif
 
-// Prints the given number of bytes in the given object to the given
-// ostream.
-GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
-                                     size_t count,
-                                     ::std::ostream* os);
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(
+    4251 GTEST_MAYBE_5046_ /* class A needs to have dll-interface to be used by
+                              clients of class B */
+    /* Symbol involving type with internal linkage not defined */)
 
-// For selecting which printer to use when a given type has neither <<
-// nor PrintTo().
-enum TypeKind {
-  kProtobuf,              // a protobuf type
-  kConvertibleToInteger,  // a type implicitly convertible to BiggestInt
-                          // (e.g. a named or unnamed enum type)
-  kOtherType              // anything else
-};
+namespace testing {
+
+// To implement a matcher Foo for type T, define:
+//   1. a class FooMatcherMatcher that implements the matcher interface:
+//     using is_gtest_matcher = void;
+//     bool MatchAndExplain(const T&, std::ostream*);
+//       (MatchResultListener* can also be used instead of std::ostream*)
+//     void DescribeTo(std::ostream*);
+//     void DescribeNegationTo(std::ostream*);
+//
+//   2. a factory function that creates a Matcher<T> object from a
+//      FooMatcherMatcher.
 
-// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called
-// by the universal printer to print a value of type T when neither
-// operator<< nor PrintTo() is defined for T, where kTypeKind is the
-// "kind" of T as defined by enum TypeKind.
-template <typename T, TypeKind kTypeKind>
-class TypeWithoutFormatter {
+class MatchResultListener {
  public:
-  // This default version is called when kTypeKind is kOtherType.
-  static void PrintValue(const T& value, ::std::ostream* os) {
-    PrintBytesInObjectTo(reinterpret_cast<const unsigned char*>(&value),
-                         sizeof(value), os);
+  // Creates a listener object with the given underlying ostream.  The
+  // listener does not own the ostream, and does not dereference it
+  // in the constructor or destructor.
+  explicit MatchResultListener(::std::ostream* os) : stream_(os) {}
+  virtual ~MatchResultListener() = 0;  // Makes this class abstract.
+
+  // Streams x to the underlying ostream; does nothing if the ostream
+  // is NULL.
+  template <typename T>
+  MatchResultListener& operator<<(const T& x) {
+    if (stream_ != nullptr) *stream_ << x;
+    return *this;
   }
-};
 
-// We print a protobuf using its ShortDebugString() when the string
-// doesn't exceed this many characters; otherwise we print it using
-// DebugString() for better readability.
-const size_t kProtobufOneLinerMaxLength = 50;
+  // Returns the underlying ostream.
+  ::std::ostream* stream() { return stream_; }
 
-template <typename T>
-class TypeWithoutFormatter<T, kProtobuf> {
- public:
-  static void PrintValue(const T& value, ::std::ostream* os) {
-    const ::testing::internal::string short_str = value.ShortDebugString();
-    const ::testing::internal::string pretty_str =
-        short_str.length() <= kProtobufOneLinerMaxLength ?
-        short_str : ("\n" + value.DebugString());
-    *os << ("<" + pretty_str + ">");
-  }
-};
+  // Returns true if and only if the listener is interested in an explanation
+  // of the match result.  A matcher's MatchAndExplain() method can use
+  // this information to avoid generating the explanation when no one
+  // intends to hear it.
+  bool IsInterested() const { return stream_ != nullptr; }
 
-template <typename T>
-class TypeWithoutFormatter<T, kConvertibleToInteger> {
- public:
-  // Since T has no << operator or PrintTo() but can be implicitly
-  // converted to BiggestInt, we print it as a BiggestInt.
-  //
-  // Most likely T is an enum type (either named or unnamed), in which
-  // case printing it as an integer is the desired behavior.  In case
-  // T is not an enum, printing it as an integer is the best we can do
-  // given that it has no user-defined printer.
-  static void PrintValue(const T& value, ::std::ostream* os) {
-    const internal::BiggestInt kBigInt = value;
-    *os << kBigInt;
-  }
+ private:
+  ::std::ostream* const stream_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(MatchResultListener);
 };
 
-// Prints the given value to the given ostream.  If the value is a
-// protocol message, its debug string is printed; if it's an enum or
-// of a type implicitly convertible to BiggestInt, it's printed as an
-// integer; otherwise the bytes in the value are printed.  This is
-// what UniversalPrinter<T>::Print() does when it knows nothing about
-// type T and T has neither << operator nor PrintTo().
-//
-// A user can override this behavior for a class type Foo by defining
-// a << operator in the namespace where Foo is defined.
-//
-// We put this operator in namespace 'internal2' instead of 'internal'
-// to simplify the implementation, as much code in 'internal' needs to
-// use << in STL, which would conflict with our own << were it defined
-// in 'internal'.
-//
-// Note that this operator<< takes a generic std::basic_ostream<Char,
-// CharTraits> type instead of the more restricted std::ostream.  If
-// we define it to take an std::ostream instead, we'll get an
-// "ambiguous overloads" compiler error when trying to print a type
-// Foo that supports streaming to std::basic_ostream<Char,
-// CharTraits>, as the compiler cannot tell whether
-// operator<<(std::ostream&, const T&) or
-// operator<<(std::basic_stream<Char, CharTraits>, const Foo&) is more
-// specific.
-template <typename Char, typename CharTraits, typename T>
-::std::basic_ostream<Char, CharTraits>& operator<<(
-    ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
-  TypeWithoutFormatter<T,
-      (internal::IsAProtocolMessage<T>::value ? kProtobuf :
-       internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ?
-       kConvertibleToInteger : kOtherType)>::PrintValue(x, &os);
-  return os;
+inline MatchResultListener::~MatchResultListener() {
 }
 
-}  // namespace internal2
-}  // namespace testing
+// An instance of a subclass of this knows how to describe itself as a
+// matcher.
+class GTEST_API_ MatcherDescriberInterface {
+ public:
+  virtual ~MatcherDescriberInterface() {}
+
+  // Describes this matcher to an ostream.  The function should print
+  // a verb phrase that describes the property a value matching this
+  // matcher should have.  The subject of the verb phrase is the value
+  // being matched.  For example, the DescribeTo() method of the Gt(7)
+  // matcher prints "is greater than 7".
+  virtual void DescribeTo(::std::ostream* os) const = 0;
 
-// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
-// magic needed for implementing UniversalPrinter won't work.
-namespace testing_internal {
+  // Describes the negation of this matcher to an ostream.  For
+  // example, if the description of this matcher is "is greater than
+  // 7", the negated description could be "is not greater than 7".
+  // You are not required to override this when implementing
+  // MatcherInterface, but it is highly advised so that your matcher
+  // can produce good error messages.
+  virtual void DescribeNegationTo(::std::ostream* os) const {
+    *os << "not (";
+    DescribeTo(os);
+    *os << ")";
+  }
+};
 
-// Used to print a value that is not an STL-style container when the
-// user doesn't define PrintTo() for it.
+// The implementation of a matcher.
 template <typename T>
-void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
-  // With the following statement, during unqualified name lookup,
-  // testing::internal2::operator<< appears as if it was declared in
-  // the nearest enclosing namespace that contains both
-  // ::testing_internal and ::testing::internal2, i.e. the global
-  // namespace.  For more details, refer to the C++ Standard section
-  // 7.3.4-1 [namespace.udir].  This allows us to fall back onto
-  // testing::internal2::operator<< in case T doesn't come with a <<
-  // operator.
+class MatcherInterface : public MatcherDescriberInterface {
+ public:
+  // Returns true if and only if the matcher matches x; also explains the
+  // match result to 'listener' if necessary (see the next paragraph), in
+  // the form of a non-restrictive relative clause ("which ...",
+  // "whose ...", etc) that describes x.  For example, the
+  // MatchAndExplain() method of the Pointee(...) matcher should
+  // generate an explanation like "which points to ...".
   //
-  // We cannot write 'using ::testing::internal2::operator<<;', which
-  // gcc 3.3 fails to compile due to a compiler bug.
-  using namespace ::testing::internal2;  // NOLINT
-
-  // Assuming T is defined in namespace foo, in the next statement,
-  // the compiler will consider all of:
+  // Implementations of MatchAndExplain() should add an explanation of
+  // the match result *if and only if* they can provide additional
+  // information that's not already present (or not obvious) in the
+  // print-out of x and the matcher's description.  Whether the match
+  // succeeds is not a factor in deciding whether an explanation is
+  // needed, as sometimes the caller needs to print a failure message
+  // when the match succeeds (e.g. when the matcher is used inside
+  // Not()).
   //
-  //   1. foo::operator<< (thanks to Koenig look-up),
-  //   2. ::operator<< (as the current namespace is enclosed in ::),
-  //   3. testing::internal2::operator<< (thanks to the using statement above).
+  // For example, a "has at least 10 elements" matcher should explain
+  // what the actual element count is, regardless of the match result,
+  // as it is useful information to the reader; on the other hand, an
+  // "is empty" matcher probably only needs to explain what the actual
+  // size is when the match fails, as it's redundant to say that the
+  // size is 0 when the value is already known to be empty.
   //
-  // The operator<< whose type matches T best will be picked.
+  // You should override this method when defining a new matcher.
   //
-  // We deliberately allow #2 to be a candidate, as sometimes it's
-  // impossible to define #1 (e.g. when foo is ::std, defining
-  // anything in it is undefined behavior unless you are a compiler
-  // vendor.).
-  *os << value;
-}
+  // It's the responsibility of the caller (Google Test) to guarantee
+  // that 'listener' is not NULL.  This helps to simplify a matcher's
+  // implementation when it doesn't care about the performance, as it
+  // can talk to 'listener' without checking its validity first.
+  // However, in order to implement dummy listeners efficiently,
+  // listener->stream() may be NULL.
+  virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0;
 
-}  // namespace testing_internal
+  // Inherits these methods from MatcherDescriberInterface:
+  //   virtual void DescribeTo(::std::ostream* os) const = 0;
+  //   virtual void DescribeNegationTo(::std::ostream* os) const;
+};
 
-namespace testing {
 namespace internal {
 
-// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
-// value to the given ostream.  The caller must ensure that
-// 'ostream_ptr' is not NULL, or the behavior is undefined.
-//
-// We define UniversalPrinter as a class template (as opposed to a
-// function template), as we need to partially specialize it for
-// reference types, which cannot be done with function templates.
+struct AnyEq {
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const { return a == b; }
+};
+struct AnyNe {
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const { return a != b; }
+};
+struct AnyLt {
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const { return a < b; }
+};
+struct AnyGt {
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const { return a > b; }
+};
+struct AnyLe {
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const { return a <= b; }
+};
+struct AnyGe {
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const { return a >= b; }
+};
+
+// A match result listener that ignores the explanation.
+class DummyMatchResultListener : public MatchResultListener {
+ public:
+  DummyMatchResultListener() : MatchResultListener(nullptr) {}
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(DummyMatchResultListener);
+};
+
+// A match result listener that forwards the explanation to a given
+// ostream.  The difference between this and MatchResultListener is
+// that the former is concrete.
+class StreamMatchResultListener : public MatchResultListener {
+ public:
+  explicit StreamMatchResultListener(::std::ostream* os)
+      : MatchResultListener(os) {}
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamMatchResultListener);
+};
+
+struct SharedPayloadBase {
+  std::atomic<int> ref{1};
+  void Ref() { ref.fetch_add(1, std::memory_order_relaxed); }
+  bool Unref() { return ref.fetch_sub(1, std::memory_order_acq_rel) == 1; }
+};
+
 template <typename T>
-class UniversalPrinter;
+struct SharedPayload : SharedPayloadBase {
+  explicit SharedPayload(const T& v) : value(v) {}
+  explicit SharedPayload(T&& v) : value(std::move(v)) {}
+
+  static void Destroy(SharedPayloadBase* shared) {
+    delete static_cast<SharedPayload*>(shared);
+  }
+
+  T value;
+};
 
+// An internal class for implementing Matcher<T>, which will derive
+// from it.  We put functionalities common to all Matcher<T>
+// specializations here to avoid code duplication.
 template <typename T>
-void UniversalPrint(const T& value, ::std::ostream* os);
+class MatcherBase : private MatcherDescriberInterface {
+ public:
+  // Returns true if and only if the matcher matches x; also explains the
+  // match result to 'listener'.
+  bool MatchAndExplain(const T& x, MatchResultListener* listener) const {
+    GTEST_CHECK_(vtable_ != nullptr);
+    return vtable_->match_and_explain(*this, x, listener);
+  }
 
-// Used to print an STL-style container when the user doesn't define
-// a PrintTo() for it.
-template <typename C>
-void DefaultPrintTo(IsContainer /* dummy */,
-                    false_type /* is not a pointer */,
-                    const C& container, ::std::ostream* os) {
-  const size_t kMaxCount = 32;  // The maximum number of elements to print.
-  *os << '{';
-  size_t count = 0;
-  for (typename C::const_iterator it = container.begin();
-       it != container.end(); ++it, ++count) {
-    if (count > 0) {
-      *os << ',';
-      if (count == kMaxCount) {  // Enough has been printed.
-        *os << " ...";
-        break;
-      }
-    }
-    *os << ' ';
-    // We cannot call PrintTo(*it, os) here as PrintTo() doesn't
-    // handle *it being a native array.
-    internal::UniversalPrint(*it, os);
+  // Returns true if and only if this matcher matches x.
+  bool Matches(const T& x) const {
+    DummyMatchResultListener dummy;
+    return MatchAndExplain(x, &dummy);
   }
 
-  if (count > 0) {
-    *os << ' ';
+  // Describes this matcher to an ostream.
+  void DescribeTo(::std::ostream* os) const final {
+    GTEST_CHECK_(vtable_ != nullptr);
+    vtable_->describe(*this, os, false);
   }
-  *os << '}';
-}
 
-// Used to print a pointer that is neither a char pointer nor a member
-// pointer, when the user doesn't define PrintTo() for it.  (A member
-// variable pointer or member function pointer doesn't really point to
-// a location in the address space.  Their representation is
-// implementation-defined.  Therefore they will be printed as raw
-// bytes.)
-template <typename T>
-void DefaultPrintTo(IsNotContainer /* dummy */,
-                    true_type /* is a pointer */,
-                    T* p, ::std::ostream* os) {
-  if (p == NULL) {
-    *os << "NULL";
-  } else {
-    // C++ doesn't allow casting from a function pointer to any object
-    // pointer.
-    //
-    // IsTrue() silences warnings: "Condition is always true",
-    // "unreachable code".
-    if (IsTrue(ImplicitlyConvertible<T*, const void*>::value)) {
-      // T is not a function type.  We just call << to print p,
-      // relying on ADL to pick up user-defined << for their pointer
-      // types, if any.
-      *os << p;
-    } else {
-      // T is a function type, so '*os << p' doesn't do what we want
-      // (it just prints p as bool).  We want to print p as a const
-      // void*.  However, we cannot cast it to const void* directly,
-      // even using reinterpret_cast, as earlier versions of gcc
-      // (e.g. 3.4.5) cannot compile the cast when p is a function
-      // pointer.  Casting to UInt64 first solves the problem.
-      *os << reinterpret_cast<const void*>(
-          reinterpret_cast<internal::UInt64>(p));
-    }
+  // Describes the negation of this matcher to an ostream.
+  void DescribeNegationTo(::std::ostream* os) const final {
+    GTEST_CHECK_(vtable_ != nullptr);
+    vtable_->describe(*this, os, true);
   }
-}
 
-// Used to print a non-container, non-pointer value when the user
-// doesn't define PrintTo() for it.
-template <typename T>
-void DefaultPrintTo(IsNotContainer /* dummy */,
-                    false_type /* is not a pointer */,
-                    const T& value, ::std::ostream* os) {
-  ::testing_internal::DefaultPrintNonContainerTo(value, os);
-}
+  // Explains why x matches, or doesn't match, the matcher.
+  void ExplainMatchResultTo(const T& x, ::std::ostream* os) const {
+    StreamMatchResultListener listener(os);
+    MatchAndExplain(x, &listener);
+  }
 
-// Prints the given value using the << operator if it has one;
-// otherwise prints the bytes in it.  This is what
-// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
-// or overloaded for type T.
-//
-// A user can override this behavior for a class type Foo by defining
-// an overload of PrintTo() in the namespace where Foo is defined.  We
-// give the user this option as sometimes defining a << operator for
-// Foo is not desirable (e.g. the coding style may prevent doing it,
-// or there is already a << operator but it doesn't do what the user
-// wants).
-template <typename T>
-void PrintTo(const T& value, ::std::ostream* os) {
-  // DefaultPrintTo() is overloaded.  The type of its first two
-  // arguments determine which version will be picked.  If T is an
-  // STL-style container, the version for container will be called; if
-  // T is a pointer, the pointer version will be called; otherwise the
-  // generic version will be called.
-  //
-  // Note that we check for container types here, prior to we check
-  // for protocol message types in our operator<<.  The rationale is:
-  //
-  // For protocol messages, we want to give people a chance to
-  // override Google Mock's format by defining a PrintTo() or
-  // operator<<.  For STL containers, other formats can be
-  // incompatible with Google Mock's format for the container
-  // elements; therefore we check for container types here to ensure
-  // that our format is used.
-  //
-  // The second argument of DefaultPrintTo() is needed to bypass a bug
-  // in Symbian's C++ compiler that prevents it from picking the right
-  // overload between:
-  //
-  //   PrintTo(const T& x, ...);
-  //   PrintTo(T* x, ...);
-  DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
-}
+  // Returns the describer for this matcher object; retains ownership
+  // of the describer, which is only guaranteed to be alive when
+  // this matcher object is alive.
+  const MatcherDescriberInterface* GetDescriber() const {
+    if (vtable_ == nullptr) return nullptr;
+    return vtable_->get_describer(*this);
+  }
 
-// The following list of PrintTo() overloads tells
-// UniversalPrinter<T>::Print() how to print standard types (built-in
-// types, strings, plain arrays, and pointers).
+ protected:
+  MatcherBase() : vtable_(nullptr) {}
 
-// Overloads for various char types.
-GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
-GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
-inline void PrintTo(char c, ::std::ostream* os) {
-  // When printing a plain char, we always treat it as unsigned.  This
-  // way, the output won't be affected by whether the compiler thinks
-  // char is signed or not.
-  PrintTo(static_cast<unsigned char>(c), os);
-}
+  // Constructs a matcher from its implementation.
+  template <typename U>
+  explicit MatcherBase(const MatcherInterface<U>* impl) {
+    Init(impl);
+  }
 
-// Overloads for other simple built-in types.
-inline void PrintTo(bool x, ::std::ostream* os) {
-  *os << (x ? "true" : "false");
-}
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  MatcherBase(M&& m) {  // NOLINT
+    Init(std::forward<M>(m));
+  }
 
-// Overload for wchar_t type.
-// Prints a wchar_t as a symbol if it is printable or as its internal
-// code otherwise and also as its decimal code (except for L'\0').
-// The L'\0' char is printed as "L'\\0'". The decimal code is printed
-// as signed integer when wchar_t is implemented by the compiler
-// as a signed type and is printed as an unsigned integer when wchar_t
-// is implemented as an unsigned type.
-GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
+  MatcherBase(const MatcherBase& other)
+      : vtable_(other.vtable_), buffer_(other.buffer_) {
+    if (IsShared()) buffer_.shared->Ref();
+  }
 
-// Overloads for C strings.
-GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
-inline void PrintTo(char* s, ::std::ostream* os) {
-  PrintTo(ImplicitCast_<const char*>(s), os);
-}
+  MatcherBase& operator=(const MatcherBase& other) {
+    if (this == &other) return *this;
+    Destroy();
+    vtable_ = other.vtable_;
+    buffer_ = other.buffer_;
+    if (IsShared()) buffer_.shared->Ref();
+    return *this;
+  }
 
-// signed/unsigned char is often used for representing binary data, so
-// we print pointers to it as void* to be safe.
-inline void PrintTo(const signed char* s, ::std::ostream* os) {
-  PrintTo(ImplicitCast_<const void*>(s), os);
-}
-inline void PrintTo(signed char* s, ::std::ostream* os) {
-  PrintTo(ImplicitCast_<const void*>(s), os);
-}
-inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
-  PrintTo(ImplicitCast_<const void*>(s), os);
-}
-inline void PrintTo(unsigned char* s, ::std::ostream* os) {
-  PrintTo(ImplicitCast_<const void*>(s), os);
-}
+  MatcherBase(MatcherBase&& other)
+      : vtable_(other.vtable_), buffer_(other.buffer_) {
+    other.vtable_ = nullptr;
+  }
 
-// MSVC can be configured to define wchar_t as a typedef of unsigned
-// short.  It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
-// type.  When wchar_t is a typedef, defining an overload for const
-// wchar_t* would cause unsigned short* be printed as a wide string,
-// possibly causing invalid memory accesses.
-#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
-// Overloads for wide C strings
-GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
-inline void PrintTo(wchar_t* s, ::std::ostream* os) {
-  PrintTo(ImplicitCast_<const wchar_t*>(s), os);
-}
-#endif
+  MatcherBase& operator=(MatcherBase&& other) {
+    if (this == &other) return *this;
+    Destroy();
+    vtable_ = other.vtable_;
+    buffer_ = other.buffer_;
+    other.vtable_ = nullptr;
+    return *this;
+  }
 
-// Overload for C arrays.  Multi-dimensional arrays are printed
-// properly.
+  ~MatcherBase() override { Destroy(); }
 
-// Prints the given number of elements in an array, without printing
-// the curly braces.
-template <typename T>
-void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
-  UniversalPrint(a[0], os);
-  for (size_t i = 1; i != count; i++) {
-    *os << ", ";
-    UniversalPrint(a[i], os);
+ private:
+  struct VTable {
+    bool (*match_and_explain)(const MatcherBase&, const T&,
+                              MatchResultListener*);
+    void (*describe)(const MatcherBase&, std::ostream*, bool negation);
+    // Returns the captured object if it implements the interface, otherwise
+    // returns the MatcherBase itself.
+    const MatcherDescriberInterface* (*get_describer)(const MatcherBase&);
+    // Called on shared instances when the reference count reaches 0.
+    void (*shared_destroy)(SharedPayloadBase*);
+  };
+
+  bool IsShared() const {
+    return vtable_ != nullptr && vtable_->shared_destroy != nullptr;
   }
-}
 
-// Overloads for ::string and ::std::string.
-#if GTEST_HAS_GLOBAL_STRING
-GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os);
-inline void PrintTo(const ::string& s, ::std::ostream* os) {
-  PrintStringTo(s, os);
-}
-#endif  // GTEST_HAS_GLOBAL_STRING
+  // If the implementation uses a listener, call that.
+  template <typename P>
+  static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+                                  MatchResultListener* listener)
+      -> decltype(P::Get(m).MatchAndExplain(value, listener->stream())) {
+    return P::Get(m).MatchAndExplain(value, listener->stream());
+  }
+
+  template <typename P>
+  static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+                                  MatchResultListener* listener)
+      -> decltype(P::Get(m).MatchAndExplain(value, listener)) {
+    return P::Get(m).MatchAndExplain(value, listener);
+  }
+
+  template <typename P>
+  static void DescribeImpl(const MatcherBase& m, std::ostream* os,
+                           bool negation) {
+    if (negation) {
+      P::Get(m).DescribeNegationTo(os);
+    } else {
+      P::Get(m).DescribeTo(os);
+    }
+  }
+
+  template <typename P>
+  static const MatcherDescriberInterface* GetDescriberImpl(
+      const MatcherBase& m) {
+    // If the impl is a MatcherDescriberInterface, then return it.
+    // Otherwise use MatcherBase itself.
+    // This allows us to implement the GetDescriber() function without support
+    // from the impl, but some users really want to get their impl back when
+    // they call GetDescriber().
+    // We use std::get on a tuple as a workaround of not having `if constexpr`.
+    return std::get<(
+        std::is_convertible<decltype(&P::Get(m)),
+                            const MatcherDescriberInterface*>::value
+            ? 1
+            : 0)>(std::make_tuple(&m, &P::Get(m)));
+  }
+
+  template <typename P>
+  const VTable* GetVTable() {
+    static constexpr VTable kVTable = {&MatchAndExplainImpl<P>,
+                                       &DescribeImpl<P>, &GetDescriberImpl<P>,
+                                       P::shared_destroy};
+    return &kVTable;
+  }
+
+  union Buffer {
+    // Add some types to give Buffer some common alignment/size use cases.
+    void* ptr;
+    double d;
+    int64_t i;
+    // And add one for the out-of-line cases.
+    SharedPayloadBase* shared;
+  };
+
+  void Destroy() {
+    if (IsShared() && buffer_.shared->Unref()) {
+      vtable_->shared_destroy(buffer_.shared);
+    }
+  }
+
+  template <typename M>
+  static constexpr bool IsInlined() {
+    return sizeof(M) <= sizeof(Buffer) && alignof(M) <= alignof(Buffer) &&
+           std::is_trivially_copy_constructible<M>::value &&
+           std::is_trivially_destructible<M>::value;
+  }
+
+  template <typename M, bool = MatcherBase::IsInlined<M>()>
+  struct ValuePolicy {
+    static const M& Get(const MatcherBase& m) {
+      // When inlined along with Init, need to be explicit to avoid violating
+      // strict aliasing rules.
+      const M *ptr = static_cast<const M*>(
+          static_cast<const void*>(&m.buffer_));
+      return *ptr;
+    }
+    static void Init(MatcherBase& m, M impl) {
+      ::new (static_cast<void*>(&m.buffer_)) M(impl);
+    }
+    static constexpr auto shared_destroy = nullptr;
+  };
+
+  template <typename M>
+  struct ValuePolicy<M, false> {
+    using Shared = SharedPayload<M>;
+    static const M& Get(const MatcherBase& m) {
+      return static_cast<Shared*>(m.buffer_.shared)->value;
+    }
+    template <typename Arg>
+    static void Init(MatcherBase& m, Arg&& arg) {
+      m.buffer_.shared = new Shared(std::forward<Arg>(arg));
+    }
+    static constexpr auto shared_destroy = &Shared::Destroy;
+  };
+
+  template <typename U, bool B>
+  struct ValuePolicy<const MatcherInterface<U>*, B> {
+    using M = const MatcherInterface<U>;
+    using Shared = SharedPayload<std::unique_ptr<M>>;
+    static const M& Get(const MatcherBase& m) {
+      return *static_cast<Shared*>(m.buffer_.shared)->value;
+    }
+    static void Init(MatcherBase& m, M* impl) {
+      m.buffer_.shared = new Shared(std::unique_ptr<M>(impl));
+    }
 
-GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os);
-inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
-  PrintStringTo(s, os);
-}
+    static constexpr auto shared_destroy = &Shared::Destroy;
+  };
 
-// Overloads for ::wstring and ::std::wstring.
-#if GTEST_HAS_GLOBAL_WSTRING
-GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os);
-inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
-  PrintWideStringTo(s, os);
-}
-#endif  // GTEST_HAS_GLOBAL_WSTRING
+  template <typename M>
+  void Init(M&& m) {
+    using MM = typename std::decay<M>::type;
+    using Policy = ValuePolicy<MM>;
+    vtable_ = GetVTable<Policy>();
+    Policy::Init(*this, std::forward<M>(m));
+  }
 
-#if GTEST_HAS_STD_WSTRING
-GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os);
-inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
-  PrintWideStringTo(s, os);
-}
-#endif  // GTEST_HAS_STD_WSTRING
+  const VTable* vtable_;
+  Buffer buffer_;
+};
 
-#if GTEST_HAS_TR1_TUPLE
-// Overload for ::std::tr1::tuple.  Needed for printing function arguments,
-// which are packed as tuples.
+}  // namespace internal
 
-// Helper function for printing a tuple.  T must be instantiated with
-// a tuple type.
+// A Matcher<T> is a copyable and IMMUTABLE (except by assignment)
+// object that can check whether a value of type T matches.  The
+// implementation of Matcher<T> is just a std::shared_ptr to const
+// MatcherInterface<T>.  Don't inherit from Matcher!
 template <typename T>
-void PrintTupleTo(const T& t, ::std::ostream* os);
+class Matcher : public internal::MatcherBase<T> {
+ public:
+  // Constructs a null matcher.  Needed for storing Matcher objects in STL
+  // containers.  A default-constructed matcher is not yet initialized.  You
+  // cannot use it until a valid value has been assigned to it.
+  explicit Matcher() {}  // NOLINT
 
-// Overloaded PrintTo() for tuples of various arities.  We support
-// tuples of up-to 10 fields.  The following implementation works
-// regardless of whether tr1::tuple is implemented using the
-// non-standard variadic template feature or not.
+  // Constructs a matcher from its implementation.
+  explicit Matcher(const MatcherInterface<const T&>* impl)
+      : internal::MatcherBase<T>(impl) {}
 
-inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  template <typename U>
+  explicit Matcher(
+      const MatcherInterface<U>* impl,
+      typename std::enable_if<!std::is_same<U, const U&>::value>::type* =
+          nullptr)
+      : internal::MatcherBase<T>(impl) {}
 
-template <typename T1>
-void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m) : internal::MatcherBase<T>(std::forward<M>(m)) {}  // NOLINT
 
-template <typename T1, typename T2>
-void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  // Implicit constructor here allows people to write
+  // EXPECT_CALL(foo, Bar(5)) instead of EXPECT_CALL(foo, Bar(Eq(5))) sometimes
+  Matcher(T value);  // NOLINT
+};
 
-template <typename T1, typename T2, typename T3>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+// The following two specializations allow the user to write str
+// instead of Eq(str) and "foo" instead of Eq("foo") when a std::string
+// matcher is expected.
+template <>
+class GTEST_API_ Matcher<const std::string&>
+    : public internal::MatcherBase<const std::string&> {
+ public:
+  Matcher() {}
 
-template <typename T1, typename T2, typename T3, typename T4>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  explicit Matcher(const MatcherInterface<const std::string&>* impl)
+      : internal::MatcherBase<const std::string&>(impl) {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
-             ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<const std::string&>(std::forward<M>(m)) {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-          typename T6>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
-             ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a std::string object.
+  Matcher(const std::string& s);  // NOLINT
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-          typename T6, typename T7>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
-             ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
+};
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-          typename T6, typename T7, typename T8>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
-             ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+template <>
+class GTEST_API_ Matcher<std::string>
+    : public internal::MatcherBase<std::string> {
+ public:
+  Matcher() {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-          typename T6, typename T7, typename T8, typename T9>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
-             ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
+  explicit Matcher(const MatcherInterface<const std::string&>* impl)
+      : internal::MatcherBase<std::string>(impl) {}
+  explicit Matcher(const MatcherInterface<std::string>* impl)
+      : internal::MatcherBase<std::string>(impl) {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-          typename T6, typename T7, typename T8, typename T9, typename T10>
-void PrintTo(
-    const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
-    ::std::ostream* os) {
-  PrintTupleTo(t, os);
-}
-#endif  // GTEST_HAS_TR1_TUPLE
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<std::string>(std::forward<M>(m)) {}
 
-// Overload for std::pair.
-template <typename T1, typename T2>
-void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
-  *os << '(';
-  // We cannot use UniversalPrint(value.first, os) here, as T1 may be
-  // a reference type.  The same for printing value.second.
-  UniversalPrinter<T1>::Print(value.first, os);
-  *os << ", ";
-  UniversalPrinter<T2>::Print(value.second, os);
-  *os << ')';
-}
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a string object.
+  Matcher(const std::string& s);  // NOLINT
 
-// Implements printing a non-reference type T by letting the compiler
-// pick the right overload of PrintTo() for T.
-template <typename T>
-class UniversalPrinter {
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
+};
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// The following two specializations allow the user to write str
+// instead of Eq(str) and "foo" instead of Eq("foo") when a absl::string_view
+// matcher is expected.
+template <>
+class GTEST_API_ Matcher<const internal::StringView&>
+    : public internal::MatcherBase<const internal::StringView&> {
  public:
-  // MSVC warns about adding const to a function type, so we want to
-  // disable the warning.
-#ifdef _MSC_VER
-# pragma warning(push)          // Saves the current warning state.
-# pragma warning(disable:4180)  // Temporarily disables warning 4180.
-#endif  // _MSC_VER
+  Matcher() {}
 
-  // Note: we deliberately don't call this PrintTo(), as that name
-  // conflicts with ::testing::internal::PrintTo in the body of the
-  // function.
-  static void Print(const T& value, ::std::ostream* os) {
-    // By default, ::testing::internal::PrintTo() is used for printing
-    // the value.
-    //
-    // Thanks to Koenig look-up, if T is a class and has its own
-    // PrintTo() function defined in its namespace, that function will
-    // be visible here.  Since it is more specific than the generic ones
-    // in ::testing::internal, it will be picked by the compiler in the
-    // following statement - exactly what we want.
-    PrintTo(value, os);
+  explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+      : internal::MatcherBase<const internal::StringView&>(impl) {}
+
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<const internal::StringView&>(std::forward<M>(m)) {
   }
 
-#ifdef _MSC_VER
-# pragma warning(pop)           // Restores the warning state.
-#endif  // _MSC_VER
-};
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a std::string object.
+  Matcher(const std::string& s);  // NOLINT
 
-// UniversalPrintArray(begin, len, os) prints an array of 'len'
-// elements, starting at address 'begin'.
-template <typename T>
-void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
-  if (len == 0) {
-    *os << "{}";
-  } else {
-    *os << "{ ";
-    const size_t kThreshold = 18;
-    const size_t kChunkSize = 8;
-    // If the array has more than kThreshold elements, we'll have to
-    // omit some details by printing only the first and the last
-    // kChunkSize elements.
-    // TODO(wan@google.com): let the user control the threshold using a flag.
-    if (len <= kThreshold) {
-      PrintRawArrayTo(begin, len, os);
-    } else {
-      PrintRawArrayTo(begin, kChunkSize, os);
-      *os << ", ..., ";
-      PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
-    }
-    *os << " }";
-  }
-}
-// This overload prints a (const) char array compactly.
-GTEST_API_ void UniversalPrintArray(
-    const char* begin, size_t len, ::std::ostream* os);
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
 
-// This overload prints a (const) wchar_t array compactly.
-GTEST_API_ void UniversalPrintArray(
-    const wchar_t* begin, size_t len, ::std::ostream* os);
+  // Allows the user to pass absl::string_views or std::string_views directly.
+  Matcher(internal::StringView s);  // NOLINT
+};
 
-// Implements printing an array type T[N].
-template <typename T, size_t N>
-class UniversalPrinter<T[N]> {
+template <>
+class GTEST_API_ Matcher<internal::StringView>
+    : public internal::MatcherBase<internal::StringView> {
  public:
-  // Prints the given array, omitting some elements when there are too
-  // many.
-  static void Print(const T (&a)[N], ::std::ostream* os) {
-    UniversalPrintArray(a, N, os);
-  }
+  Matcher() {}
+
+  explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+      : internal::MatcherBase<internal::StringView>(impl) {}
+  explicit Matcher(const MatcherInterface<internal::StringView>* impl)
+      : internal::MatcherBase<internal::StringView>(impl) {}
+
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<internal::StringView>(std::forward<M>(m)) {}
+
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a std::string object.
+  Matcher(const std::string& s);  // NOLINT
+
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
+
+  // Allows the user to pass absl::string_views or std::string_views directly.
+  Matcher(internal::StringView s);  // NOLINT
 };
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
-// Implements printing a reference type T&.
+// Prints a matcher in a human-readable format.
 template <typename T>
-class UniversalPrinter<T&> {
+std::ostream& operator<<(std::ostream& os, const Matcher<T>& matcher) {
+  matcher.DescribeTo(&os);
+  return os;
+}
+
+// The PolymorphicMatcher class template makes it easy to implement a
+// polymorphic matcher (i.e. a matcher that can match values of more
+// than one type, e.g. Eq(n) and NotNull()).
+//
+// To define a polymorphic matcher, a user should provide an Impl
+// class that has a DescribeTo() method and a DescribeNegationTo()
+// method, and define a member function (or member function template)
+//
+//   bool MatchAndExplain(const Value& value,
+//                        MatchResultListener* listener) const;
+//
+// See the definition of NotNull() for a complete example.
+template <class Impl>
+class PolymorphicMatcher {
  public:
-  // MSVC warns about adding const to a function type, so we want to
-  // disable the warning.
-#ifdef _MSC_VER
-# pragma warning(push)          // Saves the current warning state.
-# pragma warning(disable:4180)  // Temporarily disables warning 4180.
-#endif  // _MSC_VER
+  explicit PolymorphicMatcher(const Impl& an_impl) : impl_(an_impl) {}
 
-  static void Print(const T& value, ::std::ostream* os) {
-    // Prints the address of the value.  We use reinterpret_cast here
-    // as static_cast doesn't compile when T is a function type.
-    *os << "@" << reinterpret_cast<const void*>(&value) << " ";
+  // Returns a mutable reference to the underlying matcher
+  // implementation object.
+  Impl& mutable_impl() { return impl_; }
 
-    // Then prints the value itself.
-    UniversalPrint(value, os);
+  // Returns an immutable reference to the underlying matcher
+  // implementation object.
+  const Impl& impl() const { return impl_; }
+
+  template <typename T>
+  operator Matcher<T>() const {
+    return Matcher<T>(new MonomorphicImpl<const T&>(impl_));
   }
 
-#ifdef _MSC_VER
-# pragma warning(pop)           // Restores the warning state.
-#endif  // _MSC_VER
-};
+ private:
+  template <typename T>
+  class MonomorphicImpl : public MatcherInterface<T> {
+   public:
+    explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {}
 
-// Prints a value tersely: for a reference type, the referenced value
-// (but not the address) is printed; for a (const) char pointer, the
-// NUL-terminated string (but not the pointer) is printed.
+    void DescribeTo(::std::ostream* os) const override { impl_.DescribeTo(os); }
+
+    void DescribeNegationTo(::std::ostream* os) const override {
+      impl_.DescribeNegationTo(os);
+    }
+
+    bool MatchAndExplain(T x, MatchResultListener* listener) const override {
+      return impl_.MatchAndExplain(x, listener);
+    }
+
+   private:
+    const Impl impl_;
+  };
+
+  Impl impl_;
+};
 
+// Creates a matcher from its implementation.
+// DEPRECATED: Especially in the generic code, prefer:
+//   Matcher<T>(new MyMatcherImpl<const T&>(...));
+//
+// MakeMatcher may create a Matcher that accepts its argument by value, which
+// leads to unnecessary copies & lack of support for non-copyable types.
 template <typename T>
-class UniversalTersePrinter {
+inline Matcher<T> MakeMatcher(const MatcherInterface<T>* impl) {
+  return Matcher<T>(impl);
+}
+
+// Creates a polymorphic matcher from its implementation.  This is
+// easier to use than the PolymorphicMatcher<Impl> constructor as it
+// doesn't require you to explicitly write the template argument, e.g.
+//
+//   MakePolymorphicMatcher(foo);
+// vs
+//   PolymorphicMatcher<TypeOfFoo>(foo);
+template <class Impl>
+inline PolymorphicMatcher<Impl> MakePolymorphicMatcher(const Impl& impl) {
+  return PolymorphicMatcher<Impl>(impl);
+}
+
+namespace internal {
+// Implements a matcher that compares a given value with a
+// pre-supplied value using one of the ==, <=, <, etc, operators.  The
+// two values being compared don't have to have the same type.
+//
+// The matcher defined here is polymorphic (for example, Eq(5) can be
+// used to match an int, a short, a double, etc).  Therefore we use
+// a template type conversion operator in the implementation.
+//
+// The following template definition assumes that the Rhs parameter is
+// a "bare" type (i.e. neither 'const T' nor 'T&').
+template <typename D, typename Rhs, typename Op>
+class ComparisonBase {
  public:
-  static void Print(const T& value, ::std::ostream* os) {
-    UniversalPrint(value, os);
+  explicit ComparisonBase(const Rhs& rhs) : rhs_(rhs) {}
+
+  using is_gtest_matcher = void;
+
+  template <typename Lhs>
+  bool MatchAndExplain(const Lhs& lhs, std::ostream*) const {
+    return Op()(lhs, Unwrap(rhs_));
+  }
+  void DescribeTo(std::ostream* os) const {
+    *os << D::Desc() << " ";
+    UniversalPrint(Unwrap(rhs_), os);
+  }
+  void DescribeNegationTo(std::ostream* os) const {
+    *os << D::NegatedDesc() << " ";
+    UniversalPrint(Unwrap(rhs_), os);
+  }
+
+ private:
+  template <typename T>
+  static const T& Unwrap(const T& v) {
+    return v;
+  }
+  template <typename T>
+  static const T& Unwrap(std::reference_wrapper<T> v) {
+    return v;
   }
+
+  Rhs rhs_;
 };
-template <typename T>
-class UniversalTersePrinter<T&> {
+
+template <typename Rhs>
+class EqMatcher : public ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq> {
  public:
-  static void Print(const T& value, ::std::ostream* os) {
-    UniversalPrint(value, os);
-  }
+  explicit EqMatcher(const Rhs& rhs)
+      : ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq>(rhs) { }
+  static const char* Desc() { return "is equal to"; }
+  static const char* NegatedDesc() { return "isn't equal to"; }
 };
-template <typename T, size_t N>
-class UniversalTersePrinter<T[N]> {
+template <typename Rhs>
+class NeMatcher : public ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe> {
  public:
-  static void Print(const T (&value)[N], ::std::ostream* os) {
-    UniversalPrinter<T[N]>::Print(value, os);
-  }
+  explicit NeMatcher(const Rhs& rhs)
+      : ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe>(rhs) { }
+  static const char* Desc() { return "isn't equal to"; }
+  static const char* NegatedDesc() { return "is equal to"; }
 };
-template <>
-class UniversalTersePrinter<const char*> {
+template <typename Rhs>
+class LtMatcher : public ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt> {
  public:
-  static void Print(const char* str, ::std::ostream* os) {
-    if (str == NULL) {
-      *os << "NULL";
-    } else {
-      UniversalPrint(string(str), os);
-    }
-  }
+  explicit LtMatcher(const Rhs& rhs)
+      : ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt>(rhs) { }
+  static const char* Desc() { return "is <"; }
+  static const char* NegatedDesc() { return "isn't <"; }
 };
-template <>
-class UniversalTersePrinter<char*> {
+template <typename Rhs>
+class GtMatcher : public ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt> {
  public:
-  static void Print(char* str, ::std::ostream* os) {
-    UniversalTersePrinter<const char*>::Print(str, os);
-  }
+  explicit GtMatcher(const Rhs& rhs)
+      : ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt>(rhs) { }
+  static const char* Desc() { return "is >"; }
+  static const char* NegatedDesc() { return "isn't >"; }
 };
-
-#if GTEST_HAS_STD_WSTRING
-template <>
-class UniversalTersePrinter<const wchar_t*> {
+template <typename Rhs>
+class LeMatcher : public ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe> {
  public:
-  static void Print(const wchar_t* str, ::std::ostream* os) {
-    if (str == NULL) {
-      *os << "NULL";
-    } else {
-      UniversalPrint(::std::wstring(str), os);
-    }
-  }
+  explicit LeMatcher(const Rhs& rhs)
+      : ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe>(rhs) { }
+  static const char* Desc() { return "is <="; }
+  static const char* NegatedDesc() { return "isn't <="; }
 };
-#endif
-
-template <>
-class UniversalTersePrinter<wchar_t*> {
+template <typename Rhs>
+class GeMatcher : public ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe> {
  public:
-  static void Print(wchar_t* str, ::std::ostream* os) {
-    UniversalTersePrinter<const wchar_t*>::Print(str, os);
-  }
+  explicit GeMatcher(const Rhs& rhs)
+      : ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe>(rhs) { }
+  static const char* Desc() { return "is >="; }
+  static const char* NegatedDesc() { return "isn't >="; }
 };
 
-template <typename T>
-void UniversalTersePrint(const T& value, ::std::ostream* os) {
-  UniversalTersePrinter<T>::Print(value, os);
-}
+template <typename T, typename = typename std::enable_if<
+                          std::is_constructible<std::string, T>::value>::type>
+using StringLike = T;
 
-// Prints a value using the type inferred by the compiler.  The
-// difference between this and UniversalTersePrint() is that for a
-// (const) char pointer, this prints both the pointer and the
-// NUL-terminated string.
-template <typename T>
-void UniversalPrint(const T& value, ::std::ostream* os) {
-  // A workaround for the bug in VC++ 7.1 that prevents us from instantiating
-  // UniversalPrinter with T directly.
-  typedef T T1;
-  UniversalPrinter<T1>::Print(value, os);
-}
+// Implements polymorphic matchers MatchesRegex(regex) and
+// ContainsRegex(regex), which can be used as a Matcher<T> as long as
+// T can be converted to a string.
+class MatchesRegexMatcher {
+ public:
+  MatchesRegexMatcher(const RE* regex, bool full_match)
+      : regex_(regex), full_match_(full_match) {}
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+  bool MatchAndExplain(const internal::StringView& s,
+                       MatchResultListener* listener) const {
+    return MatchAndExplain(std::string(s), listener);
+  }
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
-#if GTEST_HAS_TR1_TUPLE
-typedef ::std::vector<string> Strings;
+  // Accepts pointer types, particularly:
+  //   const char*
+  //   char*
+  //   const wchar_t*
+  //   wchar_t*
+  template <typename CharType>
+  bool MatchAndExplain(CharType* s, MatchResultListener* listener) const {
+    return s != nullptr && MatchAndExplain(std::string(s), listener);
+  }
 
-// This helper template allows PrintTo() for tuples and
-// UniversalTersePrintTupleFieldsToStrings() to be defined by
-// induction on the number of tuple fields.  The idea is that
-// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
-// fields in tuple t, and can be defined in terms of
-// TuplePrefixPrinter<N - 1>.
+  // Matches anything that can convert to std::string.
+  //
+  // This is a template, not just a plain function with const std::string&,
+  // because absl::string_view has some interfering non-explicit constructors.
+  template <class MatcheeStringType>
+  bool MatchAndExplain(const MatcheeStringType& s,
+                       MatchResultListener* /* listener */) const {
+    const std::string& s2(s);
+    return full_match_ ? RE::FullMatch(s2, *regex_)
+                       : RE::PartialMatch(s2, *regex_);
+  }
 
-// The inductive case.
-template <size_t N>
-struct TuplePrefixPrinter {
-  // Prints the first N fields of a tuple.
-  template <typename Tuple>
-  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
-    TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os);
-    *os << ", ";
-    UniversalPrinter<typename ::std::tr1::tuple_element<N - 1, Tuple>::type>
-        ::Print(::std::tr1::get<N - 1>(t), os);
+  void DescribeTo(::std::ostream* os) const {
+    *os << (full_match_ ? "matches" : "contains") << " regular expression ";
+    UniversalPrinter<std::string>::Print(regex_->pattern(), os);
   }
 
-  // Tersely prints the first N fields of a tuple to a string vector,
-  // one element for each field.
-  template <typename Tuple>
-  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
-    TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings);
-    ::std::stringstream ss;
-    UniversalTersePrint(::std::tr1::get<N - 1>(t), &ss);
-    strings->push_back(ss.str());
+  void DescribeNegationTo(::std::ostream* os) const {
+    *os << "doesn't " << (full_match_ ? "match" : "contain")
+        << " regular expression ";
+    UniversalPrinter<std::string>::Print(regex_->pattern(), os);
   }
+
+ private:
+  const std::shared_ptr<const RE> regex_;
+  const bool full_match_;
 };
+}  // namespace internal
 
-// Base cases.
-template <>
-struct TuplePrefixPrinter<0> {
-  template <typename Tuple>
-  static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}
+// Matches a string that fully matches regular expression 'regex'.
+// The matcher takes ownership of 'regex'.
+inline PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+    const internal::RE* regex) {
+  return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, true));
+}
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+    const internal::StringLike<T>& regex) {
+  return MatchesRegex(new internal::RE(std::string(regex)));
+}
 
-  template <typename Tuple>
-  static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
-};
-// We have to specialize the entire TuplePrefixPrinter<> class
-// template here, even though the definition of
-// TersePrintPrefixToStrings() is the same as the generic version, as
-// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't
-// support specializing a method template of a class template.
-template <>
-struct TuplePrefixPrinter<1> {
-  template <typename Tuple>
-  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
-    UniversalPrinter<typename ::std::tr1::tuple_element<0, Tuple>::type>::
-        Print(::std::tr1::get<0>(t), os);
-  }
+// Matches a string that contains regular expression 'regex'.
+// The matcher takes ownership of 'regex'.
+inline PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+    const internal::RE* regex) {
+  return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, false));
+}
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+    const internal::StringLike<T>& regex) {
+  return ContainsRegex(new internal::RE(std::string(regex)));
+}
 
-  template <typename Tuple>
-  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
-    ::std::stringstream ss;
-    UniversalTersePrint(::std::tr1::get<0>(t), &ss);
-    strings->push_back(ss.str());
-  }
-};
+// Creates a polymorphic matcher that matches anything equal to x.
+// Note: if the parameter of Eq() were declared as const T&, Eq("foo")
+// wouldn't compile.
+template <typename T>
+inline internal::EqMatcher<T> Eq(T x) { return internal::EqMatcher<T>(x); }
 
-// Helper function for printing a tuple.  T must be instantiated with
-// a tuple type.
+// Constructs a Matcher<T> from a 'value' of type T.  The constructed
+// matcher matches any value that's equal to 'value'.
 template <typename T>
-void PrintTupleTo(const T& t, ::std::ostream* os) {
-  *os << "(";
-  TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value>::
-      PrintPrefixTo(t, os);
-  *os << ")";
+Matcher<T>::Matcher(T value) { *this = Eq(value); }
+
+// Creates a monomorphic matcher that matches anything with type Lhs
+// and equal to rhs.  A user may need to use this instead of Eq(...)
+// in order to resolve an overloading ambiguity.
+//
+// TypedEq<T>(x) is just a convenient short-hand for Matcher<T>(Eq(x))
+// or Matcher<T>(x), but more readable than the latter.
+//
+// We could define similar monomorphic matchers for other comparison
+// operations (e.g. TypedLt, TypedGe, and etc), but decided not to do
+// it yet as those are used much less than Eq() in practice.  A user
+// can always write Matcher<T>(Lt(5)) to be explicit about the type,
+// for example.
+template <typename Lhs, typename Rhs>
+inline Matcher<Lhs> TypedEq(const Rhs& rhs) { return Eq(rhs); }
+
+// Creates a polymorphic matcher that matches anything >= x.
+template <typename Rhs>
+inline internal::GeMatcher<Rhs> Ge(Rhs x) {
+  return internal::GeMatcher<Rhs>(x);
 }
 
-// Prints the fields of a tuple tersely to a string vector, one
-// element for each field.  See the comment before
-// UniversalTersePrint() for how we define "tersely".
-template <typename Tuple>
-Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
-  Strings result;
-  TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value>::
-      TersePrintPrefixToStrings(value, &result);
-  return result;
+// Creates a polymorphic matcher that matches anything > x.
+template <typename Rhs>
+inline internal::GtMatcher<Rhs> Gt(Rhs x) {
+  return internal::GtMatcher<Rhs>(x);
 }
-#endif  // GTEST_HAS_TR1_TUPLE
 
-}  // namespace internal
+// Creates a polymorphic matcher that matches anything <= x.
+template <typename Rhs>
+inline internal::LeMatcher<Rhs> Le(Rhs x) {
+  return internal::LeMatcher<Rhs>(x);
+}
 
-template <typename T>
-::std::string PrintToString(const T& value) {
-  ::std::stringstream ss;
-  internal::UniversalTersePrinter<T>::Print(value, &ss);
-  return ss.str();
+// Creates a polymorphic matcher that matches anything < x.
+template <typename Rhs>
+inline internal::LtMatcher<Rhs> Lt(Rhs x) {
+  return internal::LtMatcher<Rhs>(x);
 }
 
+// Creates a polymorphic matcher that matches anything != x.
+template <typename Rhs>
+inline internal::NeMatcher<Rhs> Ne(Rhs x) {
+  return internal::NeMatcher<Rhs>(x);
+}
 }  // namespace testing
 
-#endif  // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251 5046
 
-#if GTEST_HAS_PARAM_TEST
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+
+#include <stdio.h>
+#include <memory>
 
 namespace testing {
 namespace internal {
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Outputs a message explaining invalid registration of different
-// fixture class for the same test case. This may happen when
-// TEST_P macro is used to define two tests with the same name
-// but in different namespaces.
-GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
-                                          const char* file, int line);
+GTEST_DECLARE_string_(internal_run_death_test);
 
-template <typename> class ParamGeneratorInterface;
-template <typename> class ParamGenerator;
+// Names of the flags (needed for parsing Google Test flags).
+const char kDeathTestStyleFlag[] = "death_test_style";
+const char kDeathTestUseFork[] = "death_test_use_fork";
+const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
 
-// Interface for iterating over elements provided by an implementation
-// of ParamGeneratorInterface<T>.
-template <typename T>
-class ParamIteratorInterface {
- public:
-  virtual ~ParamIteratorInterface() {}
-  // A pointer to the base generator instance.
-  // Used only for the purposes of iterator comparison
-  // to make sure that two iterators belong to the same generator.
-  virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
-  // Advances iterator to point to the next element
-  // provided by the generator. The caller is responsible
-  // for not calling Advance() on an iterator equal to
-  // BaseGenerator()->End().
-  virtual void Advance() = 0;
-  // Clones the iterator object. Used for implementing copy semantics
-  // of ParamIterator<T>.
-  virtual ParamIteratorInterface* Clone() const = 0;
-  // Dereferences the current iterator and provides (read-only) access
-  // to the pointed value. It is the caller's responsibility not to call
-  // Current() on an iterator equal to BaseGenerator()->End().
-  // Used for implementing ParamGenerator<T>::operator*().
-  virtual const T* Current() const = 0;
-  // Determines whether the given iterator and other point to the same
-  // element in the sequence generated by the generator.
-  // Used for implementing ParamGenerator<T>::operator==().
-  virtual bool Equals(const ParamIteratorInterface& other) const = 0;
-};
+#if GTEST_HAS_DEATH_TEST
 
-// Class iterating over elements provided by an implementation of
-// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
-// and implements the const forward iterator concept.
-template <typename T>
-class ParamIterator {
- public:
-  typedef T value_type;
-  typedef const T& reference;
-  typedef ptrdiff_t difference_type;
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
 
-  // ParamIterator assumes ownership of the impl_ pointer.
-  ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
-  ParamIterator& operator=(const ParamIterator& other) {
-    if (this != &other)
-      impl_.reset(other.impl_->Clone());
-    return *this;
-  }
+// DeathTest is a class that hides much of the complexity of the
+// GTEST_DEATH_TEST_ macro.  It is abstract; its static Create method
+// returns a concrete class that depends on the prevailing death test
+// style, as defined by the --gtest_death_test_style and/or
+// --gtest_internal_run_death_test flags.
 
-  const T& operator*() const { return *impl_->Current(); }
-  const T* operator->() const { return impl_->Current(); }
-  // Prefix version of operator++.
-  ParamIterator& operator++() {
-    impl_->Advance();
-    return *this;
-  }
-  // Postfix version of operator++.
-  ParamIterator operator++(int /*unused*/) {
-    ParamIteratorInterface<T>* clone = impl_->Clone();
-    impl_->Advance();
-    return ParamIterator(clone);
-  }
-  bool operator==(const ParamIterator& other) const {
-    return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
-  }
-  bool operator!=(const ParamIterator& other) const {
-    return !(*this == other);
-  }
+// In describing the results of death tests, these terms are used with
+// the corresponding definitions:
+//
+// exit status:  The integer exit information in the format specified
+//               by wait(2)
+// exit code:    The integer code passed to exit(3), _exit(2), or
+//               returned from main()
+class GTEST_API_ DeathTest {
+ public:
+  // Create returns false if there was an error determining the
+  // appropriate action to take for the current death test; for example,
+  // if the gtest_death_test_style flag is set to an invalid value.
+  // The LastMessage method will return a more detailed message in that
+  // case.  Otherwise, the DeathTest pointer pointed to by the "test"
+  // argument is set.  If the death test should be skipped, the pointer
+  // is set to NULL; otherwise, it is set to the address of a new concrete
+  // DeathTest object that controls the execution of the current test.
+  static bool Create(const char* statement, Matcher<const std::string&> matcher,
+                     const char* file, int line, DeathTest** test);
+  DeathTest();
+  virtual ~DeathTest() { }
 
- private:
-  friend class ParamGenerator<T>;
-  explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
-  scoped_ptr<ParamIteratorInterface<T> > impl_;
-};
+  // A helper class that aborts a death test when it's deleted.
+  class ReturnSentinel {
+   public:
+    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
+    ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
+   private:
+    DeathTest* const test_;
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
+  } GTEST_ATTRIBUTE_UNUSED_;
 
-// ParamGeneratorInterface<T> is the binary interface to access generators
-// defined in other translation units.
-template <typename T>
-class ParamGeneratorInterface {
- public:
-  typedef T ParamType;
+  // An enumeration of possible roles that may be taken when a death
+  // test is encountered.  EXECUTE means that the death test logic should
+  // be executed immediately.  OVERSEE means that the program should prepare
+  // the appropriate environment for a child process to execute the death
+  // test, then wait for it to complete.
+  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
 
-  virtual ~ParamGeneratorInterface() {}
+  // An enumeration of the three reasons that a test might be aborted.
+  enum AbortReason {
+    TEST_ENCOUNTERED_RETURN_STATEMENT,
+    TEST_THREW_EXCEPTION,
+    TEST_DID_NOT_DIE
+  };
 
-  // Generator interface definition
-  virtual ParamIteratorInterface<T>* Begin() const = 0;
-  virtual ParamIteratorInterface<T>* End() const = 0;
-};
+  // Assumes one of the above roles.
+  virtual TestRole AssumeRole() = 0;
 
-// Wraps ParamGeneratorInterface<T> and provides general generator syntax
-// compatible with the STL Container concept.
-// This class implements copy initialization semantics and the contained
-// ParamGeneratorInterface<T> instance is shared among all copies
-// of the original object. This is possible because that instance is immutable.
-template<typename T>
-class ParamGenerator {
- public:
-  typedef ParamIterator<T> iterator;
+  // Waits for the death test to finish and returns its status.
+  virtual int Wait() = 0;
 
-  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
-  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
+  // Returns true if the death test passed; that is, the test process
+  // exited during the test, its exit status matches a user-supplied
+  // predicate, and its stderr output matches a user-supplied regular
+  // expression.
+  // The user-supplied predicate may be a macro expression rather
+  // than a function pointer or functor, or else Wait and Passed could
+  // be combined.
+  virtual bool Passed(bool exit_status_ok) = 0;
 
-  ParamGenerator& operator=(const ParamGenerator& other) {
-    impl_ = other.impl_;
-    return *this;
-  }
+  // Signals that the death test did not die as expected.
+  virtual void Abort(AbortReason reason) = 0;
 
-  iterator begin() const { return iterator(impl_->Begin()); }
-  iterator end() const { return iterator(impl_->End()); }
+  // Returns a human-readable outcome message regarding the outcome of
+  // the last death test.
+  static const char* LastMessage();
+
+  static void set_last_death_test_message(const std::string& message);
 
  private:
-  linked_ptr<const ParamGeneratorInterface<T> > impl_;
+  // A string containing a description of the outcome of the last death test.
+  static std::string last_death_test_message_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
 };
 
-// Generates values from a range of two comparable values. Can be used to
-// generate sequences of user-defined types that implement operator+() and
-// operator<().
-// This class is used in the Range() function.
-template <typename T, typename IncrementT>
-class RangeGenerator : public ParamGeneratorInterface<T> {
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+// Factory interface for death tests.  May be mocked out for testing.
+class DeathTestFactory {
  public:
-  RangeGenerator(T begin, T end, IncrementT step)
-      : begin_(begin), end_(end),
-        step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
-  virtual ~RangeGenerator() {}
+  virtual ~DeathTestFactory() { }
+  virtual bool Create(const char* statement,
+                      Matcher<const std::string&> matcher, const char* file,
+                      int line, DeathTest** test) = 0;
+};
 
-  virtual ParamIteratorInterface<T>* Begin() const {
-    return new Iterator(this, begin_, 0, step_);
-  }
-  virtual ParamIteratorInterface<T>* End() const {
-    return new Iterator(this, end_, end_index_, step_);
+// A concrete DeathTestFactory implementation for normal use.
+class DefaultDeathTestFactory : public DeathTestFactory {
+ public:
+  bool Create(const char* statement, Matcher<const std::string&> matcher,
+              const char* file, int line, DeathTest** test) override;
+};
+
+// Returns true if exit_status describes a process that was terminated
+// by a signal, or exited normally with a nonzero exit code.
+GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
+
+// A string passed to EXPECT_DEATH (etc.) is caught by one of these overloads
+// and interpreted as a regex (rather than an Eq matcher) for legacy
+// compatibility.
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+    ::testing::internal::RE regex) {
+  return ContainsRegex(regex.pattern());
+}
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(const char* regex) {
+  return ContainsRegex(regex);
+}
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+    const ::std::string& regex) {
+  return ContainsRegex(regex);
+}
+
+// If a Matcher<const ::std::string&> is passed to EXPECT_DEATH (etc.), it's
+// used directly.
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+    Matcher<const ::std::string&> matcher) {
+  return matcher;
+}
+
+// Traps C++ exceptions escaping statement and reports them as test
+// failures. Note that trapping SEH exceptions is not implemented here.
+# if GTEST_HAS_EXCEPTIONS
+#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+  try { \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+  } catch (const ::std::exception& gtest_exception) { \
+    fprintf(\
+        stderr, \
+        "\n%s: Caught std::exception-derived exception escaping the " \
+        "death test statement. Exception message: %s\n", \
+        ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
+        gtest_exception.what()); \
+    fflush(stderr); \
+    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+  } catch (...) { \
+    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
   }
 
- private:
-  class Iterator : public ParamIteratorInterface<T> {
-   public:
-    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
-             IncrementT step)
-        : base_(base), value_(value), index_(index), step_(step) {}
-    virtual ~Iterator() {}
+# else
+#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+  GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
 
-    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
-      return base_;
-    }
-    virtual void Advance() {
-      value_ = value_ + step_;
-      index_++;
-    }
-    virtual ParamIteratorInterface<T>* Clone() const {
-      return new Iterator(*this);
-    }
-    virtual const T* Current() const { return &value_; }
-    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      const int other_index =
-          CheckedDowncastToActualType<const Iterator>(&other)->index_;
-      return index_ == other_index;
-    }
+# endif
 
-   private:
-    Iterator(const Iterator& other)
-        : ParamIteratorInterface<T>(),
-          base_(other.base_), value_(other.value_), index_(other.index_),
-          step_(other.step_) {}
+// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
+// ASSERT_EXIT*, and EXPECT_EXIT*.
+#define GTEST_DEATH_TEST_(statement, predicate, regex_or_matcher, fail)        \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                                \
+  if (::testing::internal::AlwaysTrue()) {                                     \
+    ::testing::internal::DeathTest* gtest_dt;                                  \
+    if (!::testing::internal::DeathTest::Create(                               \
+            #statement,                                                        \
+            ::testing::internal::MakeDeathTestMatcher(regex_or_matcher),       \
+            __FILE__, __LINE__, &gtest_dt)) {                                  \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__);                        \
+    }                                                                          \
+    if (gtest_dt != nullptr) {                                                 \
+      std::unique_ptr< ::testing::internal::DeathTest> gtest_dt_ptr(gtest_dt); \
+      switch (gtest_dt->AssumeRole()) {                                        \
+        case ::testing::internal::DeathTest::OVERSEE_TEST:                     \
+          if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) {                \
+            goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__);                  \
+          }                                                                    \
+          break;                                                               \
+        case ::testing::internal::DeathTest::EXECUTE_TEST: {                   \
+          ::testing::internal::DeathTest::ReturnSentinel gtest_sentinel(       \
+              gtest_dt);                                                       \
+          GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt);            \
+          gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE);   \
+          break;                                                               \
+        }                                                                      \
+        default:                                                               \
+          break;                                                               \
+      }                                                                        \
+    }                                                                          \
+  } else                                                                       \
+    GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__)                                \
+        : fail(::testing::internal::DeathTest::LastMessage())
+// The symbol "fail" here expands to something into which a message
+// can be streamed.
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
+// NDEBUG mode. In this case we need the statements to be executed and the macro
+// must accept a streamed message even though the message is never printed.
+// The regex object is not evaluated, but it is used to prevent "unused"
+// warnings and to avoid an expression that doesn't compile in debug mode.
+#define GTEST_EXECUTE_STATEMENT_(statement, regex_or_matcher)    \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                  \
+  if (::testing::internal::AlwaysTrue()) {                       \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement);   \
+  } else if (!::testing::internal::AlwaysTrue()) {               \
+    ::testing::internal::MakeDeathTestMatcher(regex_or_matcher); \
+  } else                                                         \
+    ::testing::Message()
 
-    const ParamGeneratorInterface<T>* const base_;
-    T value_;
-    int index_;
-    const IncrementT step_;
-  };  // class RangeGenerator::Iterator
+// A class representing the parsed contents of the
+// --gtest_internal_run_death_test flag, as it existed when
+// RUN_ALL_TESTS was called.
+class InternalRunDeathTestFlag {
+ public:
+  InternalRunDeathTestFlag(const std::string& a_file,
+                           int a_line,
+                           int an_index,
+                           int a_write_fd)
+      : file_(a_file), line_(a_line), index_(an_index),
+        write_fd_(a_write_fd) {}
 
-  static int CalculateEndIndex(const T& begin,
-                               const T& end,
-                               const IncrementT& step) {
-    int end_index = 0;
-    for (T i = begin; i < end; i = i + step)
-      end_index++;
-    return end_index;
+  ~InternalRunDeathTestFlag() {
+    if (write_fd_ >= 0)
+      posix::Close(write_fd_);
   }
 
-  // No implementation - assignment is unsupported.
-  void operator=(const RangeGenerator& other);
+  const std::string& file() const { return file_; }
+  int line() const { return line_; }
+  int index() const { return index_; }
+  int write_fd() const { return write_fd_; }
 
-  const T begin_;
-  const T end_;
-  const IncrementT step_;
-  // The index for the end() iterator. All the elements in the generated
-  // sequence are indexed (0-based) to aid iterator comparison.
-  const int end_index_;
-};  // class RangeGenerator
+ private:
+  std::string file_;
+  int line_;
+  int index_;
+  int write_fd_;
 
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
+};
 
-// Generates values from a pair of STL-style iterators. Used in the
-// ValuesIn() function. The elements are copied from the source range
-// since the source can be located on the stack, and the generator
-// is likely to persist beyond that stack frame.
-template <typename T>
-class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
- public:
-  template <typename ForwardIterator>
-  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
-      : container_(begin, end) {}
-  virtual ~ValuesInIteratorRangeGenerator() {}
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
 
-  virtual ParamIteratorInterface<T>* Begin() const {
-    return new Iterator(this, container_.begin());
-  }
-  virtual ParamIteratorInterface<T>* End() const {
-    return new Iterator(this, container_.end());
-  }
+#endif  // GTEST_HAS_DEATH_TEST
 
- private:
-  typedef typename ::std::vector<T> ContainerType;
+}  // namespace internal
+}  // namespace testing
 
-  class Iterator : public ParamIteratorInterface<T> {
-   public:
-    Iterator(const ParamGeneratorInterface<T>* base,
-             typename ContainerType::const_iterator iterator)
-        : base_(base), iterator_(iterator) {}
-    virtual ~Iterator() {}
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
 
-    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
-      return base_;
-    }
-    virtual void Advance() {
-      ++iterator_;
-      value_.reset();
-    }
-    virtual ParamIteratorInterface<T>* Clone() const {
-      return new Iterator(*this);
-    }
-    // We need to use cached value referenced by iterator_ because *iterator_
-    // can return a temporary object (and of type other then T), so just
-    // having "return &*iterator_;" doesn't work.
-    // value_ is updated here and not in Advance() because Advance()
-    // can advance iterator_ beyond the end of the range, and we cannot
-    // detect that fact. The client code, on the other hand, is
-    // responsible for not calling Current() on an out-of-range iterator.
-    virtual const T* Current() const {
-      if (value_.get() == NULL)
-        value_.reset(new T(*iterator_));
-      return value_.get();
-    }
-    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      return iterator_ ==
-          CheckedDowncastToActualType<const Iterator>(&other)->iterator_;
-    }
+namespace testing {
 
-   private:
-    Iterator(const Iterator& other)
-          // The explicit constructor call suppresses a false warning
-          // emitted by gcc when supplied with the -Wextra option.
-        : ParamIteratorInterface<T>(),
-          base_(other.base_),
-          iterator_(other.iterator_) {}
+// This flag controls the style of death tests.  Valid values are "threadsafe",
+// meaning that the death test child process will re-execute the test binary
+// from the start, running only a single death test, or "fast",
+// meaning that the child process will execute the test logic immediately
+// after forking.
+GTEST_DECLARE_string_(death_test_style);
 
-    const ParamGeneratorInterface<T>* const base_;
-    typename ContainerType::const_iterator iterator_;
-    // A cached value of *iterator_. We keep it here to allow access by
-    // pointer in the wrapping iterator's operator->().
-    // value_ needs to be mutable to be accessed in Current().
-    // Use of scoped_ptr helps manage cached value's lifetime,
-    // which is bound by the lifespan of the iterator itself.
-    mutable scoped_ptr<const T> value_;
-  };  // class ValuesInIteratorRangeGenerator::Iterator
+#if GTEST_HAS_DEATH_TEST
 
-  // No implementation - assignment is unsupported.
-  void operator=(const ValuesInIteratorRangeGenerator& other);
+namespace internal {
 
-  const ContainerType container_;
-};  // class ValuesInIteratorRangeGenerator
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process.  Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests.  IMPORTANT: This is an internal utility.  Using it may break the
+// implementation of death tests.  User code MUST NOT use it.
+GTEST_API_ bool InDeathTestChild();
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+}  // namespace internal
+
+// The following macros are useful for writing death tests.
+
+// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
+// executed:
+//
+//   1. It generates a warning if there is more than one active
+//   thread.  This is because it's safe to fork() or clone() only
+//   when there is a single thread.
+//
+//   2. The parent process clone()s a sub-process and runs the death
+//   test in it; the sub-process exits with code 0 at the end of the
+//   death test, if it hasn't exited already.
+//
+//   3. The parent process waits for the sub-process to terminate.
+//
+//   4. The parent process checks the exit code and error message of
+//   the sub-process.
+//
+// Examples:
+//
+//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
+//   for (int i = 0; i < 5; i++) {
+//     EXPECT_DEATH(server.ProcessRequest(i),
+//                  "Invalid request .* in ProcessRequest()")
+//                  << "Failed to die on request " << i;
+//   }
+//
+//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
+//
+//   bool KilledBySIGHUP(int exit_code) {
+//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
+//   }
+//
+//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
+//
+// The final parameter to each of these macros is a matcher applied to any data
+// the sub-process wrote to stderr.  For compatibility with existing tests, a
+// bare string is interpreted as a regular expression matcher.
+//
+// On the regular expressions used in death tests:
+//
+//   GOOGLETEST_CM0005 DO NOT DELETE
+//   On POSIX-compliant systems (*nix), we use the <regex.h> library,
+//   which uses the POSIX extended regex syntax.
+//
+//   On other platforms (e.g. Windows or Mac), we only support a simple regex
+//   syntax implemented as part of Google Test.  This limited
+//   implementation should be enough most of the time when writing
+//   death tests; though it lacks many features you can find in PCRE
+//   or POSIX extended regex syntax.  For example, we don't support
+//   union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
+//   repetition count ("x{5,7}"), among others.
+//
+//   Below is the syntax that we do support.  We chose it to be a
+//   subset of both PCRE and POSIX extended regex, so it's easy to
+//   learn wherever you come from.  In the following: 'A' denotes a
+//   literal character, period (.), or a single \\ escape sequence;
+//   'x' and 'y' denote regular expressions; 'm' and 'n' are for
+//   natural numbers.
+//
+//     c     matches any literal character c
+//     \\d   matches any decimal digit
+//     \\D   matches any character that's not a decimal digit
+//     \\f   matches \f
+//     \\n   matches \n
+//     \\r   matches \r
+//     \\s   matches any ASCII whitespace, including \n
+//     \\S   matches any character that's not a whitespace
+//     \\t   matches \t
+//     \\v   matches \v
+//     \\w   matches any letter, _, or decimal digit
+//     \\W   matches any character that \\w doesn't match
+//     \\c   matches any literal character c, which must be a punctuation
+//     .     matches any single character except \n
+//     A?    matches 0 or 1 occurrences of A
+//     A*    matches 0 or many occurrences of A
+//     A+    matches 1 or many occurrences of A
+//     ^     matches the beginning of a string (not that of each line)
+//     $     matches the end of a string (not that of each line)
+//     xy    matches x followed by y
+//
+//   If you accidentally use PCRE or POSIX extended regex features
+//   not implemented by us, you will get a run-time failure.  In that
+//   case, please try to rewrite your regular expression within the
+//   above syntax.
+//
+//   This implementation is *not* meant to be as highly tuned or robust
+//   as a compiled regex library, but should perform well enough for a
+//   death test, which already incurs significant overhead by launching
+//   a child process.
 //
-// Stores a parameter value and later creates tests parameterized with that
-// value.
-template <class TestClass>
-class ParameterizedTestFactory : public TestFactoryBase {
- public:
-  typedef typename TestClass::ParamType ParamType;
-  explicit ParameterizedTestFactory(ParamType parameter) :
-      parameter_(parameter) {}
-  virtual Test* CreateTest() {
-    TestClass::SetParam(&parameter_);
-    return new TestClass();
-  }
-
- private:
-  const ParamType parameter_;
-
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
-};
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// Known caveats:
+//
+//   A "threadsafe" style death test obtains the path to the test
+//   program from argv[0] and re-executes it in the sub-process.  For
+//   simplicity, the current implementation doesn't search the PATH
+//   when launching the sub-process.  This means that the user must
+//   invoke the test program via a path that contains at least one
+//   path separator (e.g. path/to/foo_test and
+//   /absolute/path/to/bar_test are fine, but foo_test is not).  This
+//   is rarely a problem as people usually don't put the test binary
+//   directory in PATH.
 //
-// TestMetaFactoryBase is a base class for meta-factories that create
-// test factories for passing into MakeAndRegisterTestInfo function.
-template <class ParamType>
-class TestMetaFactoryBase {
- public:
-  virtual ~TestMetaFactoryBase() {}
 
-  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
-};
+// Asserts that a given `statement` causes the program to exit, with an
+// integer exit status that satisfies `predicate`, and emitting error output
+// that matches `matcher`.
+# define ASSERT_EXIT(statement, predicate, matcher) \
+    GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_FATAL_FAILURE_)
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// TestMetaFactory creates test factories for passing into
-// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
-// ownership of test factory pointer, same factory object cannot be passed
-// into that method twice. But ParameterizedTestCaseInfo is going to call
-// it for each Test/Parameter value combination. Thus it needs meta factory
-// creator class.
-template <class TestCase>
-class TestMetaFactory
-    : public TestMetaFactoryBase<typename TestCase::ParamType> {
- public:
-  typedef typename TestCase::ParamType ParamType;
+// Like `ASSERT_EXIT`, but continues on to successive tests in the
+// test suite, if any:
+# define EXPECT_EXIT(statement, predicate, matcher) \
+    GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_NONFATAL_FAILURE_)
 
-  TestMetaFactory() {}
+// Asserts that a given `statement` causes the program to exit, either by
+// explicitly exiting with a nonzero exit code or being killed by a
+// signal, and emitting error output that matches `matcher`.
+# define ASSERT_DEATH(statement, matcher) \
+    ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
 
-  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
-    return new ParameterizedTestFactory<TestCase>(parameter);
-  }
+// Like `ASSERT_DEATH`, but continues on to successive tests in the
+// test suite, if any:
+# define EXPECT_DEATH(statement, matcher) \
+    EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
+
+// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
 
+// Tests that an exit code describes a normal exit with a given exit code.
+class GTEST_API_ ExitedWithCode {
+ public:
+  explicit ExitedWithCode(int exit_code);
+  ExitedWithCode(const ExitedWithCode&) = default;
+  void operator=(const ExitedWithCode& other) = delete;
+  bool operator()(int exit_status) const;
  private:
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
+  const int exit_code_;
 };
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// ParameterizedTestCaseInfoBase is a generic interface
-// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
-// accumulates test information provided by TEST_P macro invocations
-// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
-// and uses that information to register all resulting test instances
-// in RegisterTests method. The ParameterizeTestCaseRegistry class holds
-// a collection of pointers to the ParameterizedTestCaseInfo objects
-// and calls RegisterTests() on each of them when asked.
-class ParameterizedTestCaseInfoBase {
+# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+// Tests that an exit code describes an exit due to termination by a
+// given signal.
+// GOOGLETEST_CM0006 DO NOT DELETE
+class GTEST_API_ KilledBySignal {
  public:
-  virtual ~ParameterizedTestCaseInfoBase() {}
-
-  // Base part of test case name for display purposes.
-  virtual const string& GetTestCaseName() const = 0;
-  // Test case id to verify identity.
-  virtual TypeId GetTestCaseTypeId() const = 0;
-  // UnitTest class invokes this method to register tests in this
-  // test case right before running them in RUN_ALL_TESTS macro.
-  // This method should not be called more then once on any single
-  // instance of a ParameterizedTestCaseInfoBase derived class.
-  virtual void RegisterTests() = 0;
-
- protected:
-  ParameterizedTestCaseInfoBase() {}
-
+  explicit KilledBySignal(int signum);
+  bool operator()(int exit_status) const;
  private:
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
+  const int signum_;
 };
+# endif  // !GTEST_OS_WINDOWS
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
+// The death testing framework causes this to have interesting semantics,
+// since the sideeffects of the call are only visible in opt mode, and not
+// in debug mode.
 //
-// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
-// macro invocations for a particular test case and generators
-// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
-// test case. It registers tests with all values generated by all
-// generators when asked.
-template <class TestCase>
-class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
- public:
-  // ParamType and GeneratorCreationFunc are private types but are required
-  // for declarations of public methods AddTestPattern() and
-  // AddTestCaseInstantiation().
-  typedef typename TestCase::ParamType ParamType;
-  // A function that returns an instance of appropriate generator type.
-  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
-
-  explicit ParameterizedTestCaseInfo(const char* name)
-      : test_case_name_(name) {}
+// In practice, this can be used to test functions that utilize the
+// LOG(DFATAL) macro using the following style:
+//
+// int DieInDebugOr12(int* sideeffect) {
+//   if (sideeffect) {
+//     *sideeffect = 12;
+//   }
+//   LOG(DFATAL) << "death";
+//   return 12;
+// }
+//
+// TEST(TestSuite, TestDieOr12WorksInDgbAndOpt) {
+//   int sideeffect = 0;
+//   // Only asserts in dbg.
+//   EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
+//
+// #ifdef NDEBUG
+//   // opt-mode has sideeffect visible.
+//   EXPECT_EQ(12, sideeffect);
+// #else
+//   // dbg-mode no visible sideeffect.
+//   EXPECT_EQ(0, sideeffect);
+// #endif
+// }
+//
+// This will assert that DieInDebugReturn12InOpt() crashes in debug
+// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
+// appropriate fallback value (12 in this case) in opt mode. If you
+// need to test that a function has appropriate side-effects in opt
+// mode, include assertions against the side-effects.  A general
+// pattern for this is:
+//
+// EXPECT_DEBUG_DEATH({
+//   // Side-effects here will have an effect after this statement in
+//   // opt mode, but none in debug mode.
+//   EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
+// }, "death");
+//
+# ifdef NDEBUG
 
-  // Test case base name for display purposes.
-  virtual const string& GetTestCaseName() const { return test_case_name_; }
-  // Test case id to verify identity.
-  virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); }
-  // TEST_P macro uses AddTestPattern() to record information
-  // about a single test in a LocalTestInfo structure.
-  // test_case_name is the base name of the test case (without invocation
-  // prefix). test_base_name is the name of an individual test without
-  // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
-  // test case base name and DoBar is test base name.
-  void AddTestPattern(const char* test_case_name,
-                      const char* test_base_name,
-                      TestMetaFactoryBase<ParamType>* meta_factory) {
-    tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
-                                                       test_base_name,
-                                                       meta_factory)));
-  }
-  // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information
-  // about a generator.
-  int AddTestCaseInstantiation(const string& instantiation_name,
-                               GeneratorCreationFunc* func,
-                               const char* /* file */,
-                               int /* line */) {
-    instantiations_.push_back(::std::make_pair(instantiation_name, func));
-    return 0;  // Return value used only to run this method in namespace scope.
-  }
-  // UnitTest class invokes this method to register tests in this test case
-  // test cases right before running tests in RUN_ALL_TESTS macro.
-  // This method should not be called more then once on any single
-  // instance of a ParameterizedTestCaseInfoBase derived class.
-  // UnitTest has a guard to prevent from calling this method more then once.
-  virtual void RegisterTests() {
-    for (typename TestInfoContainer::iterator test_it = tests_.begin();
-         test_it != tests_.end(); ++test_it) {
-      linked_ptr<TestInfo> test_info = *test_it;
-      for (typename InstantiationContainer::iterator gen_it =
-               instantiations_.begin(); gen_it != instantiations_.end();
-               ++gen_it) {
-        const string& instantiation_name = gen_it->first;
-        ParamGenerator<ParamType> generator((*gen_it->second)());
+#  define EXPECT_DEBUG_DEATH(statement, regex) \
+  GTEST_EXECUTE_STATEMENT_(statement, regex)
 
-        string test_case_name;
-        if ( !instantiation_name.empty() )
-          test_case_name = instantiation_name + "/";
-        test_case_name += test_info->test_case_base_name;
+#  define ASSERT_DEBUG_DEATH(statement, regex) \
+  GTEST_EXECUTE_STATEMENT_(statement, regex)
 
-        int i = 0;
-        for (typename ParamGenerator<ParamType>::iterator param_it =
-                 generator.begin();
-             param_it != generator.end(); ++param_it, ++i) {
-          Message test_name_stream;
-          test_name_stream << test_info->test_base_name << "/" << i;
-          MakeAndRegisterTestInfo(
-              test_case_name.c_str(),
-              test_name_stream.GetString().c_str(),
-              NULL,  // No type parameter.
-              PrintToString(*param_it).c_str(),
-              GetTestCaseTypeId(),
-              TestCase::SetUpTestCase,
-              TestCase::TearDownTestCase,
-              test_info->test_meta_factory->CreateTestFactory(*param_it));
-        }  // for param_it
-      }  // for gen_it
-    }  // for test_it
-  }  // RegisterTests
+# else
 
- private:
-  // LocalTestInfo structure keeps information about a single test registered
-  // with TEST_P macro.
-  struct TestInfo {
-    TestInfo(const char* a_test_case_base_name,
-             const char* a_test_base_name,
-             TestMetaFactoryBase<ParamType>* a_test_meta_factory) :
-        test_case_base_name(a_test_case_base_name),
-        test_base_name(a_test_base_name),
-        test_meta_factory(a_test_meta_factory) {}
-
-    const string test_case_base_name;
-    const string test_base_name;
-    const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
-  };
-  typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
-  // Keeps pairs of <Instantiation name, Sequence generator creation function>
-  // received from INSTANTIATE_TEST_CASE_P macros.
-  typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> >
-      InstantiationContainer;
+#  define EXPECT_DEBUG_DEATH(statement, regex) \
+  EXPECT_DEATH(statement, regex)
 
-  const string test_case_name_;
-  TestInfoContainer tests_;
-  InstantiationContainer instantiations_;
+#  define ASSERT_DEBUG_DEATH(statement, regex) \
+  ASSERT_DEATH(statement, regex)
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
-};  // class ParameterizedTestCaseInfo
+# endif  // NDEBUG for EXPECT_DEBUG_DEATH
+#endif  // GTEST_HAS_DEATH_TEST
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// This macro is used for implementing macros such as
+// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
+// death tests are not supported. Those macros must compile on such systems
+// if and only if EXPECT_DEATH and ASSERT_DEATH compile with the same parameters
+// on systems that support death tests. This allows one to write such a macro on
+// a system that does not support death tests and be sure that it will compile
+// on a death-test supporting system. It is exposed publicly so that systems
+// that have death-tests with stricter requirements than GTEST_HAS_DEATH_TEST
+// can write their own equivalent of EXPECT_DEATH_IF_SUPPORTED and
+// ASSERT_DEATH_IF_SUPPORTED.
 //
-// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase
-// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P
-// macros use it to locate their corresponding ParameterizedTestCaseInfo
-// descriptors.
-class ParameterizedTestCaseRegistry {
- public:
-  ParameterizedTestCaseRegistry() {}
-  ~ParameterizedTestCaseRegistry() {
-    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
-         it != test_case_infos_.end(); ++it) {
-      delete *it;
-    }
-  }
-
-  // Looks up or creates and returns a structure containing information about
-  // tests and instantiations of a particular test case.
-  template <class TestCase>
-  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
-      const char* test_case_name,
-      const char* file,
-      int line) {
-    ParameterizedTestCaseInfo<TestCase>* typed_test_info = NULL;
-    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
-         it != test_case_infos_.end(); ++it) {
-      if ((*it)->GetTestCaseName() == test_case_name) {
-        if ((*it)->GetTestCaseTypeId() != GetTypeId<TestCase>()) {
-          // Complain about incorrect usage of Google Test facilities
-          // and terminate the program since we cannot guaranty correct
-          // test case setup and tear-down in this case.
-          ReportInvalidTestCaseType(test_case_name,  file, line);
-          posix::Abort();
-        } else {
-          // At this point we are sure that the object we found is of the same
-          // type we are looking for, so we downcast it to that type
-          // without further checks.
-          typed_test_info = CheckedDowncastToActualType<
-              ParameterizedTestCaseInfo<TestCase> >(*it);
-        }
-        break;
-      }
-    }
-    if (typed_test_info == NULL) {
-      typed_test_info = new ParameterizedTestCaseInfo<TestCase>(test_case_name);
-      test_case_infos_.push_back(typed_test_info);
-    }
-    return typed_test_info;
-  }
-  void RegisterTests() {
-    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
-         it != test_case_infos_.end(); ++it) {
-      (*it)->RegisterTests();
-    }
-  }
-
- private:
-  typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;
-
-  TestCaseInfoContainer test_case_infos_;
+// Parameters:
+//   statement -  A statement that a macro such as EXPECT_DEATH would test
+//                for program termination. This macro has to make sure this
+//                statement is compiled but not executed, to ensure that
+//                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
+//                parameter if and only if EXPECT_DEATH compiles with it.
+//   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
+//                the output of statement.  This parameter has to be
+//                compiled but not evaluated by this macro, to ensure that
+//                this macro only accepts expressions that a macro such as
+//                EXPECT_DEATH would accept.
+//   terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
+//                and a return statement for ASSERT_DEATH_IF_SUPPORTED.
+//                This ensures that ASSERT_DEATH_IF_SUPPORTED will not
+//                compile inside functions where ASSERT_DEATH doesn't
+//                compile.
+//
+//  The branch that has an always false condition is used to ensure that
+//  statement and regex are compiled (and thus syntactically correct) but
+//  never executed. The unreachable code macro protects the terminator
+//  statement from generating an 'unreachable code' warning in case
+//  statement unconditionally returns or throws. The Message constructor at
+//  the end allows the syntax of streaming additional messages into the
+//  macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH.
+# define GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, terminator) \
+    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+    if (::testing::internal::AlwaysTrue()) { \
+      GTEST_LOG_(WARNING) \
+          << "Death tests are not supported on this platform.\n" \
+          << "Statement '" #statement "' cannot be verified."; \
+    } else if (::testing::internal::AlwaysFalse()) { \
+      ::testing::internal::RE::PartialMatch(".*", (regex)); \
+      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+      terminator; \
+    } else \
+      ::testing::Message()
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
-};
+// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
+// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
+// death tests are supported; otherwise they just issue a warning.  This is
+// useful when you are combining death test assertions with normal test
+// assertions in one test.
+#if GTEST_HAS_DEATH_TEST
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+    EXPECT_DEATH(statement, regex)
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+    ASSERT_DEATH(statement, regex)
+#else
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+    GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, )
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+    GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return)
+#endif
 
-}  // namespace internal
 }  // namespace testing
 
-#endif  //  GTEST_HAS_PARAM_TEST
-
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
-// This file was GENERATED by command:
-//     pump.py gtest-param-util-generated.h.pump
-// DO NOT EDIT BY HAND!!!
-
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+// Copyright 2008, Google Inc.
+// All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -10653,5127 +7607,1289 @@ class ParameterizedTestCaseRegistry {
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-// Author: vladl@google.com (Vlad Losev)
-
-// Type and function utilities for implementing parameterized tests.
-// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
+// Macros and functions for implementing parameterized tests
+// in Google C++ Testing and Mocking Framework (Google Test)
 //
-// Currently Google Test supports at most 50 arguments in Values,
-// and at most 10 arguments in Combine. Please contact
-// googletestframework@googlegroups.com if you need more.
-// Please note that the number of arguments to Combine is limited
-// by the maximum arity of the implementation of tr1::tuple which is
-// currently set at 10.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
-
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*.  Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-
-#if GTEST_HAS_PARAM_TEST
-
-namespace testing {
-
-// Forward declarations of ValuesIn(), which is implemented in
-// include/gtest/gtest-param-test.h.
-template <typename ForwardIterator>
-internal::ParamGenerator<
-  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
-ValuesIn(ForwardIterator begin, ForwardIterator end);
-
-template <typename T, size_t N>
-internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);
-
-template <class Container>
-internal::ParamGenerator<typename Container::value_type> ValuesIn(
-    const Container& container);
-
-namespace internal {
-
-// Used in the Values() function to provide polymorphic capabilities.
-template <typename T1>
-class ValueArray1 {
- public:
-  explicit ValueArray1(T1 v1) : v1_(v1) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const { return ValuesIn(&v1_, &v1_ + 1); }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray1& other);
-
-  const T1 v1_;
-};
-
-template <typename T1, typename T2>
-class ValueArray2 {
- public:
-  ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray2& other);
-
-  const T1 v1_;
-  const T2 v2_;
-};
-
-template <typename T1, typename T2, typename T3>
-class ValueArray3 {
- public:
-  ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray3& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4>
-class ValueArray4 {
- public:
-  ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3),
-      v4_(v4) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray4& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-class ValueArray5 {
- public:
-  ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3),
-      v4_(v4), v5_(v5) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray5& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6>
-class ValueArray6 {
- public:
-  ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2),
-      v3_(v3), v4_(v4), v5_(v5), v6_(v6) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray6& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7>
-class ValueArray7 {
- public:
-  ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1),
-      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray7& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8>
-class ValueArray8 {
- public:
-  ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
-      T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray8& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9>
-class ValueArray9 {
- public:
-  ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
-      T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray9& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10>
-class ValueArray10 {
- public:
-  ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray10& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11>
-class ValueArray11 {
- public:
-  ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
-      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray11& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-};
+// GOOGLETEST_CM0001 DO NOT DELETE
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12>
-class ValueArray12 {
- public:
-  ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
-      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray12& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13>
-class ValueArray13 {
- public:
-  ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
-      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
-      v12_(v12), v13_(v13) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray13& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14>
-class ValueArray14 {
- public:
-  ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3),
-      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray14& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15>
-class ValueArray15 {
- public:
-  ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2),
-      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray15& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16>
-class ValueArray16 {
- public:
-  ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1),
-      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
-      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
-      v16_(v16) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray16& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17>
-class ValueArray17 {
- public:
-  ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
-      T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray17& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18>
-class ValueArray18 {
- public:
-  ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray18& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19>
-class ValueArray19 {
- public:
-  ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
-      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
-      v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray19& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20>
-class ValueArray20 {
- public:
-  ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
-      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
-      v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
-      v19_(v19), v20_(v20) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray20& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21>
-class ValueArray21 {
- public:
-  ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
-      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
-      v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
-      v18_(v18), v19_(v19), v20_(v20), v21_(v21) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray21& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22>
-class ValueArray22 {
- public:
-  ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3),
-      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray22& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23>
-class ValueArray23 {
- public:
-  ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2),
-      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
-      v23_(v23) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray23& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24>
-class ValueArray24 {
- public:
-  ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1),
-      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
-      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
-      v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
-      v22_(v22), v23_(v23), v24_(v24) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray24& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25>
-class ValueArray25 {
- public:
-  ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
-      T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray25& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-};
+// Value-parameterized tests allow you to test your code with different
+// parameters without writing multiple copies of the same test.
+//
+// Here is how you use value-parameterized tests:
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26>
-class ValueArray26 {
- public:
-  ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {}
+#if 0
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_)};
-    return ValuesIn(array);
-  }
+// To write value-parameterized tests, first you should define a fixture
+// class. It is usually derived from testing::TestWithParam<T> (see below for
+// another inheritance scheme that's sometimes useful in more complicated
+// class hierarchies), where the type of your parameter values.
+// TestWithParam<T> is itself derived from testing::Test. T can be any
+// copyable type. If it's a raw pointer, you are responsible for managing the
+// lifespan of the pointed values.
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray26& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
+class FooTest : public ::testing::TestWithParam<const char*> {
+  // You can implement all the usual class fixture members here.
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27>
-class ValueArray27 {
- public:
-  ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
-      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
-      v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
-      v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
-      v26_(v26), v27_(v27) {}
+// Then, use the TEST_P macro to define as many parameterized tests
+// for this fixture as you want. The _P suffix is for "parameterized"
+// or "pattern", whichever you prefer to think.
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_)};
-    return ValuesIn(array);
-  }
+TEST_P(FooTest, DoesBlah) {
+  // Inside a test, access the test parameter with the GetParam() method
+  // of the TestWithParam<T> class:
+  EXPECT_TRUE(foo.Blah(GetParam()));
+  ...
+}
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray27& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-};
+TEST_P(FooTest, HasBlahBlah) {
+  ...
+}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28>
-class ValueArray28 {
- public:
-  ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
-      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
-      v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
-      v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
-      v25_(v25), v26_(v26), v27_(v27), v28_(v28) {}
+// Finally, you can use INSTANTIATE_TEST_SUITE_P to instantiate the test
+// case with any set of parameters you want. Google Test defines a number
+// of functions for generating test parameters. They return what we call
+// (surprise!) parameter generators. Here is a summary of them, which
+// are all in the testing namespace:
+//
+//
+//  Range(begin, end [, step]) - Yields values {begin, begin+step,
+//                               begin+step+step, ...}. The values do not
+//                               include end. step defaults to 1.
+//  Values(v1, v2, ..., vN)    - Yields values {v1, v2, ..., vN}.
+//  ValuesIn(container)        - Yields values from a C-style array, an STL
+//  ValuesIn(begin,end)          container, or an iterator range [begin, end).
+//  Bool()                     - Yields sequence {false, true}.
+//  Combine(g1, g2, ..., gN)   - Yields all combinations (the Cartesian product
+//                               for the math savvy) of the values generated
+//                               by the N generators.
+//
+// For more details, see comments at the definitions of these functions below
+// in this file.
+//
+// The following statement will instantiate tests from the FooTest test suite
+// each with parameter values "meeny", "miny", and "moe".
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_)};
-    return ValuesIn(array);
-  }
+INSTANTIATE_TEST_SUITE_P(InstantiationName,
+                         FooTest,
+                         Values("meeny", "miny", "moe"));
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray28& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-};
+// To distinguish different instances of the pattern, (yes, you
+// can instantiate it more than once) the first argument to the
+// INSTANTIATE_TEST_SUITE_P macro is a prefix that will be added to the
+// actual test suite name. Remember to pick unique prefixes for different
+// instantiations. The tests from the instantiation above will have
+// these names:
+//
+//    * InstantiationName/FooTest.DoesBlah/0 for "meeny"
+//    * InstantiationName/FooTest.DoesBlah/1 for "miny"
+//    * InstantiationName/FooTest.DoesBlah/2 for "moe"
+//    * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
+//    * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
+//    * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
+//
+// You can use these names in --gtest_filter.
+//
+// This statement will instantiate all tests from FooTest again, each
+// with parameter values "cat" and "dog":
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29>
-class ValueArray29 {
- public:
-  ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
-      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
-      v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
-      v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
-      v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {}
+const char* pets[] = {"cat", "dog"};
+INSTANTIATE_TEST_SUITE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_)};
-    return ValuesIn(array);
-  }
+// The tests from the instantiation above will have these names:
+//
+//    * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
+//    * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
+//    * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
+//    * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
+//
+// Please note that INSTANTIATE_TEST_SUITE_P will instantiate all tests
+// in the given test suite, whether their definitions come before or
+// AFTER the INSTANTIATE_TEST_SUITE_P statement.
+//
+// Please also note that generator expressions (including parameters to the
+// generators) are evaluated in InitGoogleTest(), after main() has started.
+// This allows the user on one hand, to adjust generator parameters in order
+// to dynamically determine a set of tests to run and on the other hand,
+// give the user a chance to inspect the generated tests with Google Test
+// reflection API before RUN_ALL_TESTS() is executed.
+//
+// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
+// for more examples.
+//
+// In the future, we plan to publish the API for defining new parameter
+// generators. But for now this interface remains part of the internal
+// implementation and is subject to change.
+//
+//
+// A parameterized test fixture must be derived from testing::Test and from
+// testing::WithParamInterface<T>, where T is the type of the parameter
+// values. Inheriting from TestWithParam<T> satisfies that requirement because
+// TestWithParam<T> inherits from both Test and WithParamInterface. In more
+// complicated hierarchies, however, it is occasionally useful to inherit
+// separately from Test and WithParamInterface. For example:
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray29& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
+class BaseTest : public ::testing::Test {
+  // You can inherit all the usual members for a non-parameterized test
+  // fixture here.
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30>
-class ValueArray30 {
- public:
-  ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) : v1_(v1), v2_(v2), v3_(v3),
-      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
-      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
-      v29_(v29), v30_(v30) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_)};
-    return ValuesIn(array);
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray30& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
+class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
+  // The usual test fixture members go here too.
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31>
-class ValueArray31 {
- public:
-  ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) : v1_(v1), v2_(v2),
-      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
-      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
-      v29_(v29), v30_(v30), v31_(v31) {}
+TEST_F(BaseTest, HasFoo) {
+  // This is an ordinary non-parameterized test.
+}
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_)};
-    return ValuesIn(array);
-  }
+TEST_P(DerivedTest, DoesBlah) {
+  // GetParam works just the same here as if you inherit from TestWithParam.
+  EXPECT_TRUE(foo.Blah(GetParam()));
+}
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray31& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-};
+#endif  // 0
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32>
-class ValueArray32 {
- public:
-  ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) : v1_(v1),
-      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
-      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
-      v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
-      v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
-      v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32) {}
+#include <iterator>
+#include <utility>
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_)};
-    return ValuesIn(array);
-  }
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray32& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-};
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33>
-class ValueArray33 {
- public:
-  ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
-      T33 v33) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
-      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
-      v33_(v33) {}
+// Type and function utilities for implementing parameterized tests.
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_)};
-    return ValuesIn(array);
-  }
+// GOOGLETEST_CM0001 DO NOT DELETE
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray33& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-};
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34>
-class ValueArray34 {
- public:
-  ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
-      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
-      v33_(v33), v34_(v34) {}
+#include <ctype.h>
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_)};
-    return ValuesIn(array);
-  }
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray34& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-};
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35>
-class ValueArray35 {
- public:
-  ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
-      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
-      v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
-      v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
-      v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31),
-      v32_(v32), v33_(v33), v34_(v34), v35_(v35) {}
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_)};
-    return ValuesIn(array);
-  }
+#include <iosfwd>
+#include <vector>
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray35& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-};
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36>
-class ValueArray36 {
- public:
-  ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
-      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
-      v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
-      v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
-      v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
-      v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {}
+namespace testing {
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_)};
-    return ValuesIn(array);
-  }
+// A copyable object representing the result of a test part (i.e. an
+// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()).
+//
+// Don't inherit from TestPartResult as its destructor is not virtual.
+class GTEST_API_ TestPartResult {
+ public:
+  // The possible outcomes of a test part (i.e. an assertion or an
+  // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
+  enum Type {
+    kSuccess,          // Succeeded.
+    kNonFatalFailure,  // Failed but the test can continue.
+    kFatalFailure,     // Failed and the test should be terminated.
+    kSkip              // Skipped.
+  };
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray36& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-};
+  // C'tor.  TestPartResult does NOT have a default constructor.
+  // Always use this constructor (with parameters) to create a
+  // TestPartResult object.
+  TestPartResult(Type a_type, const char* a_file_name, int a_line_number,
+                 const char* a_message)
+      : type_(a_type),
+        file_name_(a_file_name == nullptr ? "" : a_file_name),
+        line_number_(a_line_number),
+        summary_(ExtractSummary(a_message)),
+        message_(a_message) {}
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37>
-class ValueArray37 {
- public:
-  ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
-      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
-      v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
-      v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
-      v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29),
-      v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
-      v36_(v36), v37_(v37) {}
+  // Gets the outcome of the test part.
+  Type type() const { return type_; }
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_)};
-    return ValuesIn(array);
+  // Gets the name of the source file where the test part took place, or
+  // NULL if it's unknown.
+  const char* file_name() const {
+    return file_name_.empty() ? nullptr : file_name_.c_str();
   }
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray37& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38>
-class ValueArray38 {
- public:
-  ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3),
-      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
-      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
-      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
-      v35_(v35), v36_(v36), v37_(v37), v38_(v38) {}
+  // Gets the line in the source file where the test part took place,
+  // or -1 if it's unknown.
+  int line_number() const { return line_number_; }
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_)};
-    return ValuesIn(array);
-  }
+  // Gets the summary of the failure message.
+  const char* summary() const { return summary_.c_str(); }
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray38& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-};
+  // Gets the message associated with the test part.
+  const char* message() const { return message_.c_str(); }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39>
-class ValueArray39 {
- public:
-  ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) : v1_(v1), v2_(v2),
-      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
-      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
-      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
-      v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39) {}
+  // Returns true if and only if the test part was skipped.
+  bool skipped() const { return type_ == kSkip; }
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_)};
-    return ValuesIn(array);
-  }
+  // Returns true if and only if the test part passed.
+  bool passed() const { return type_ == kSuccess; }
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray39& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-};
+  // Returns true if and only if the test part non-fatally failed.
+  bool nonfatally_failed() const { return type_ == kNonFatalFailure; }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40>
-class ValueArray40 {
- public:
-  ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) : v1_(v1),
-      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
-      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
-      v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
-      v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
-      v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33),
-      v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39),
-      v40_(v40) {}
+  // Returns true if and only if the test part fatally failed.
+  bool fatally_failed() const { return type_ == kFatalFailure; }
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_)};
-    return ValuesIn(array);
-  }
+  // Returns true if and only if the test part failed.
+  bool failed() const { return fatally_failed() || nonfatally_failed(); }
 
  private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray40& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41>
-class ValueArray41 {
- public:
-  ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
-      T41 v41) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
-      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
-      v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
-      v39_(v39), v40_(v40), v41_(v41) {}
+  Type type_;
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_)};
-    return ValuesIn(array);
-  }
+  // Gets the summary of the failure message by omitting the stack
+  // trace in it.
+  static std::string ExtractSummary(const char* message);
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray41& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
+  // The name of the source file where the test part took place, or
+  // "" if the source file is unknown.
+  std::string file_name_;
+  // The line in the source file where the test part took place, or -1
+  // if the line number is unknown.
+  int line_number_;
+  std::string summary_;  // The test failure summary.
+  std::string message_;  // The test failure message.
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42>
-class ValueArray42 {
- public:
-  ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
-      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
-      v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
-      v39_(v39), v40_(v40), v41_(v41), v42_(v42) {}
+// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_)};
-    return ValuesIn(array);
-  }
+// An array of TestPartResult objects.
+//
+// Don't inherit from TestPartResultArray as its destructor is not
+// virtual.
+class GTEST_API_ TestPartResultArray {
+ public:
+  TestPartResultArray() {}
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray42& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-};
+  // Appends the given TestPartResult to the array.
+  void Append(const TestPartResult& result);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43>
-class ValueArray43 {
- public:
-  ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
-      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
-      v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
-      v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
-      v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31),
-      v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37),
-      v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43) {}
+  // Returns the TestPartResult at the given index (0-based).
+  const TestPartResult& GetTestPartResult(int index) const;
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_)};
-    return ValuesIn(array);
-  }
+  // Returns the number of TestPartResult objects in the array.
+  int size() const;
 
  private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray43& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
+  std::vector<TestPartResult> array_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44>
-class ValueArray44 {
+// This interface knows how to report a test part result.
+class GTEST_API_ TestPartResultReporterInterface {
  public:
-  ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43, T44 v44) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
-      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
-      v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
-      v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
-      v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
-      v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
-      v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
-      v43_(v43), v44_(v44) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_)};
-    return ValuesIn(array);
-  }
+  virtual ~TestPartResultReporterInterface() {}
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray44& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
-  const T44 v44_;
+  virtual void ReportTestPartResult(const TestPartResult& result) = 0;
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45>
-class ValueArray45 {
- public:
-  ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43, T44 v44, T45 v45) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
-      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
-      v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
-      v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
-      v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29),
-      v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
-      v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41),
-      v42_(v42), v43_(v43), v44_(v44), v45_(v45) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
-        static_cast<T>(v45_)};
-    return ValuesIn(array);
-  }
+namespace internal {
 
+// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
+// statement generates new fatal failures. To do so it registers itself as the
+// current test part result reporter. Besides checking if fatal failures were
+// reported, it only delegates the reporting to the former result reporter.
+// The original result reporter is restored in the destructor.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+class GTEST_API_ HasNewFatalFailureHelper
+    : public TestPartResultReporterInterface {
+ public:
+  HasNewFatalFailureHelper();
+  ~HasNewFatalFailureHelper() override;
+  void ReportTestPartResult(const TestPartResult& result) override;
+  bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
  private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray45& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
-  const T44 v44_;
-  const T45 v45_;
-};
+  bool has_new_fatal_failure_;
+  TestPartResultReporterInterface* original_reporter_;
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46>
-class ValueArray46 {
- public:
-  ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) : v1_(v1), v2_(v2), v3_(v3),
-      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
-      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
-      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
-      v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
-      v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46) {}
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
+};
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
-        static_cast<T>(v45_), static_cast<T>(v46_)};
-    return ValuesIn(array);
-  }
+}  // namespace internal
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray46& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
-  const T44 v44_;
-  const T45 v45_;
-  const T46 v46_;
-};
+}  // namespace testing
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47>
-class ValueArray47 {
- public:
-  ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) : v1_(v1), v2_(v2),
-      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
-      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
-      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
-      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
-      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
-      v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
-      v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46),
-      v47_(v47) {}
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
-        static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_)};
-    return ValuesIn(array);
-  }
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray47& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
-  const T44 v44_;
-  const T45 v45_;
-  const T46 v46_;
-  const T47 v47_;
+namespace testing {
+// Input to a parameterized test name generator, describing a test parameter.
+// Consists of the parameter value and the integer parameter index.
+template <class ParamType>
+struct TestParamInfo {
+  TestParamInfo(const ParamType& a_param, size_t an_index) :
+    param(a_param),
+    index(an_index) {}
+  ParamType param;
+  size_t index;
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48>
-class ValueArray48 {
- public:
-  ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) : v1_(v1),
-      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
-      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
-      v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
-      v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
-      v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33),
-      v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39),
-      v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45),
-      v46_(v46), v47_(v47), v48_(v48) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
-        static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
-        static_cast<T>(v48_)};
-    return ValuesIn(array);
+// A builtin parameterized test name generator which returns the result of
+// testing::PrintToString.
+struct PrintToStringParamName {
+  template <class ParamType>
+  std::string operator()(const TestParamInfo<ParamType>& info) const {
+    return PrintToString(info.param);
   }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray48& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
-  const T44 v44_;
-  const T45 v45_;
-  const T46 v46_;
-  const T47 v47_;
-  const T48 v48_;
 };
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49>
-class ValueArray49 {
- public:
-  ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
-      T49 v49) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
-      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
-      v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
-      v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44),
-      v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49) {}
-
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
-        static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
-        static_cast<T>(v48_), static_cast<T>(v49_)};
-    return ValuesIn(array);
-  }
+namespace internal {
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray49& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
-  const T44 v44_;
-  const T45 v45_;
-  const T46 v46_;
-  const T47 v47_;
-  const T48 v48_;
-  const T49 v49_;
-};
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// Utility Functions
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49, typename T50>
-class ValueArray50 {
- public:
-  ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-      T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49,
-      T50 v50) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
-      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
-      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
-      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
-      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
-      v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
-      v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44),
-      v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {}
+// Outputs a message explaining invalid registration of different
+// fixture class for the same test suite. This may happen when
+// TEST_P macro is used to define two tests with the same name
+// but in different namespaces.
+GTEST_API_ void ReportInvalidTestSuiteType(const char* test_suite_name,
+                                           CodeLocation code_location);
 
-  template <typename T>
-  operator ParamGenerator<T>() const {
-    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
-        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
-        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
-        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
-        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
-        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
-        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
-        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
-        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
-        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
-        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
-        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
-        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
-        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
-        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
-        static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
-        static_cast<T>(v48_), static_cast<T>(v49_), static_cast<T>(v50_)};
-    return ValuesIn(array);
-  }
+template <typename> class ParamGeneratorInterface;
+template <typename> class ParamGenerator;
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ValueArray50& other);
-
-  const T1 v1_;
-  const T2 v2_;
-  const T3 v3_;
-  const T4 v4_;
-  const T5 v5_;
-  const T6 v6_;
-  const T7 v7_;
-  const T8 v8_;
-  const T9 v9_;
-  const T10 v10_;
-  const T11 v11_;
-  const T12 v12_;
-  const T13 v13_;
-  const T14 v14_;
-  const T15 v15_;
-  const T16 v16_;
-  const T17 v17_;
-  const T18 v18_;
-  const T19 v19_;
-  const T20 v20_;
-  const T21 v21_;
-  const T22 v22_;
-  const T23 v23_;
-  const T24 v24_;
-  const T25 v25_;
-  const T26 v26_;
-  const T27 v27_;
-  const T28 v28_;
-  const T29 v29_;
-  const T30 v30_;
-  const T31 v31_;
-  const T32 v32_;
-  const T33 v33_;
-  const T34 v34_;
-  const T35 v35_;
-  const T36 v36_;
-  const T37 v37_;
-  const T38 v38_;
-  const T39 v39_;
-  const T40 v40_;
-  const T41 v41_;
-  const T42 v42_;
-  const T43 v43_;
-  const T44 v44_;
-  const T45 v45_;
-  const T46 v46_;
-  const T47 v47_;
-  const T48 v48_;
-  const T49 v49_;
-  const T50 v50_;
+// Interface for iterating over elements provided by an implementation
+// of ParamGeneratorInterface<T>.
+template <typename T>
+class ParamIteratorInterface {
+ public:
+  virtual ~ParamIteratorInterface() {}
+  // A pointer to the base generator instance.
+  // Used only for the purposes of iterator comparison
+  // to make sure that two iterators belong to the same generator.
+  virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
+  // Advances iterator to point to the next element
+  // provided by the generator. The caller is responsible
+  // for not calling Advance() on an iterator equal to
+  // BaseGenerator()->End().
+  virtual void Advance() = 0;
+  // Clones the iterator object. Used for implementing copy semantics
+  // of ParamIterator<T>.
+  virtual ParamIteratorInterface* Clone() const = 0;
+  // Dereferences the current iterator and provides (read-only) access
+  // to the pointed value. It is the caller's responsibility not to call
+  // Current() on an iterator equal to BaseGenerator()->End().
+  // Used for implementing ParamGenerator<T>::operator*().
+  virtual const T* Current() const = 0;
+  // Determines whether the given iterator and other point to the same
+  // element in the sequence generated by the generator.
+  // Used for implementing ParamGenerator<T>::operator==().
+  virtual bool Equals(const ParamIteratorInterface& other) const = 0;
 };
 
-# if GTEST_HAS_COMBINE
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Generates values from the Cartesian product of values produced
-// by the argument generators.
-//
-template <typename T1, typename T2>
-class CartesianProductGenerator2
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2> > {
+// Class iterating over elements provided by an implementation of
+// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
+// and implements the const forward iterator concept.
+template <typename T>
+class ParamIterator {
  public:
-  typedef ::std::tr1::tuple<T1, T2> ParamType;
+  typedef T value_type;
+  typedef const T& reference;
+  typedef ptrdiff_t difference_type;
 
-  CartesianProductGenerator2(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2)
-      : g1_(g1), g2_(g2) {}
-  virtual ~CartesianProductGenerator2() {}
+  // ParamIterator assumes ownership of the impl_ pointer.
+  ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
+  ParamIterator& operator=(const ParamIterator& other) {
+    if (this != &other)
+      impl_.reset(other.impl_->Clone());
+    return *this;
+  }
 
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin());
+  const T& operator*() const { return *impl_->Current(); }
+  const T* operator->() const { return impl_->Current(); }
+  // Prefix version of operator++.
+  ParamIterator& operator++() {
+    impl_->Advance();
+    return *this;
+  }
+  // Postfix version of operator++.
+  ParamIterator operator++(int /*unused*/) {
+    ParamIteratorInterface<T>* clone = impl_->Clone();
+    impl_->Advance();
+    return ParamIterator(clone);
+  }
+  bool operator==(const ParamIterator& other) const {
+    return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
   }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end());
+  bool operator!=(const ParamIterator& other) const {
+    return !(*this == other);
   }
 
  private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
-   public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
-
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
-      return base_;
-    }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current2_;
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
-      ComputeCurrentValue();
-    }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
-      return new Iterator(*this);
-    }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_);
-    }
+  friend class ParamGenerator<T>;
+  explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
+  std::unique_ptr<ParamIteratorInterface<T> > impl_;
+};
 
-   private:
-    Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_) {
-      ComputeCurrentValue();
-    }
+// ParamGeneratorInterface<T> is the binary interface to access generators
+// defined in other translation units.
+template <typename T>
+class ParamGeneratorInterface {
+ public:
+  typedef T ParamType;
 
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_;
-    }
+  virtual ~ParamGeneratorInterface() {}
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+  // Generator interface definition
+  virtual ParamIteratorInterface<T>* Begin() const = 0;
+  virtual ParamIteratorInterface<T>* End() const = 0;
+};
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator2::Iterator
+// Wraps ParamGeneratorInterface<T> and provides general generator syntax
+// compatible with the STL Container concept.
+// This class implements copy initialization semantics and the contained
+// ParamGeneratorInterface<T> instance is shared among all copies
+// of the original object. This is possible because that instance is immutable.
+template<typename T>
+class ParamGenerator {
+ public:
+  typedef ParamIterator<T> iterator;
 
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator2& other);
+  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
+  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
 
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-};  // class CartesianProductGenerator2
+  ParamGenerator& operator=(const ParamGenerator& other) {
+    impl_ = other.impl_;
+    return *this;
+  }
 
+  iterator begin() const { return iterator(impl_->Begin()); }
+  iterator end() const { return iterator(impl_->End()); }
 
-template <typename T1, typename T2, typename T3>
-class CartesianProductGenerator3
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3> > {
- public:
-  typedef ::std::tr1::tuple<T1, T2, T3> ParamType;
+ private:
+  std::shared_ptr<const ParamGeneratorInterface<T> > impl_;
+};
 
-  CartesianProductGenerator3(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3)
-      : g1_(g1), g2_(g2), g3_(g3) {}
-  virtual ~CartesianProductGenerator3() {}
+// Generates values from a range of two comparable values. Can be used to
+// generate sequences of user-defined types that implement operator+() and
+// operator<().
+// This class is used in the Range() function.
+template <typename T, typename IncrementT>
+class RangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+  RangeGenerator(T begin, T end, IncrementT step)
+      : begin_(begin), end_(end),
+        step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
+  ~RangeGenerator() override {}
 
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin());
+  ParamIteratorInterface<T>* Begin() const override {
+    return new Iterator(this, begin_, 0, step_);
   }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end());
+  ParamIteratorInterface<T>* End() const override {
+    return new Iterator(this, end_, end_index_, step_);
   }
 
  private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
+  class Iterator : public ParamIteratorInterface<T> {
    public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
+    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
+             IncrementT step)
+        : base_(base), value_(value), index_(index), step_(step) {}
+    ~Iterator() override {}
 
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+    const ParamGeneratorInterface<T>* BaseGenerator() const override {
       return base_;
     }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current3_;
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
-      ComputeCurrentValue();
+    void Advance() override {
+      value_ = static_cast<T>(value_ + step_);
+      index_++;
     }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
+    ParamIteratorInterface<T>* Clone() const override {
       return new Iterator(*this);
     }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+    const T* Current() const override { return &value_; }
+    bool Equals(const ParamIteratorInterface<T>& other) const override {
       // Having the same base generator guarantees that the other
       // iterator is of the same type and we can downcast.
       GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
           << "The program attempted to compare iterators "
           << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_);
+      const int other_index =
+          CheckedDowncastToActualType<const Iterator>(&other)->index_;
+      return index_ == other_index;
     }
 
    private:
     Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_) {
-      ComputeCurrentValue();
-    }
-
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_;
-    }
+        : ParamIteratorInterface<T>(),
+          base_(other.base_), value_(other.value_), index_(other.index_),
+          step_(other.step_) {}
 
     // No implementation - assignment is unsupported.
     void operator=(const Iterator& other);
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator3::Iterator
+    const ParamGeneratorInterface<T>* const base_;
+    T value_;
+    int index_;
+    const IncrementT step_;
+  };  // class RangeGenerator::Iterator
+
+  static int CalculateEndIndex(const T& begin,
+                               const T& end,
+                               const IncrementT& step) {
+    int end_index = 0;
+    for (T i = begin; i < end; i = static_cast<T>(i + step))
+      end_index++;
+    return end_index;
+  }
 
   // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator3& other);
+  void operator=(const RangeGenerator& other);
 
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-};  // class CartesianProductGenerator3
+  const T begin_;
+  const T end_;
+  const IncrementT step_;
+  // The index for the end() iterator. All the elements in the generated
+  // sequence are indexed (0-based) to aid iterator comparison.
+  const int end_index_;
+};  // class RangeGenerator
 
 
-template <typename T1, typename T2, typename T3, typename T4>
-class CartesianProductGenerator4
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4> > {
+// Generates values from a pair of STL-style iterators. Used in the
+// ValuesIn() function. The elements are copied from the source range
+// since the source can be located on the stack, and the generator
+// is likely to persist beyond that stack frame.
+template <typename T>
+class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
  public:
-  typedef ::std::tr1::tuple<T1, T2, T3, T4> ParamType;
-
-  CartesianProductGenerator4(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
-      const ParamGenerator<T4>& g4)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
-  virtual ~CartesianProductGenerator4() {}
+  template <typename ForwardIterator>
+  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
+      : container_(begin, end) {}
+  ~ValuesInIteratorRangeGenerator() override {}
 
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin(), g4_, g4_.begin());
+  ParamIteratorInterface<T>* Begin() const override {
+    return new Iterator(this, container_.begin());
   }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
-        g4_, g4_.end());
+  ParamIteratorInterface<T>* End() const override {
+    return new Iterator(this, container_.end());
   }
 
  private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
+  typedef typename ::std::vector<T> ContainerType;
+
+  class Iterator : public ParamIteratorInterface<T> {
    public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3,
-      const ParamGenerator<T4>& g4,
-      const typename ParamGenerator<T4>::iterator& current4)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
-          begin4_(g4.begin()), end4_(g4.end()), current4_(current4)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
+    Iterator(const ParamGeneratorInterface<T>* base,
+             typename ContainerType::const_iterator iterator)
+        : base_(base), iterator_(iterator) {}
+    ~Iterator() override {}
 
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+    const ParamGeneratorInterface<T>* BaseGenerator() const override {
       return base_;
     }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current4_;
-      if (current4_ == end4_) {
-        current4_ = begin4_;
-        ++current3_;
-      }
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
-      ComputeCurrentValue();
+    void Advance() override {
+      ++iterator_;
+      value_.reset();
     }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
+    ParamIteratorInterface<T>* Clone() const override {
       return new Iterator(*this);
     }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+    // We need to use cached value referenced by iterator_ because *iterator_
+    // can return a temporary object (and of type other then T), so just
+    // having "return &*iterator_;" doesn't work.
+    // value_ is updated here and not in Advance() because Advance()
+    // can advance iterator_ beyond the end of the range, and we cannot
+    // detect that fact. The client code, on the other hand, is
+    // responsible for not calling Current() on an out-of-range iterator.
+    const T* Current() const override {
+      if (value_.get() == nullptr) value_.reset(new T(*iterator_));
+      return value_.get();
+    }
+    bool Equals(const ParamIteratorInterface<T>& other) const override {
       // Having the same base generator guarantees that the other
       // iterator is of the same type and we can downcast.
       GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
           << "The program attempted to compare iterators "
           << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_ &&
-          current4_ == typed_other->current4_);
+      return iterator_ ==
+          CheckedDowncastToActualType<const Iterator>(&other)->iterator_;
     }
 
    private:
     Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_),
-        begin4_(other.begin4_),
-        end4_(other.end4_),
-        current4_(other.current4_) {
-      ComputeCurrentValue();
-    }
+          // The explicit constructor call suppresses a false warning
+          // emitted by gcc when supplied with the -Wextra option.
+        : ParamIteratorInterface<T>(),
+          base_(other.base_),
+          iterator_(other.iterator_) {}
 
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_,
-            *current4_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_ ||
-          current4_ == end4_;
-    }
+    const ParamGeneratorInterface<T>* const base_;
+    typename ContainerType::const_iterator iterator_;
+    // A cached value of *iterator_. We keep it here to allow access by
+    // pointer in the wrapping iterator's operator->().
+    // value_ needs to be mutable to be accessed in Current().
+    // Use of std::unique_ptr helps manage cached value's lifetime,
+    // which is bound by the lifespan of the iterator itself.
+    mutable std::unique_ptr<const T> value_;
+  };  // class ValuesInIteratorRangeGenerator::Iterator
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+  // No implementation - assignment is unsupported.
+  void operator=(const ValuesInIteratorRangeGenerator& other);
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    const typename ParamGenerator<T4>::iterator begin4_;
-    const typename ParamGenerator<T4>::iterator end4_;
-    typename ParamGenerator<T4>::iterator current4_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator4::Iterator
+  const ContainerType container_;
+};  // class ValuesInIteratorRangeGenerator
 
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator4& other);
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Default parameterized test name generator, returns a string containing the
+// integer test parameter index.
+template <class ParamType>
+std::string DefaultParamName(const TestParamInfo<ParamType>& info) {
+  Message name_stream;
+  name_stream << info.index;
+  return name_stream.GetString();
+}
+
+template <typename T = int>
+void TestNotEmpty() {
+  static_assert(sizeof(T) == 0, "Empty arguments are not allowed.");
+}
+template <typename T = int>
+void TestNotEmpty(const T&) {}
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Stores a parameter value and later creates tests parameterized with that
+// value.
+template <class TestClass>
+class ParameterizedTestFactory : public TestFactoryBase {
+ public:
+  typedef typename TestClass::ParamType ParamType;
+  explicit ParameterizedTestFactory(ParamType parameter) :
+      parameter_(parameter) {}
+  Test* CreateTest() override {
+    TestClass::SetParam(&parameter_);
+    return new TestClass();
+  }
+
+ private:
+  const ParamType parameter_;
 
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-  const ParamGenerator<T4> g4_;
-};  // class CartesianProductGenerator4
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactoryBase is a base class for meta-factories that create
+// test factories for passing into MakeAndRegisterTestInfo function.
+template <class ParamType>
+class TestMetaFactoryBase {
+ public:
+  virtual ~TestMetaFactoryBase() {}
 
+  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
+};
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-class CartesianProductGenerator5
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5> > {
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactory creates test factories for passing into
+// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
+// ownership of test factory pointer, same factory object cannot be passed
+// into that method twice. But ParameterizedTestSuiteInfo is going to call
+// it for each Test/Parameter value combination. Thus it needs meta factory
+// creator class.
+template <class TestSuite>
+class TestMetaFactory
+    : public TestMetaFactoryBase<typename TestSuite::ParamType> {
  public:
-  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5> ParamType;
+  using ParamType = typename TestSuite::ParamType;
 
-  CartesianProductGenerator5(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
-      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}
-  virtual ~CartesianProductGenerator5() {}
+  TestMetaFactory() {}
 
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin());
-  }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
-        g4_, g4_.end(), g5_, g5_.end());
+  TestFactoryBase* CreateTestFactory(ParamType parameter) override {
+    return new ParameterizedTestFactory<TestSuite>(parameter);
   }
 
  private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
-   public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3,
-      const ParamGenerator<T4>& g4,
-      const typename ParamGenerator<T4>::iterator& current4,
-      const ParamGenerator<T5>& g5,
-      const typename ParamGenerator<T5>::iterator& current5)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
-          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
-          begin5_(g5.begin()), end5_(g5.end()), current5_(current5)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
-
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
-      return base_;
-    }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current5_;
-      if (current5_ == end5_) {
-        current5_ = begin5_;
-        ++current4_;
-      }
-      if (current4_ == end4_) {
-        current4_ = begin4_;
-        ++current3_;
-      }
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
-      ComputeCurrentValue();
-    }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
-      return new Iterator(*this);
-    }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_ &&
-          current4_ == typed_other->current4_ &&
-          current5_ == typed_other->current5_);
-    }
-
-   private:
-    Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_),
-        begin4_(other.begin4_),
-        end4_(other.end4_),
-        current4_(other.current4_),
-        begin5_(other.begin5_),
-        end5_(other.end5_),
-        current5_(other.current5_) {
-      ComputeCurrentValue();
-    }
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
+};
 
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_,
-            *current4_, *current5_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_ ||
-          current4_ == end4_ ||
-          current5_ == end5_;
-    }
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteInfoBase is a generic interface
+// to ParameterizedTestSuiteInfo classes. ParameterizedTestSuiteInfoBase
+// accumulates test information provided by TEST_P macro invocations
+// and generators provided by INSTANTIATE_TEST_SUITE_P macro invocations
+// and uses that information to register all resulting test instances
+// in RegisterTests method. The ParameterizeTestSuiteRegistry class holds
+// a collection of pointers to the ParameterizedTestSuiteInfo objects
+// and calls RegisterTests() on each of them when asked.
+class ParameterizedTestSuiteInfoBase {
+ public:
+  virtual ~ParameterizedTestSuiteInfoBase() {}
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+  // Base part of test suite name for display purposes.
+  virtual const std::string& GetTestSuiteName() const = 0;
+  // Test suite id to verify identity.
+  virtual TypeId GetTestSuiteTypeId() const = 0;
+  // UnitTest class invokes this method to register tests in this
+  // test suite right before running them in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestSuiteInfoBase derived class.
+  virtual void RegisterTests() = 0;
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    const typename ParamGenerator<T4>::iterator begin4_;
-    const typename ParamGenerator<T4>::iterator end4_;
-    typename ParamGenerator<T4>::iterator current4_;
-    const typename ParamGenerator<T5>::iterator begin5_;
-    const typename ParamGenerator<T5>::iterator end5_;
-    typename ParamGenerator<T5>::iterator current5_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator5::Iterator
+ protected:
+  ParameterizedTestSuiteInfoBase() {}
 
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator5& other);
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteInfoBase);
+};
 
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-  const ParamGenerator<T4> g4_;
-  const ParamGenerator<T5> g5_;
-};  // class CartesianProductGenerator5
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Report a the name of a test_suit as safe to ignore
+// as the side effect of construction of this type.
+struct GTEST_API_ MarkAsIgnored {
+  explicit MarkAsIgnored(const char* test_suite);
+};
 
+GTEST_API_ void InsertSyntheticTestCase(const std::string& name,
+                                        CodeLocation location, bool has_test_p);
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6>
-class CartesianProductGenerator6
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5,
-        T6> > {
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteInfo accumulates tests obtained from TEST_P
+// macro invocations for a particular test suite and generators
+// obtained from INSTANTIATE_TEST_SUITE_P macro invocations for that
+// test suite. It registers tests with all values generated by all
+// generators when asked.
+template <class TestSuite>
+class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
  public:
-  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> ParamType;
-
-  CartesianProductGenerator6(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
-      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
-      const ParamGenerator<T6>& g6)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}
-  virtual ~CartesianProductGenerator6() {}
-
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin());
+  // ParamType and GeneratorCreationFunc are private types but are required
+  // for declarations of public methods AddTestPattern() and
+  // AddTestSuiteInstantiation().
+  using ParamType = typename TestSuite::ParamType;
+  // A function that returns an instance of appropriate generator type.
+  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
+  using ParamNameGeneratorFunc = std::string(const TestParamInfo<ParamType>&);
+
+  explicit ParameterizedTestSuiteInfo(const char* name,
+                                      CodeLocation code_location)
+      : test_suite_name_(name), code_location_(code_location) {}
+
+  // Test suite base name for display purposes.
+  const std::string& GetTestSuiteName() const override {
+    return test_suite_name_;
   }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
-        g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end());
+  // Test suite id to verify identity.
+  TypeId GetTestSuiteTypeId() const override { return GetTypeId<TestSuite>(); }
+  // TEST_P macro uses AddTestPattern() to record information
+  // about a single test in a LocalTestInfo structure.
+  // test_suite_name is the base name of the test suite (without invocation
+  // prefix). test_base_name is the name of an individual test without
+  // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
+  // test suite base name and DoBar is test base name.
+  void AddTestPattern(const char* test_suite_name, const char* test_base_name,
+                      TestMetaFactoryBase<ParamType>* meta_factory,
+                      CodeLocation code_location) {
+    tests_.push_back(std::shared_ptr<TestInfo>(new TestInfo(
+        test_suite_name, test_base_name, meta_factory, code_location)));
+  }
+  // INSTANTIATE_TEST_SUITE_P macro uses AddGenerator() to record information
+  // about a generator.
+  int AddTestSuiteInstantiation(const std::string& instantiation_name,
+                                GeneratorCreationFunc* func,
+                                ParamNameGeneratorFunc* name_func,
+                                const char* file, int line) {
+    instantiations_.push_back(
+        InstantiationInfo(instantiation_name, func, name_func, file, line));
+    return 0;  // Return value used only to run this method in namespace scope.
   }
+  // UnitTest class invokes this method to register tests in this test suite
+  // right before running tests in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestSuiteInfoBase derived class.
+  // UnitTest has a guard to prevent from calling this method more than once.
+  void RegisterTests() override {
+    bool generated_instantiations = false;
 
- private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
-   public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3,
-      const ParamGenerator<T4>& g4,
-      const typename ParamGenerator<T4>::iterator& current4,
-      const ParamGenerator<T5>& g5,
-      const typename ParamGenerator<T5>::iterator& current5,
-      const ParamGenerator<T6>& g6,
-      const typename ParamGenerator<T6>::iterator& current6)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
-          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
-          begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
-          begin6_(g6.begin()), end6_(g6.end()), current6_(current6)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
+    for (typename TestInfoContainer::iterator test_it = tests_.begin();
+         test_it != tests_.end(); ++test_it) {
+      std::shared_ptr<TestInfo> test_info = *test_it;
+      for (typename InstantiationContainer::iterator gen_it =
+               instantiations_.begin(); gen_it != instantiations_.end();
+               ++gen_it) {
+        const std::string& instantiation_name = gen_it->name;
+        ParamGenerator<ParamType> generator((*gen_it->generator)());
+        ParamNameGeneratorFunc* name_func = gen_it->name_func;
+        const char* file = gen_it->file;
+        int line = gen_it->line;
 
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
-      return base_;
-    }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current6_;
-      if (current6_ == end6_) {
-        current6_ = begin6_;
-        ++current5_;
-      }
-      if (current5_ == end5_) {
-        current5_ = begin5_;
-        ++current4_;
-      }
-      if (current4_ == end4_) {
-        current4_ = begin4_;
-        ++current3_;
-      }
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
-      ComputeCurrentValue();
-    }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
-      return new Iterator(*this);
-    }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_ &&
-          current4_ == typed_other->current4_ &&
-          current5_ == typed_other->current5_ &&
-          current6_ == typed_other->current6_);
-    }
+        std::string test_suite_name;
+        if ( !instantiation_name.empty() )
+          test_suite_name = instantiation_name + "/";
+        test_suite_name += test_info->test_suite_base_name;
 
-   private:
-    Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_),
-        begin4_(other.begin4_),
-        end4_(other.end4_),
-        current4_(other.current4_),
-        begin5_(other.begin5_),
-        end5_(other.end5_),
-        current5_(other.current5_),
-        begin6_(other.begin6_),
-        end6_(other.end6_),
-        current6_(other.current6_) {
-      ComputeCurrentValue();
-    }
+        size_t i = 0;
+        std::set<std::string> test_param_names;
+        for (typename ParamGenerator<ParamType>::iterator param_it =
+                 generator.begin();
+             param_it != generator.end(); ++param_it, ++i) {
+          generated_instantiations = true;
 
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_,
-            *current4_, *current5_, *current6_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_ ||
-          current4_ == end4_ ||
-          current5_ == end5_ ||
-          current6_ == end6_;
-    }
+          Message test_name_stream;
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+          std::string param_name = name_func(
+              TestParamInfo<ParamType>(*param_it, i));
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    const typename ParamGenerator<T4>::iterator begin4_;
-    const typename ParamGenerator<T4>::iterator end4_;
-    typename ParamGenerator<T4>::iterator current4_;
-    const typename ParamGenerator<T5>::iterator begin5_;
-    const typename ParamGenerator<T5>::iterator end5_;
-    typename ParamGenerator<T5>::iterator current5_;
-    const typename ParamGenerator<T6>::iterator begin6_;
-    const typename ParamGenerator<T6>::iterator end6_;
-    typename ParamGenerator<T6>::iterator current6_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator6::Iterator
+          GTEST_CHECK_(IsValidParamName(param_name))
+              << "Parameterized test name '" << param_name
+              << "' is invalid, in " << file
+              << " line " << line << std::endl;
 
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator6& other);
-
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-  const ParamGenerator<T4> g4_;
-  const ParamGenerator<T5> g5_;
-  const ParamGenerator<T6> g6_;
-};  // class CartesianProductGenerator6
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7>
-class CartesianProductGenerator7
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
-        T7> > {
- public:
-  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> ParamType;
-
-  CartesianProductGenerator7(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
-      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
-      const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}
-  virtual ~CartesianProductGenerator7() {}
-
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
-        g7_.begin());
-  }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
-        g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end());
-  }
+          GTEST_CHECK_(test_param_names.count(param_name) == 0)
+              << "Duplicate parameterized test name '" << param_name
+              << "', in " << file << " line " << line << std::endl;
 
- private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
-   public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3,
-      const ParamGenerator<T4>& g4,
-      const typename ParamGenerator<T4>::iterator& current4,
-      const ParamGenerator<T5>& g5,
-      const typename ParamGenerator<T5>::iterator& current5,
-      const ParamGenerator<T6>& g6,
-      const typename ParamGenerator<T6>::iterator& current6,
-      const ParamGenerator<T7>& g7,
-      const typename ParamGenerator<T7>::iterator& current7)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
-          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
-          begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
-          begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
-          begin7_(g7.begin()), end7_(g7.end()), current7_(current7)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
+          test_param_names.insert(param_name);
 
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
-      return base_;
-    }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current7_;
-      if (current7_ == end7_) {
-        current7_ = begin7_;
-        ++current6_;
-      }
-      if (current6_ == end6_) {
-        current6_ = begin6_;
-        ++current5_;
-      }
-      if (current5_ == end5_) {
-        current5_ = begin5_;
-        ++current4_;
-      }
-      if (current4_ == end4_) {
-        current4_ = begin4_;
-        ++current3_;
-      }
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
-      ComputeCurrentValue();
-    }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
-      return new Iterator(*this);
-    }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_ &&
-          current4_ == typed_other->current4_ &&
-          current5_ == typed_other->current5_ &&
-          current6_ == typed_other->current6_ &&
-          current7_ == typed_other->current7_);
-    }
+          if (!test_info->test_base_name.empty()) {
+            test_name_stream << test_info->test_base_name << "/";
+          }
+          test_name_stream << param_name;
+          MakeAndRegisterTestInfo(
+              test_suite_name.c_str(), test_name_stream.GetString().c_str(),
+              nullptr,  // No type parameter.
+              PrintToString(*param_it).c_str(), test_info->code_location,
+              GetTestSuiteTypeId(),
+              SuiteApiResolver<TestSuite>::GetSetUpCaseOrSuite(file, line),
+              SuiteApiResolver<TestSuite>::GetTearDownCaseOrSuite(file, line),
+              test_info->test_meta_factory->CreateTestFactory(*param_it));
+        }  // for param_it
+      }  // for gen_it
+    }  // for test_it
 
-   private:
-    Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_),
-        begin4_(other.begin4_),
-        end4_(other.end4_),
-        current4_(other.current4_),
-        begin5_(other.begin5_),
-        end5_(other.end5_),
-        current5_(other.current5_),
-        begin6_(other.begin6_),
-        end6_(other.end6_),
-        current6_(other.current6_),
-        begin7_(other.begin7_),
-        end7_(other.end7_),
-        current7_(other.current7_) {
-      ComputeCurrentValue();
+    if (!generated_instantiations) {
+      // There are no generaotrs, or they all generate nothing ...
+      InsertSyntheticTestCase(GetTestSuiteName(), code_location_,
+                              !tests_.empty());
     }
+  }    // RegisterTests
 
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_,
-            *current4_, *current5_, *current6_, *current7_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_ ||
-          current4_ == end4_ ||
-          current5_ == end5_ ||
-          current6_ == end6_ ||
-          current7_ == end7_;
-    }
+ private:
+  // LocalTestInfo structure keeps information about a single test registered
+  // with TEST_P macro.
+  struct TestInfo {
+    TestInfo(const char* a_test_suite_base_name, const char* a_test_base_name,
+             TestMetaFactoryBase<ParamType>* a_test_meta_factory,
+             CodeLocation a_code_location)
+        : test_suite_base_name(a_test_suite_base_name),
+          test_base_name(a_test_base_name),
+          test_meta_factory(a_test_meta_factory),
+          code_location(a_code_location) {}
+
+    const std::string test_suite_base_name;
+    const std::string test_base_name;
+    const std::unique_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
+    const CodeLocation code_location;
+  };
+  using TestInfoContainer = ::std::vector<std::shared_ptr<TestInfo> >;
+  // Records data received from INSTANTIATE_TEST_SUITE_P macros:
+  //  <Instantiation name, Sequence generator creation function,
+  //     Name generator function, Source file, Source line>
+  struct InstantiationInfo {
+      InstantiationInfo(const std::string &name_in,
+                        GeneratorCreationFunc* generator_in,
+                        ParamNameGeneratorFunc* name_func_in,
+                        const char* file_in,
+                        int line_in)
+          : name(name_in),
+            generator(generator_in),
+            name_func(name_func_in),
+            file(file_in),
+            line(line_in) {}
+
+      std::string name;
+      GeneratorCreationFunc* generator;
+      ParamNameGeneratorFunc* name_func;
+      const char* file;
+      int line;
+  };
+  typedef ::std::vector<InstantiationInfo> InstantiationContainer;
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+  static bool IsValidParamName(const std::string& name) {
+    // Check for empty string
+    if (name.empty())
+      return false;
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    const typename ParamGenerator<T4>::iterator begin4_;
-    const typename ParamGenerator<T4>::iterator end4_;
-    typename ParamGenerator<T4>::iterator current4_;
-    const typename ParamGenerator<T5>::iterator begin5_;
-    const typename ParamGenerator<T5>::iterator end5_;
-    typename ParamGenerator<T5>::iterator current5_;
-    const typename ParamGenerator<T6>::iterator begin6_;
-    const typename ParamGenerator<T6>::iterator end6_;
-    typename ParamGenerator<T6>::iterator current6_;
-    const typename ParamGenerator<T7>::iterator begin7_;
-    const typename ParamGenerator<T7>::iterator end7_;
-    typename ParamGenerator<T7>::iterator current7_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator7::Iterator
+    // Check for invalid characters
+    for (std::string::size_type index = 0; index < name.size(); ++index) {
+      if (!IsAlNum(name[index]) && name[index] != '_')
+        return false;
+    }
 
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator7& other);
-
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-  const ParamGenerator<T4> g4_;
-  const ParamGenerator<T5> g5_;
-  const ParamGenerator<T6> g6_;
-  const ParamGenerator<T7> g7_;
-};  // class CartesianProductGenerator7
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8>
-class CartesianProductGenerator8
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
-        T7, T8> > {
- public:
-  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> ParamType;
-
-  CartesianProductGenerator8(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
-      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
-      const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
-      const ParamGenerator<T8>& g8)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
-          g8_(g8) {}
-  virtual ~CartesianProductGenerator8() {}
-
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
-        g7_.begin(), g8_, g8_.begin());
-  }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
-        g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
-        g8_.end());
+    return true;
   }
 
- private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
-   public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3,
-      const ParamGenerator<T4>& g4,
-      const typename ParamGenerator<T4>::iterator& current4,
-      const ParamGenerator<T5>& g5,
-      const typename ParamGenerator<T5>::iterator& current5,
-      const ParamGenerator<T6>& g6,
-      const typename ParamGenerator<T6>::iterator& current6,
-      const ParamGenerator<T7>& g7,
-      const typename ParamGenerator<T7>::iterator& current7,
-      const ParamGenerator<T8>& g8,
-      const typename ParamGenerator<T8>::iterator& current8)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
-          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
-          begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
-          begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
-          begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
-          begin8_(g8.begin()), end8_(g8.end()), current8_(current8)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
+  const std::string test_suite_name_;
+  CodeLocation code_location_;
+  TestInfoContainer tests_;
+  InstantiationContainer instantiations_;
 
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
-      return base_;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteInfo);
+};  // class ParameterizedTestSuiteInfo
+
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+template <class TestCase>
+using ParameterizedTestCaseInfo = ParameterizedTestSuiteInfo<TestCase>;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteRegistry contains a map of
+// ParameterizedTestSuiteInfoBase classes accessed by test suite names. TEST_P
+// and INSTANTIATE_TEST_SUITE_P macros use it to locate their corresponding
+// ParameterizedTestSuiteInfo descriptors.
+class ParameterizedTestSuiteRegistry {
+ public:
+  ParameterizedTestSuiteRegistry() {}
+  ~ParameterizedTestSuiteRegistry() {
+    for (auto& test_suite_info : test_suite_infos_) {
+      delete test_suite_info;
     }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current8_;
-      if (current8_ == end8_) {
-        current8_ = begin8_;
-        ++current7_;
-      }
-      if (current7_ == end7_) {
-        current7_ = begin7_;
-        ++current6_;
-      }
-      if (current6_ == end6_) {
-        current6_ = begin6_;
-        ++current5_;
-      }
-      if (current5_ == end5_) {
-        current5_ = begin5_;
-        ++current4_;
-      }
-      if (current4_ == end4_) {
-        current4_ = begin4_;
-        ++current3_;
-      }
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
+  }
+
+  // Looks up or creates and returns a structure containing information about
+  // tests and instantiations of a particular test suite.
+  template <class TestSuite>
+  ParameterizedTestSuiteInfo<TestSuite>* GetTestSuitePatternHolder(
+      const char* test_suite_name, CodeLocation code_location) {
+    ParameterizedTestSuiteInfo<TestSuite>* typed_test_info = nullptr;
+    for (auto& test_suite_info : test_suite_infos_) {
+      if (test_suite_info->GetTestSuiteName() == test_suite_name) {
+        if (test_suite_info->GetTestSuiteTypeId() != GetTypeId<TestSuite>()) {
+          // Complain about incorrect usage of Google Test facilities
+          // and terminate the program since we cannot guaranty correct
+          // test suite setup and tear-down in this case.
+          ReportInvalidTestSuiteType(test_suite_name, code_location);
+          posix::Abort();
+        } else {
+          // At this point we are sure that the object we found is of the same
+          // type we are looking for, so we downcast it to that type
+          // without further checks.
+          typed_test_info = CheckedDowncastToActualType<
+              ParameterizedTestSuiteInfo<TestSuite> >(test_suite_info);
+        }
+        break;
       }
-      ComputeCurrentValue();
     }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
-      return new Iterator(*this);
+    if (typed_test_info == nullptr) {
+      typed_test_info = new ParameterizedTestSuiteInfo<TestSuite>(
+          test_suite_name, code_location);
+      test_suite_infos_.push_back(typed_test_info);
     }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_ &&
-          current4_ == typed_other->current4_ &&
-          current5_ == typed_other->current5_ &&
-          current6_ == typed_other->current6_ &&
-          current7_ == typed_other->current7_ &&
-          current8_ == typed_other->current8_);
+    return typed_test_info;
+  }
+  void RegisterTests() {
+    for (auto& test_suite_info : test_suite_infos_) {
+      test_suite_info->RegisterTests();
     }
+  }
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  template <class TestCase>
+  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
+      const char* test_case_name, CodeLocation code_location) {
+    return GetTestSuitePatternHolder<TestCase>(test_case_name, code_location);
+  }
 
-   private:
-    Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_),
-        begin4_(other.begin4_),
-        end4_(other.end4_),
-        current4_(other.current4_),
-        begin5_(other.begin5_),
-        end5_(other.end5_),
-        current5_(other.current5_),
-        begin6_(other.begin6_),
-        end6_(other.end6_),
-        current6_(other.current6_),
-        begin7_(other.begin7_),
-        end7_(other.end7_),
-        current7_(other.current7_),
-        begin8_(other.begin8_),
-        end8_(other.end8_),
-        current8_(other.current8_) {
-      ComputeCurrentValue();
-    }
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_,
-            *current4_, *current5_, *current6_, *current7_, *current8_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_ ||
-          current4_ == end4_ ||
-          current5_ == end5_ ||
-          current6_ == end6_ ||
-          current7_ == end7_ ||
-          current8_ == end8_;
-    }
+ private:
+  using TestSuiteInfoContainer = ::std::vector<ParameterizedTestSuiteInfoBase*>;
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+  TestSuiteInfoContainer test_suite_infos_;
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    const typename ParamGenerator<T4>::iterator begin4_;
-    const typename ParamGenerator<T4>::iterator end4_;
-    typename ParamGenerator<T4>::iterator current4_;
-    const typename ParamGenerator<T5>::iterator begin5_;
-    const typename ParamGenerator<T5>::iterator end5_;
-    typename ParamGenerator<T5>::iterator current5_;
-    const typename ParamGenerator<T6>::iterator begin6_;
-    const typename ParamGenerator<T6>::iterator end6_;
-    typename ParamGenerator<T6>::iterator current6_;
-    const typename ParamGenerator<T7>::iterator begin7_;
-    const typename ParamGenerator<T7>::iterator end7_;
-    typename ParamGenerator<T7>::iterator current7_;
-    const typename ParamGenerator<T8>::iterator begin8_;
-    const typename ParamGenerator<T8>::iterator end8_;
-    typename ParamGenerator<T8>::iterator current8_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator8::Iterator
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteRegistry);
+};
 
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator8& other);
-
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-  const ParamGenerator<T4> g4_;
-  const ParamGenerator<T5> g5_;
-  const ParamGenerator<T6> g6_;
-  const ParamGenerator<T7> g7_;
-  const ParamGenerator<T8> g8_;
-};  // class CartesianProductGenerator8
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9>
-class CartesianProductGenerator9
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
-        T7, T8, T9> > {
+// Keep track of what type-parameterized test suite are defined and
+// where as well as which are intatiated. This allows susequently
+// identifying suits that are defined but never used.
+class TypeParameterizedTestSuiteRegistry {
  public:
-  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> ParamType;
-
-  CartesianProductGenerator9(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
-      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
-      const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
-      const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
-          g9_(g9) {}
-  virtual ~CartesianProductGenerator9() {}
-
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
-        g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin());
-  }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
-        g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
-        g8_.end(), g9_, g9_.end());
-  }
+  // Add a suite definition
+  void RegisterTestSuite(const char* test_suite_name,
+                         CodeLocation code_location);
+
+  // Add an instantiation of a suit.
+  void RegisterInstantiation(const char* test_suite_name);
+
+  // For each suit repored as defined but not reported as instantiation,
+  // emit a test that reports that fact (configurably, as an error).
+  void CheckForInstantiations();
 
  private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
-   public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3,
-      const ParamGenerator<T4>& g4,
-      const typename ParamGenerator<T4>::iterator& current4,
-      const ParamGenerator<T5>& g5,
-      const typename ParamGenerator<T5>::iterator& current5,
-      const ParamGenerator<T6>& g6,
-      const typename ParamGenerator<T6>::iterator& current6,
-      const ParamGenerator<T7>& g7,
-      const typename ParamGenerator<T7>::iterator& current7,
-      const ParamGenerator<T8>& g8,
-      const typename ParamGenerator<T8>::iterator& current8,
-      const ParamGenerator<T9>& g9,
-      const typename ParamGenerator<T9>::iterator& current9)
-        : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
-          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
-          begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
-          begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
-          begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
-          begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
-          begin9_(g9.begin()), end9_(g9.end()), current9_(current9)    {
-      ComputeCurrentValue();
-    }
-    virtual ~Iterator() {}
+  struct TypeParameterizedTestSuiteInfo {
+    explicit TypeParameterizedTestSuiteInfo(CodeLocation c)
+        : code_location(c), instantiated(false) {}
 
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
-      return base_;
-    }
-    // Advance should not be called on beyond-of-range iterators
-    // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
-      assert(!AtEnd());
-      ++current9_;
-      if (current9_ == end9_) {
-        current9_ = begin9_;
-        ++current8_;
-      }
-      if (current8_ == end8_) {
-        current8_ = begin8_;
-        ++current7_;
-      }
-      if (current7_ == end7_) {
-        current7_ = begin7_;
-        ++current6_;
-      }
-      if (current6_ == end6_) {
-        current6_ = begin6_;
-        ++current5_;
-      }
-      if (current5_ == end5_) {
-        current5_ = begin5_;
-        ++current4_;
-      }
-      if (current4_ == end4_) {
-        current4_ = begin4_;
-        ++current3_;
-      }
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
-      ComputeCurrentValue();
-    }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
-      return new Iterator(*this);
-    }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
-      // Having the same base generator guarantees that the other
-      // iterator is of the same type and we can downcast.
-      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
-          << "The program attempted to compare iterators "
-          << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
-      // We must report iterators equal if they both point beyond their
-      // respective ranges. That can happen in a variety of fashions,
-      // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_ &&
-          current4_ == typed_other->current4_ &&
-          current5_ == typed_other->current5_ &&
-          current6_ == typed_other->current6_ &&
-          current7_ == typed_other->current7_ &&
-          current8_ == typed_other->current8_ &&
-          current9_ == typed_other->current9_);
-    }
+    CodeLocation code_location;
+    bool instantiated;
+  };
+
+  std::map<std::string, TypeParameterizedTestSuiteInfo> suites_;
+};
+
+}  // namespace internal
+
+// Forward declarations of ValuesIn(), which is implemented in
+// include/gtest/gtest-param-test.h.
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+    const Container& container);
+
+namespace internal {
+// Used in the Values() function to provide polymorphic capabilities.
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#endif
 
-   private:
-    Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_),
-        begin4_(other.begin4_),
-        end4_(other.end4_),
-        current4_(other.current4_),
-        begin5_(other.begin5_),
-        end5_(other.end5_),
-        current5_(other.current5_),
-        begin6_(other.begin6_),
-        end6_(other.end6_),
-        current6_(other.current6_),
-        begin7_(other.begin7_),
-        end7_(other.end7_),
-        current7_(other.current7_),
-        begin8_(other.begin8_),
-        end8_(other.end8_),
-        current8_(other.current8_),
-        begin9_(other.begin9_),
-        end9_(other.end9_),
-        current9_(other.current9_) {
-      ComputeCurrentValue();
-    }
+template <typename... Ts>
+class ValueArray {
+ public:
+  explicit ValueArray(Ts... v) : v_(FlatTupleConstructTag{}, std::move(v)...) {}
 
-    void ComputeCurrentValue() {
-      if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_,
-            *current4_, *current5_, *current6_, *current7_, *current8_,
-            *current9_);
-    }
-    bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_ ||
-          current4_ == end4_ ||
-          current5_ == end5_ ||
-          current6_ == end6_ ||
-          current7_ == end7_ ||
-          current8_ == end8_ ||
-          current9_ == end9_;
-    }
+  template <typename T>
+  operator ParamGenerator<T>() const {  // NOLINT
+    return ValuesIn(MakeVector<T>(MakeIndexSequence<sizeof...(Ts)>()));
+  }
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
+ private:
+  template <typename T, size_t... I>
+  std::vector<T> MakeVector(IndexSequence<I...>) const {
+    return std::vector<T>{static_cast<T>(v_.template Get<I>())...};
+  }
 
-    const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    const typename ParamGenerator<T4>::iterator begin4_;
-    const typename ParamGenerator<T4>::iterator end4_;
-    typename ParamGenerator<T4>::iterator current4_;
-    const typename ParamGenerator<T5>::iterator begin5_;
-    const typename ParamGenerator<T5>::iterator end5_;
-    typename ParamGenerator<T5>::iterator current5_;
-    const typename ParamGenerator<T6>::iterator begin6_;
-    const typename ParamGenerator<T6>::iterator end6_;
-    typename ParamGenerator<T6>::iterator current6_;
-    const typename ParamGenerator<T7>::iterator begin7_;
-    const typename ParamGenerator<T7>::iterator end7_;
-    typename ParamGenerator<T7>::iterator current7_;
-    const typename ParamGenerator<T8>::iterator begin8_;
-    const typename ParamGenerator<T8>::iterator end8_;
-    typename ParamGenerator<T8>::iterator current8_;
-    const typename ParamGenerator<T9>::iterator begin9_;
-    const typename ParamGenerator<T9>::iterator end9_;
-    typename ParamGenerator<T9>::iterator current9_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator9::Iterator
+  FlatTuple<Ts...> v_;
+};
 
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator9& other);
-
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-  const ParamGenerator<T4> g4_;
-  const ParamGenerator<T5> g5_;
-  const ParamGenerator<T6> g6_;
-  const ParamGenerator<T7> g7_;
-  const ParamGenerator<T8> g8_;
-  const ParamGenerator<T9> g9_;
-};  // class CartesianProductGenerator9
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10>
-class CartesianProductGenerator10
-    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
-        T7, T8, T9, T10> > {
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+template <typename... T>
+class CartesianProductGenerator
+    : public ParamGeneratorInterface<::std::tuple<T...>> {
  public:
-  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> ParamType;
-
-  CartesianProductGenerator10(const ParamGenerator<T1>& g1,
-      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
-      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
-      const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
-      const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9,
-      const ParamGenerator<T10>& g10)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
-          g9_(g9), g10_(g10) {}
-  virtual ~CartesianProductGenerator10() {}
-
-  virtual ParamIteratorInterface<ParamType>* Begin() const {
-    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
-        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
-        g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin());
+  typedef ::std::tuple<T...> ParamType;
+
+  CartesianProductGenerator(const std::tuple<ParamGenerator<T>...>& g)
+      : generators_(g) {}
+  ~CartesianProductGenerator() override {}
+
+  ParamIteratorInterface<ParamType>* Begin() const override {
+    return new Iterator(this, generators_, false);
   }
-  virtual ParamIteratorInterface<ParamType>* End() const {
-    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
-        g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
-        g8_.end(), g9_, g9_.end(), g10_, g10_.end());
+  ParamIteratorInterface<ParamType>* End() const override {
+    return new Iterator(this, generators_, true);
   }
 
  private:
-  class Iterator : public ParamIteratorInterface<ParamType> {
+  template <class I>
+  class IteratorImpl;
+  template <size_t... I>
+  class IteratorImpl<IndexSequence<I...>>
+      : public ParamIteratorInterface<ParamType> {
    public:
-    Iterator(const ParamGeneratorInterface<ParamType>* base,
-      const ParamGenerator<T1>& g1,
-      const typename ParamGenerator<T1>::iterator& current1,
-      const ParamGenerator<T2>& g2,
-      const typename ParamGenerator<T2>::iterator& current2,
-      const ParamGenerator<T3>& g3,
-      const typename ParamGenerator<T3>::iterator& current3,
-      const ParamGenerator<T4>& g4,
-      const typename ParamGenerator<T4>::iterator& current4,
-      const ParamGenerator<T5>& g5,
-      const typename ParamGenerator<T5>::iterator& current5,
-      const ParamGenerator<T6>& g6,
-      const typename ParamGenerator<T6>::iterator& current6,
-      const ParamGenerator<T7>& g7,
-      const typename ParamGenerator<T7>::iterator& current7,
-      const ParamGenerator<T8>& g8,
-      const typename ParamGenerator<T8>::iterator& current8,
-      const ParamGenerator<T9>& g9,
-      const typename ParamGenerator<T9>::iterator& current9,
-      const ParamGenerator<T10>& g10,
-      const typename ParamGenerator<T10>::iterator& current10)
+    IteratorImpl(const ParamGeneratorInterface<ParamType>* base,
+             const std::tuple<ParamGenerator<T>...>& generators, bool is_end)
         : base_(base),
-          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
-          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
-          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
-          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
-          begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
-          begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
-          begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
-          begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
-          begin9_(g9.begin()), end9_(g9.end()), current9_(current9),
-          begin10_(g10.begin()), end10_(g10.end()), current10_(current10)    {
+          begin_(std::get<I>(generators).begin()...),
+          end_(std::get<I>(generators).end()...),
+          current_(is_end ? end_ : begin_) {
       ComputeCurrentValue();
     }
-    virtual ~Iterator() {}
+    ~IteratorImpl() override {}
 
-    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+    const ParamGeneratorInterface<ParamType>* BaseGenerator() const override {
       return base_;
     }
     // Advance should not be called on beyond-of-range iterators
     // so no component iterators must be beyond end of range, either.
-    virtual void Advance() {
+    void Advance() override {
       assert(!AtEnd());
-      ++current10_;
-      if (current10_ == end10_) {
-        current10_ = begin10_;
-        ++current9_;
-      }
-      if (current9_ == end9_) {
-        current9_ = begin9_;
-        ++current8_;
-      }
-      if (current8_ == end8_) {
-        current8_ = begin8_;
-        ++current7_;
-      }
-      if (current7_ == end7_) {
-        current7_ = begin7_;
-        ++current6_;
-      }
-      if (current6_ == end6_) {
-        current6_ = begin6_;
-        ++current5_;
-      }
-      if (current5_ == end5_) {
-        current5_ = begin5_;
-        ++current4_;
-      }
-      if (current4_ == end4_) {
-        current4_ = begin4_;
-        ++current3_;
-      }
-      if (current3_ == end3_) {
-        current3_ = begin3_;
-        ++current2_;
-      }
-      if (current2_ == end2_) {
-        current2_ = begin2_;
-        ++current1_;
-      }
+      // Advance the last iterator.
+      ++std::get<sizeof...(T) - 1>(current_);
+      // if that reaches end, propagate that up.
+      AdvanceIfEnd<sizeof...(T) - 1>();
       ComputeCurrentValue();
     }
-    virtual ParamIteratorInterface<ParamType>* Clone() const {
-      return new Iterator(*this);
+    ParamIteratorInterface<ParamType>* Clone() const override {
+      return new IteratorImpl(*this);
     }
-    virtual const ParamType* Current() const { return &current_value_; }
-    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+
+    const ParamType* Current() const override { return current_value_.get(); }
+
+    bool Equals(const ParamIteratorInterface<ParamType>& other) const override {
       // Having the same base generator guarantees that the other
       // iterator is of the same type and we can downcast.
       GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
           << "The program attempted to compare iterators "
           << "from different generators." << std::endl;
-      const Iterator* typed_other =
-          CheckedDowncastToActualType<const Iterator>(&other);
+      const IteratorImpl* typed_other =
+          CheckedDowncastToActualType<const IteratorImpl>(&other);
+
       // We must report iterators equal if they both point beyond their
       // respective ranges. That can happen in a variety of fashions,
       // so we have to consult AtEnd().
-      return (AtEnd() && typed_other->AtEnd()) ||
-         (
-          current1_ == typed_other->current1_ &&
-          current2_ == typed_other->current2_ &&
-          current3_ == typed_other->current3_ &&
-          current4_ == typed_other->current4_ &&
-          current5_ == typed_other->current5_ &&
-          current6_ == typed_other->current6_ &&
-          current7_ == typed_other->current7_ &&
-          current8_ == typed_other->current8_ &&
-          current9_ == typed_other->current9_ &&
-          current10_ == typed_other->current10_);
+      if (AtEnd() && typed_other->AtEnd()) return true;
+
+      bool same = true;
+      bool dummy[] = {
+          (same = same && std::get<I>(current_) ==
+                              std::get<I>(typed_other->current_))...};
+      (void)dummy;
+      return same;
     }
 
    private:
-    Iterator(const Iterator& other)
-        : base_(other.base_),
-        begin1_(other.begin1_),
-        end1_(other.end1_),
-        current1_(other.current1_),
-        begin2_(other.begin2_),
-        end2_(other.end2_),
-        current2_(other.current2_),
-        begin3_(other.begin3_),
-        end3_(other.end3_),
-        current3_(other.current3_),
-        begin4_(other.begin4_),
-        end4_(other.end4_),
-        current4_(other.current4_),
-        begin5_(other.begin5_),
-        end5_(other.end5_),
-        current5_(other.current5_),
-        begin6_(other.begin6_),
-        end6_(other.end6_),
-        current6_(other.current6_),
-        begin7_(other.begin7_),
-        end7_(other.end7_),
-        current7_(other.current7_),
-        begin8_(other.begin8_),
-        end8_(other.end8_),
-        current8_(other.current8_),
-        begin9_(other.begin9_),
-        end9_(other.end9_),
-        current9_(other.current9_),
-        begin10_(other.begin10_),
-        end10_(other.end10_),
-        current10_(other.current10_) {
-      ComputeCurrentValue();
+    template <size_t ThisI>
+    void AdvanceIfEnd() {
+      if (std::get<ThisI>(current_) != std::get<ThisI>(end_)) return;
+
+      bool last = ThisI == 0;
+      if (last) {
+        // We are done. Nothing else to propagate.
+        return;
+      }
+
+      constexpr size_t NextI = ThisI - (ThisI != 0);
+      std::get<ThisI>(current_) = std::get<ThisI>(begin_);
+      ++std::get<NextI>(current_);
+      AdvanceIfEnd<NextI>();
     }
 
     void ComputeCurrentValue() {
       if (!AtEnd())
-        current_value_ = ParamType(*current1_, *current2_, *current3_,
-            *current4_, *current5_, *current6_, *current7_, *current8_,
-            *current9_, *current10_);
+        current_value_ = std::make_shared<ParamType>(*std::get<I>(current_)...);
     }
     bool AtEnd() const {
-      // We must report iterator past the end of the range when either of the
-      // component iterators has reached the end of its range.
-      return
-          current1_ == end1_ ||
-          current2_ == end2_ ||
-          current3_ == end3_ ||
-          current4_ == end4_ ||
-          current5_ == end5_ ||
-          current6_ == end6_ ||
-          current7_ == end7_ ||
-          current8_ == end8_ ||
-          current9_ == end9_ ||
-          current10_ == end10_;
+      bool at_end = false;
+      bool dummy[] = {
+          (at_end = at_end || std::get<I>(current_) == std::get<I>(end_))...};
+      (void)dummy;
+      return at_end;
     }
 
-    // No implementation - assignment is unsupported.
-    void operator=(const Iterator& other);
-
     const ParamGeneratorInterface<ParamType>* const base_;
-    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
-    // current[i]_ is the actual traversing iterator.
-    const typename ParamGenerator<T1>::iterator begin1_;
-    const typename ParamGenerator<T1>::iterator end1_;
-    typename ParamGenerator<T1>::iterator current1_;
-    const typename ParamGenerator<T2>::iterator begin2_;
-    const typename ParamGenerator<T2>::iterator end2_;
-    typename ParamGenerator<T2>::iterator current2_;
-    const typename ParamGenerator<T3>::iterator begin3_;
-    const typename ParamGenerator<T3>::iterator end3_;
-    typename ParamGenerator<T3>::iterator current3_;
-    const typename ParamGenerator<T4>::iterator begin4_;
-    const typename ParamGenerator<T4>::iterator end4_;
-    typename ParamGenerator<T4>::iterator current4_;
-    const typename ParamGenerator<T5>::iterator begin5_;
-    const typename ParamGenerator<T5>::iterator end5_;
-    typename ParamGenerator<T5>::iterator current5_;
-    const typename ParamGenerator<T6>::iterator begin6_;
-    const typename ParamGenerator<T6>::iterator end6_;
-    typename ParamGenerator<T6>::iterator current6_;
-    const typename ParamGenerator<T7>::iterator begin7_;
-    const typename ParamGenerator<T7>::iterator end7_;
-    typename ParamGenerator<T7>::iterator current7_;
-    const typename ParamGenerator<T8>::iterator begin8_;
-    const typename ParamGenerator<T8>::iterator end8_;
-    typename ParamGenerator<T8>::iterator current8_;
-    const typename ParamGenerator<T9>::iterator begin9_;
-    const typename ParamGenerator<T9>::iterator end9_;
-    typename ParamGenerator<T9>::iterator current9_;
-    const typename ParamGenerator<T10>::iterator begin10_;
-    const typename ParamGenerator<T10>::iterator end10_;
-    typename ParamGenerator<T10>::iterator current10_;
-    ParamType current_value_;
-  };  // class CartesianProductGenerator10::Iterator
-
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductGenerator10& other);
-
-  const ParamGenerator<T1> g1_;
-  const ParamGenerator<T2> g2_;
-  const ParamGenerator<T3> g3_;
-  const ParamGenerator<T4> g4_;
-  const ParamGenerator<T5> g5_;
-  const ParamGenerator<T6> g6_;
-  const ParamGenerator<T7> g7_;
-  const ParamGenerator<T8> g8_;
-  const ParamGenerator<T9> g9_;
-  const ParamGenerator<T10> g10_;
-};  // class CartesianProductGenerator10
-
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Helper classes providing Combine() with polymorphic features. They allow
-// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
-// convertible to U.
-//
-template <class Generator1, class Generator2>
-class CartesianProductHolder2 {
- public:
-CartesianProductHolder2(const Generator1& g1, const Generator2& g2)
-      : g1_(g1), g2_(g2) {}
-  template <typename T1, typename T2>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2> >(
-        new CartesianProductGenerator2<T1, T2>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_)));
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder2& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-};  // class CartesianProductHolder2
-
-template <class Generator1, class Generator2, class Generator3>
-class CartesianProductHolder3 {
- public:
-CartesianProductHolder3(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3)
-      : g1_(g1), g2_(g2), g3_(g3) {}
-  template <typename T1, typename T2, typename T3>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >(
-        new CartesianProductGenerator3<T1, T2, T3>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_)));
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder3& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-};  // class CartesianProductHolder3
-
-template <class Generator1, class Generator2, class Generator3,
-    class Generator4>
-class CartesianProductHolder4 {
- public:
-CartesianProductHolder4(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3, const Generator4& g4)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
-  template <typename T1, typename T2, typename T3, typename T4>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >(
-        new CartesianProductGenerator4<T1, T2, T3, T4>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_),
-        static_cast<ParamGenerator<T4> >(g4_)));
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder4& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-  const Generator4 g4_;
-};  // class CartesianProductHolder4
-
-template <class Generator1, class Generator2, class Generator3,
-    class Generator4, class Generator5>
-class CartesianProductHolder5 {
- public:
-CartesianProductHolder5(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3, const Generator4& g4, const Generator5& g5)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}
-  template <typename T1, typename T2, typename T3, typename T4, typename T5>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >(
-        new CartesianProductGenerator5<T1, T2, T3, T4, T5>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_),
-        static_cast<ParamGenerator<T4> >(g4_),
-        static_cast<ParamGenerator<T5> >(g5_)));
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder5& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-  const Generator4 g4_;
-  const Generator5 g5_;
-};  // class CartesianProductHolder5
-
-template <class Generator1, class Generator2, class Generator3,
-    class Generator4, class Generator5, class Generator6>
-class CartesianProductHolder6 {
- public:
-CartesianProductHolder6(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3, const Generator4& g4, const Generator5& g5,
-    const Generator6& g6)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}
-  template <typename T1, typename T2, typename T3, typename T4, typename T5,
-      typename T6>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >(
-        new CartesianProductGenerator6<T1, T2, T3, T4, T5, T6>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_),
-        static_cast<ParamGenerator<T4> >(g4_),
-        static_cast<ParamGenerator<T5> >(g5_),
-        static_cast<ParamGenerator<T6> >(g6_)));
-  }
-
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder6& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-  const Generator4 g4_;
-  const Generator5 g5_;
-  const Generator6 g6_;
-};  // class CartesianProductHolder6
-
-template <class Generator1, class Generator2, class Generator3,
-    class Generator4, class Generator5, class Generator6, class Generator7>
-class CartesianProductHolder7 {
- public:
-CartesianProductHolder7(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3, const Generator4& g4, const Generator5& g5,
-    const Generator6& g6, const Generator7& g7)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}
-  template <typename T1, typename T2, typename T3, typename T4, typename T5,
-      typename T6, typename T7>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
-      T7> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> >(
-        new CartesianProductGenerator7<T1, T2, T3, T4, T5, T6, T7>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_),
-        static_cast<ParamGenerator<T4> >(g4_),
-        static_cast<ParamGenerator<T5> >(g5_),
-        static_cast<ParamGenerator<T6> >(g6_),
-        static_cast<ParamGenerator<T7> >(g7_)));
-  }
+    std::tuple<typename ParamGenerator<T>::iterator...> begin_;
+    std::tuple<typename ParamGenerator<T>::iterator...> end_;
+    std::tuple<typename ParamGenerator<T>::iterator...> current_;
+    std::shared_ptr<ParamType> current_value_;
+  };
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder7& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-  const Generator4 g4_;
-  const Generator5 g5_;
-  const Generator6 g6_;
-  const Generator7 g7_;
-};  // class CartesianProductHolder7
-
-template <class Generator1, class Generator2, class Generator3,
-    class Generator4, class Generator5, class Generator6, class Generator7,
-    class Generator8>
-class CartesianProductHolder8 {
- public:
-CartesianProductHolder8(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3, const Generator4& g4, const Generator5& g5,
-    const Generator6& g6, const Generator7& g7, const Generator8& g8)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
-          g8_(g8) {}
-  template <typename T1, typename T2, typename T3, typename T4, typename T5,
-      typename T6, typename T7, typename T8>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7,
-      T8> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> >(
-        new CartesianProductGenerator8<T1, T2, T3, T4, T5, T6, T7, T8>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_),
-        static_cast<ParamGenerator<T4> >(g4_),
-        static_cast<ParamGenerator<T5> >(g5_),
-        static_cast<ParamGenerator<T6> >(g6_),
-        static_cast<ParamGenerator<T7> >(g7_),
-        static_cast<ParamGenerator<T8> >(g8_)));
-  }
+  using Iterator = IteratorImpl<typename MakeIndexSequence<sizeof...(T)>::type>;
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder8& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-  const Generator4 g4_;
-  const Generator5 g5_;
-  const Generator6 g6_;
-  const Generator7 g7_;
-  const Generator8 g8_;
-};  // class CartesianProductHolder8
-
-template <class Generator1, class Generator2, class Generator3,
-    class Generator4, class Generator5, class Generator6, class Generator7,
-    class Generator8, class Generator9>
-class CartesianProductHolder9 {
- public:
-CartesianProductHolder9(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3, const Generator4& g4, const Generator5& g5,
-    const Generator6& g6, const Generator7& g7, const Generator8& g8,
-    const Generator9& g9)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
-          g9_(g9) {}
-  template <typename T1, typename T2, typename T3, typename T4, typename T5,
-      typename T6, typename T7, typename T8, typename T9>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
-      T9> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
-        T9> >(
-        new CartesianProductGenerator9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_),
-        static_cast<ParamGenerator<T4> >(g4_),
-        static_cast<ParamGenerator<T5> >(g5_),
-        static_cast<ParamGenerator<T6> >(g6_),
-        static_cast<ParamGenerator<T7> >(g7_),
-        static_cast<ParamGenerator<T8> >(g8_),
-        static_cast<ParamGenerator<T9> >(g9_)));
-  }
+  std::tuple<ParamGenerator<T>...> generators_;
+};
 
- private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder9& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-  const Generator4 g4_;
-  const Generator5 g5_;
-  const Generator6 g6_;
-  const Generator7 g7_;
-  const Generator8 g8_;
-  const Generator9 g9_;
-};  // class CartesianProductHolder9
-
-template <class Generator1, class Generator2, class Generator3,
-    class Generator4, class Generator5, class Generator6, class Generator7,
-    class Generator8, class Generator9, class Generator10>
-class CartesianProductHolder10 {
+template <class... Gen>
+class CartesianProductHolder {
  public:
-CartesianProductHolder10(const Generator1& g1, const Generator2& g2,
-    const Generator3& g3, const Generator4& g4, const Generator5& g5,
-    const Generator6& g6, const Generator7& g7, const Generator8& g8,
-    const Generator9& g9, const Generator10& g10)
-      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
-          g9_(g9), g10_(g10) {}
-  template <typename T1, typename T2, typename T3, typename T4, typename T5,
-      typename T6, typename T7, typename T8, typename T9, typename T10>
-  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
-      T9, T10> >() const {
-    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
-        T9, T10> >(
-        new CartesianProductGenerator10<T1, T2, T3, T4, T5, T6, T7, T8, T9,
-            T10>(
-        static_cast<ParamGenerator<T1> >(g1_),
-        static_cast<ParamGenerator<T2> >(g2_),
-        static_cast<ParamGenerator<T3> >(g3_),
-        static_cast<ParamGenerator<T4> >(g4_),
-        static_cast<ParamGenerator<T5> >(g5_),
-        static_cast<ParamGenerator<T6> >(g6_),
-        static_cast<ParamGenerator<T7> >(g7_),
-        static_cast<ParamGenerator<T8> >(g8_),
-        static_cast<ParamGenerator<T9> >(g9_),
-        static_cast<ParamGenerator<T10> >(g10_)));
+  CartesianProductHolder(const Gen&... g) : generators_(g...) {}
+  template <typename... T>
+  operator ParamGenerator<::std::tuple<T...>>() const {
+    return ParamGenerator<::std::tuple<T...>>(
+        new CartesianProductGenerator<T...>(generators_));
   }
 
  private:
-  // No implementation - assignment is unsupported.
-  void operator=(const CartesianProductHolder10& other);
-
-  const Generator1 g1_;
-  const Generator2 g2_;
-  const Generator3 g3_;
-  const Generator4 g4_;
-  const Generator5 g5_;
-  const Generator6 g6_;
-  const Generator7 g7_;
-  const Generator8 g8_;
-  const Generator9 g9_;
-  const Generator10 g10_;
-};  // class CartesianProductHolder10
-
-# endif  // GTEST_HAS_COMBINE
+  std::tuple<Gen...> generators_;
+};
 
 }  // namespace internal
 }  // namespace testing
 
-#endif  //  GTEST_HAS_PARAM_TEST
-
-#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
-
-#if GTEST_HAS_PARAM_TEST
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
 
 namespace testing {
 
 // Functions producing parameter generators.
 //
 // Google Test uses these generators to produce parameters for value-
-// parameterized tests. When a parameterized test case is instantiated
+// parameterized tests. When a parameterized test suite is instantiated
 // with a particular generator, Google Test creates and runs tests
 // for each element in the sequence produced by the generator.
 //
-// In the following sample, tests from test case FooTest are instantiated
+// In the following sample, tests from test suite FooTest are instantiated
 // each three times with parameter values 3, 5, and 8:
 //
 // class FooTest : public TestWithParam<int> { ... };
@@ -15782,7 +8898,7 @@ namespace testing {
 // }
 // TEST_P(FooTest, TestThat) {
 // }
-// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
+// INSTANTIATE_TEST_SUITE_P(TestSequence, FooTest, Values(3, 5, 8));
 //
 
 // Range() returns generators providing sequences of values in a range.
@@ -15836,935 +8952,87 @@ internal::ParamGenerator<T> Range(T start, T end) {
 //
 // Please note that ValuesIn copies the values from the containers
 // passed in and keeps them to generate tests in RUN_ALL_TESTS().
-//
-// Examples:
-//
-// This instantiates tests from test case StringTest
-// each with C-string values of "foo", "bar", and "baz":
-//
-// const char* strings[] = {"foo", "bar", "baz"};
-// INSTANTIATE_TEST_CASE_P(StringSequence, SrtingTest, ValuesIn(strings));
-//
-// This instantiates tests from test case StlStringTest
-// each with STL strings with values "a" and "b":
-//
-// ::std::vector< ::std::string> GetParameterStrings() {
-//   ::std::vector< ::std::string> v;
-//   v.push_back("a");
-//   v.push_back("b");
-//   return v;
-// }
-//
-// INSTANTIATE_TEST_CASE_P(CharSequence,
-//                         StlStringTest,
-//                         ValuesIn(GetParameterStrings()));
-//
-//
-// This will also instantiate tests from CharTest
-// each with parameter values 'a' and 'b':
-//
-// ::std::list<char> GetParameterChars() {
-//   ::std::list<char> list;
-//   list.push_back('a');
-//   list.push_back('b');
-//   return list;
-// }
-// ::std::list<char> l = GetParameterChars();
-// INSTANTIATE_TEST_CASE_P(CharSequence2,
-//                         CharTest,
-//                         ValuesIn(l.begin(), l.end()));
-//
-template <typename ForwardIterator>
-internal::ParamGenerator<
-  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
-ValuesIn(ForwardIterator begin, ForwardIterator end) {
-  typedef typename ::testing::internal::IteratorTraits<ForwardIterator>
-      ::value_type ParamType;
-  return internal::ParamGenerator<ParamType>(
-      new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
-}
-
-template <typename T, size_t N>
-internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
-  return ValuesIn(array, array + N);
-}
-
-template <class Container>
-internal::ParamGenerator<typename Container::value_type> ValuesIn(
-    const Container& container) {
-  return ValuesIn(container.begin(), container.end());
-}
-
-// Values() allows generating tests from explicitly specified list of
-// parameters.
-//
-// Synopsis:
-// Values(T v1, T v2, ..., T vN)
-//   - returns a generator producing sequences with elements v1, v2, ..., vN.
-//
-// For example, this instantiates tests from test case BarTest each
-// with values "one", "two", and "three":
-//
-// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three"));
-//
-// This instantiates tests from test case BazTest each with values 1, 2, 3.5.
-// The exact type of values will depend on the type of parameter in BazTest.
-//
-// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
-//
-// Currently, Values() supports from 1 to 50 parameters.
-//
-template <typename T1>
-internal::ValueArray1<T1> Values(T1 v1) {
-  return internal::ValueArray1<T1>(v1);
-}
-
-template <typename T1, typename T2>
-internal::ValueArray2<T1, T2> Values(T1 v1, T2 v2) {
-  return internal::ValueArray2<T1, T2>(v1, v2);
-}
-
-template <typename T1, typename T2, typename T3>
-internal::ValueArray3<T1, T2, T3> Values(T1 v1, T2 v2, T3 v3) {
-  return internal::ValueArray3<T1, T2, T3>(v1, v2, v3);
-}
-
-template <typename T1, typename T2, typename T3, typename T4>
-internal::ValueArray4<T1, T2, T3, T4> Values(T1 v1, T2 v2, T3 v3, T4 v4) {
-  return internal::ValueArray4<T1, T2, T3, T4>(v1, v2, v3, v4);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-internal::ValueArray5<T1, T2, T3, T4, T5> Values(T1 v1, T2 v2, T3 v3, T4 v4,
-    T5 v5) {
-  return internal::ValueArray5<T1, T2, T3, T4, T5>(v1, v2, v3, v4, v5);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6>
-internal::ValueArray6<T1, T2, T3, T4, T5, T6> Values(T1 v1, T2 v2, T3 v3,
-    T4 v4, T5 v5, T6 v6) {
-  return internal::ValueArray6<T1, T2, T3, T4, T5, T6>(v1, v2, v3, v4, v5, v6);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7>
-internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7> Values(T1 v1, T2 v2, T3 v3,
-    T4 v4, T5 v5, T6 v6, T7 v7) {
-  return internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7>(v1, v2, v3, v4, v5,
-      v6, v7);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8>
-internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8> Values(T1 v1, T2 v2,
-    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) {
-  return internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8>(v1, v2, v3, v4,
-      v5, v6, v7, v8);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9>
-internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9> Values(T1 v1, T2 v2,
-    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) {
-  return internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(v1, v2, v3,
-      v4, v5, v6, v7, v8, v9);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10>
-internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> Values(T1 v1,
-    T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) {
-  return internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>(v1,
-      v2, v3, v4, v5, v6, v7, v8, v9, v10);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11>
-internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
-    T11> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11) {
-  return internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
-      T11>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12>
-internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-    T12> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12) {
-  return internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13>
-internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-    T13> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13) {
-  return internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14>
-internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) {
-  return internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
-      v14);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15>
-internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
-    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) {
-  return internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
-      v13, v14, v15);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16>
-internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
-    T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16) {
-  return internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
-      v12, v13, v14, v15, v16);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17>
-internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
-    T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17) {
-  return internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
-      v11, v12, v13, v14, v15, v16, v17);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18>
-internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
-    T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17, T18 v18) {
-  return internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
-      v10, v11, v12, v13, v14, v15, v16, v17, v18);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19>
-internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
-    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
-    T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) {
-  return internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19>(v1, v2, v3, v4, v5, v6, v7, v8,
-      v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20>
-internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20> Values(T1 v1, T2 v2, T3 v3, T4 v4,
-    T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
-    T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) {
-  return internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20>(v1, v2, v3, v4, v5, v6, v7,
-      v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21>
-internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21> Values(T1 v1, T2 v2, T3 v3, T4 v4,
-    T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
-    T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) {
-  return internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21>(v1, v2, v3, v4, v5, v6,
-      v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22>
-internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22> Values(T1 v1, T2 v2, T3 v3,
-    T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-    T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
-    T21 v21, T22 v22) {
-  return internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>(v1, v2, v3, v4,
-      v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
-      v20, v21, v22);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23>
-internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> Values(T1 v1, T2 v2,
-    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-    T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
-    T21 v21, T22 v22, T23 v23) {
-  return internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>(v1, v2, v3,
-      v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
-      v20, v21, v22, v23);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24>
-internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Values(T1 v1, T2 v2,
-    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-    T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
-    T21 v21, T22 v22, T23 v23, T24 v24) {
-  return internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>(v1, v2,
-      v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18,
-      v19, v20, v21, v22, v23, v24);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25>
-internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Values(T1 v1,
-    T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11,
-    T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19,
-    T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) {
-  return internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25>(v1,
-      v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
-      v18, v19, v20, v21, v22, v23, v24, v25);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26>
-internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-    T26> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26) {
-  return internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
-      v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27>
-internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-    T27> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26, T27 v27) {
-  return internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14,
-      v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28>
-internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-    T28> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26, T27 v27, T28 v28) {
-  return internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
-      v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27,
-      v28);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29>
-internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26, T27 v27, T28 v28, T29 v29) {
-  return internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
-      v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26,
-      v27, v28, v29);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30>
-internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
-    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
-    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
-    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) {
-  return internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
-      v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25,
-      v26, v27, v28, v29, v30);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31>
-internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
-    T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
-    T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) {
-  return internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
-      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24,
-      v25, v26, v27, v28, v29, v30, v31);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32>
-internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
-    T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
-    T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
-    T32 v32) {
-  return internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
-      v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
-      v24, v25, v26, v27, v28, v29, v30, v31, v32);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33>
-internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
-    T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
-    T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
-    T32 v32, T33 v33) {
-  return internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33>(v1, v2, v3, v4, v5, v6, v7, v8,
-      v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
-      v24, v25, v26, v27, v28, v29, v30, v31, v32, v33);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34>
-internal::ValueArray34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
-    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
-    T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22,
-    T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
-    T31 v31, T32 v32, T33 v33, T34 v34) {
-  return internal::ValueArray34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34>(v1, v2, v3, v4, v5, v6, v7,
-      v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22,
-      v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35>
-internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35> Values(T1 v1, T2 v2, T3 v3, T4 v4,
-    T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
-    T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
-    T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
-    T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) {
-  return internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35>(v1, v2, v3, v4, v5, v6,
-      v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
-      v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36>
-internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36> Values(T1 v1, T2 v2, T3 v3, T4 v4,
-    T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
-    T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
-    T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
-    T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) {
-  return internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36>(v1, v2, v3, v4,
-      v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
-      v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
-      v34, v35, v36);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37>
-internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37> Values(T1 v1, T2 v2, T3 v3,
-    T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-    T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
-    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
-    T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
-    T37 v37) {
-  return internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37>(v1, v2, v3,
-      v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
-      v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
-      v34, v35, v36, v37);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38>
-internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Values(T1 v1, T2 v2,
-    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-    T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
-    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
-    T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
-    T37 v37, T38 v38) {
-  return internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38>(v1, v2,
-      v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18,
-      v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32,
-      v33, v34, v35, v36, v37, v38);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39>
-internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Values(T1 v1, T2 v2,
-    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-    T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
-    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
-    T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
-    T37 v37, T38 v38, T39 v39) {
-  return internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39>(v1,
-      v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
-      v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
-      v32, v33, v34, v35, v36, v37, v38, v39);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40>
-internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Values(T1 v1,
-    T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11,
-    T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19,
-    T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27,
-    T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
-    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) {
-  return internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
-      v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29,
-      v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41>
-internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-    T41> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-    T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) {
-  return internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14,
-      v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28,
-      v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42>
-internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-    T42> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-    T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-    T42 v42) {
-  return internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
-      v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27,
-      v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41,
-      v42);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43>
-internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-    T43> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-    T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-    T42 v42, T43 v43) {
-  return internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
-      v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26,
-      v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
-      v41, v42, v43);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44>
-internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
-    T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
-    T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
-    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
-    T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
-    T42 v42, T43 v43, T44 v44) {
-  return internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43, T44>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
-      v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25,
-      v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39,
-      v40, v41, v42, v43, v44);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45>
-internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
-    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
-    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
-    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
-    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
-    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) {
-  return internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43, T44, T45>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
-      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24,
-      v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38,
-      v39, v40, v41, v42, v43, v44, v45);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46>
-internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45, T46> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
-    T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
-    T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
-    T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
-    T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) {
-  return internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43, T44, T45, T46>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
-      v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
-      v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37,
-      v38, v39, v40, v41, v42, v43, v44, v45, v46);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47>
-internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45, T46, T47> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
-    T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
-    T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
-    T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
-    T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) {
-  return internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43, T44, T45, T46, T47>(v1, v2, v3, v4, v5, v6, v7, v8,
-      v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
-      v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37,
-      v38, v39, v40, v41, v42, v43, v44, v45, v46, v47);
+//
+// Examples:
+//
+// This instantiates tests from test suite StringTest
+// each with C-string values of "foo", "bar", and "baz":
+//
+// const char* strings[] = {"foo", "bar", "baz"};
+// INSTANTIATE_TEST_SUITE_P(StringSequence, StringTest, ValuesIn(strings));
+//
+// This instantiates tests from test suite StlStringTest
+// each with STL strings with values "a" and "b":
+//
+// ::std::vector< ::std::string> GetParameterStrings() {
+//   ::std::vector< ::std::string> v;
+//   v.push_back("a");
+//   v.push_back("b");
+//   return v;
+// }
+//
+// INSTANTIATE_TEST_SUITE_P(CharSequence,
+//                          StlStringTest,
+//                          ValuesIn(GetParameterStrings()));
+//
+//
+// This will also instantiate tests from CharTest
+// each with parameter values 'a' and 'b':
+//
+// ::std::list<char> GetParameterChars() {
+//   ::std::list<char> list;
+//   list.push_back('a');
+//   list.push_back('b');
+//   return list;
+// }
+// ::std::list<char> l = GetParameterChars();
+// INSTANTIATE_TEST_SUITE_P(CharSequence2,
+//                          CharTest,
+//                          ValuesIn(l.begin(), l.end()));
+//
+template <typename ForwardIterator>
+internal::ParamGenerator<
+    typename std::iterator_traits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end) {
+  typedef typename std::iterator_traits<ForwardIterator>::value_type ParamType;
+  return internal::ParamGenerator<ParamType>(
+      new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
 }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48>
-internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45, T46, T47, T48> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
-    T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
-    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
-    T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
-    T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
-    T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47,
-    T48 v48) {
-  return internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43, T44, T45, T46, T47, T48>(v1, v2, v3, v4, v5, v6, v7,
-      v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22,
-      v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36,
-      v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48);
+template <typename T, size_t N>
+internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
+  return ValuesIn(array, array + N);
 }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49>
-internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45, T46, T47, T48, T49> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
-    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
-    T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22,
-    T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
-    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38,
-    T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46,
-    T47 v47, T48 v48, T49 v49) {
-  return internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43, T44, T45, T46, T47, T48, T49>(v1, v2, v3, v4, v5, v6,
-      v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
-      v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35,
-      v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49);
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+    const Container& container) {
+  return ValuesIn(container.begin(), container.end());
 }
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49, typename T50>
-internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45, T46, T47, T48, T49, T50> Values(T1 v1, T2 v2, T3 v3, T4 v4,
-    T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
-    T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
-    T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
-    T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37,
-    T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
-    T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) {
-  return internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40, T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>(v1, v2, v3, v4,
-      v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
-      v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
-      v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47,
-      v48, v49, v50);
+// Values() allows generating tests from explicitly specified list of
+// parameters.
+//
+// Synopsis:
+// Values(T v1, T v2, ..., T vN)
+//   - returns a generator producing sequences with elements v1, v2, ..., vN.
+//
+// For example, this instantiates tests from test suite BarTest each
+// with values "one", "two", and "three":
+//
+// INSTANTIATE_TEST_SUITE_P(NumSequence,
+//                          BarTest,
+//                          Values("one", "two", "three"));
+//
+// This instantiates tests from test suite BazTest each with values 1, 2, 3.5.
+// The exact type of values will depend on the type of parameter in BazTest.
+//
+// INSTANTIATE_TEST_SUITE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
+//
+//
+template <typename... T>
+internal::ValueArray<T...> Values(T... v) {
+  return internal::ValueArray<T...>(std::move(v)...);
 }
 
 // Bool() allows generating tests with parameters in a set of (false, true).
@@ -16777,7 +9045,7 @@ internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
 // of multiple flags can be tested when several Bool()'s are combined using
 // Combine() function.
 //
-// In the following example all tests in the test case FlagDependentTest
+// In the following example all tests in the test suite FlagDependentTest
 // will be instantiated twice with parameters false and true.
 //
 // class FlagDependentTest : public testing::TestWithParam<bool> {
@@ -16785,13 +9053,12 @@ internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
 //     external_flag = GetParam();
 //   }
 // }
-// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool());
+// INSTANTIATE_TEST_SUITE_P(BoolSequence, FlagDependentTest, Bool());
 //
 inline internal::ParamGenerator<bool> Bool() {
   return Values(false, true);
 }
 
-# if GTEST_HAS_COMBINE
 // Combine() allows the user to combine two or more sequences to produce
 // values of a Cartesian product of those sequences' elements.
 //
@@ -16800,430 +9067,207 @@ inline internal::ParamGenerator<bool> Bool() {
 //   - returns a generator producing sequences with elements coming from
 //     the Cartesian product of elements from the sequences generated by
 //     gen1, gen2, ..., genN. The sequence elements will have a type of
-//     tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
+//     std::tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
 //     of elements from sequences produces by gen1, gen2, ..., genN.
 //
-// Combine can have up to 10 arguments. This number is currently limited
-// by the maximum number of elements in the tuple implementation used by Google
-// Test.
-//
 // Example:
 //
-// This will instantiate tests in test case AnimalTest each one with
+// This will instantiate tests in test suite AnimalTest each one with
 // the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
 // tuple("dog", BLACK), and tuple("dog", WHITE):
 //
 // enum Color { BLACK, GRAY, WHITE };
 // class AnimalTest
-//     : public testing::TestWithParam<tuple<const char*, Color> > {...};
+//     : public testing::TestWithParam<std::tuple<const char*, Color> > {...};
 //
 // TEST_P(AnimalTest, AnimalLooksNice) {...}
 //
-// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest,
-//                         Combine(Values("cat", "dog"),
-//                                 Values(BLACK, WHITE)));
+// INSTANTIATE_TEST_SUITE_P(AnimalVariations, AnimalTest,
+//                          Combine(Values("cat", "dog"),
+//                                  Values(BLACK, WHITE)));
 //
 // This will instantiate tests in FlagDependentTest with all variations of two
 // Boolean flags:
 //
 // class FlagDependentTest
-//     : public testing::TestWithParam<tuple<bool, bool> > {
-//   virtual void SetUp() {
-//     // Assigns external_flag_1 and external_flag_2 values from the tuple.
-//     tie(external_flag_1, external_flag_2) = GetParam();
-//   }
-// };
-//
-// TEST_P(FlagDependentTest, TestFeature1) {
-//   // Test your code using external_flag_1 and external_flag_2 here.
-// }
-// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest,
-//                         Combine(Bool(), Bool()));
-//
-template <typename Generator1, typename Generator2>
-internal::CartesianProductHolder2<Generator1, Generator2> Combine(
-    const Generator1& g1, const Generator2& g2) {
-  return internal::CartesianProductHolder2<Generator1, Generator2>(
-      g1, g2);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3>
-internal::CartesianProductHolder3<Generator1, Generator2, Generator3> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3) {
-  return internal::CartesianProductHolder3<Generator1, Generator2, Generator3>(
-      g1, g2, g3);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
-    typename Generator4>
-internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
-    Generator4> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3,
-        const Generator4& g4) {
-  return internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
-      Generator4>(
-      g1, g2, g3, g4);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
-    typename Generator4, typename Generator5>
-internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
-    Generator4, Generator5> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3,
-        const Generator4& g4, const Generator5& g5) {
-  return internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
-      Generator4, Generator5>(
-      g1, g2, g3, g4, g5);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
-    typename Generator4, typename Generator5, typename Generator6>
-internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
-    Generator4, Generator5, Generator6> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3,
-        const Generator4& g4, const Generator5& g5, const Generator6& g6) {
-  return internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
-      Generator4, Generator5, Generator6>(
-      g1, g2, g3, g4, g5, g6);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
-    typename Generator4, typename Generator5, typename Generator6,
-    typename Generator7>
-internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
-    Generator4, Generator5, Generator6, Generator7> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3,
-        const Generator4& g4, const Generator5& g5, const Generator6& g6,
-        const Generator7& g7) {
-  return internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
-      Generator4, Generator5, Generator6, Generator7>(
-      g1, g2, g3, g4, g5, g6, g7);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
-    typename Generator4, typename Generator5, typename Generator6,
-    typename Generator7, typename Generator8>
-internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
-    Generator4, Generator5, Generator6, Generator7, Generator8> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3,
-        const Generator4& g4, const Generator5& g5, const Generator6& g6,
-        const Generator7& g7, const Generator8& g8) {
-  return internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
-      Generator4, Generator5, Generator6, Generator7, Generator8>(
-      g1, g2, g3, g4, g5, g6, g7, g8);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
-    typename Generator4, typename Generator5, typename Generator6,
-    typename Generator7, typename Generator8, typename Generator9>
-internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
-    Generator4, Generator5, Generator6, Generator7, Generator8,
-    Generator9> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3,
-        const Generator4& g4, const Generator5& g5, const Generator6& g6,
-        const Generator7& g7, const Generator8& g8, const Generator9& g9) {
-  return internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
-      Generator4, Generator5, Generator6, Generator7, Generator8, Generator9>(
-      g1, g2, g3, g4, g5, g6, g7, g8, g9);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
-    typename Generator4, typename Generator5, typename Generator6,
-    typename Generator7, typename Generator8, typename Generator9,
-    typename Generator10>
-internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
-    Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
-    Generator10> Combine(
-    const Generator1& g1, const Generator2& g2, const Generator3& g3,
-        const Generator4& g4, const Generator5& g5, const Generator6& g6,
-        const Generator7& g7, const Generator8& g8, const Generator9& g9,
-        const Generator10& g10) {
-  return internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
-      Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
-      Generator10>(
-      g1, g2, g3, g4, g5, g6, g7, g8, g9, g10);
-}
-# endif  // GTEST_HAS_COMBINE
-
-
-
-# define TEST_P(test_case_name, test_name) \
-  class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
-      : public test_case_name { \
-   public: \
-    GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
-    virtual void TestBody(); \
-   private: \
-    static int AddToRegistry() { \
-      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
-          GetTestCasePatternHolder<test_case_name>(\
-              #test_case_name, __FILE__, __LINE__)->AddTestPattern(\
-                  #test_case_name, \
-                  #test_name, \
-                  new ::testing::internal::TestMetaFactory< \
-                      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
-      return 0; \
-    } \
-    static int gtest_registering_dummy_; \
-    GTEST_DISALLOW_COPY_AND_ASSIGN_(\
-        GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
-  }; \
-  int GTEST_TEST_CLASS_NAME_(test_case_name, \
-                             test_name)::gtest_registering_dummy_ = \
-      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
-  void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
-
-# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
-  ::testing::internal::ParamGenerator<test_case_name::ParamType> \
-      gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
-  int gtest_##prefix##test_case_name##_dummy_ = \
-      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
-          GetTestCasePatternHolder<test_case_name>(\
-              #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\
-                  #prefix, \
-                  &gtest_##prefix##test_case_name##_EvalGenerator_, \
-                  __FILE__, __LINE__)
-
-}  // namespace testing
-
-#endif  // GTEST_HAS_PARAM_TEST
-
-#endif  // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
-// Copyright 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// Google C++ Testing Framework definitions useful in production code.
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_
-
-// When you need to test the private or protected members of a class,
-// use the FRIEND_TEST macro to declare your tests as friends of the
-// class.  For example:
-//
-// class MyClass {
-//  private:
-//   void MyMethod();
-//   FRIEND_TEST(MyClassTest, MyMethod);
-// };
-//
-// class MyClassTest : public testing::Test {
-//   // ...
-// };
-//
-// TEST_F(MyClassTest, MyMethod) {
-//   // Can call MyClass::MyMethod() here.
-// }
-
-#define FRIEND_TEST(test_case_name, test_name)\
-friend class test_case_name##_##test_name##_Test
-
-#endif  // GTEST_INCLUDE_GTEST_GTEST_PROD_H_
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: mheule@google.com (Markus Heule)
-//
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
-#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
-
-#include <iosfwd>
-#include <vector>
-
-namespace testing {
-
-// A copyable object representing the result of a test part (i.e. an
-// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()).
-//
-// Don't inherit from TestPartResult as its destructor is not virtual.
-class GTEST_API_ TestPartResult {
- public:
-  // The possible outcomes of a test part (i.e. an assertion or an
-  // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
-  enum Type {
-    kSuccess,          // Succeeded.
-    kNonFatalFailure,  // Failed but the test can continue.
-    kFatalFailure      // Failed and the test should be terminated.
-  };
-
-  // C'tor.  TestPartResult does NOT have a default constructor.
-  // Always use this constructor (with parameters) to create a
-  // TestPartResult object.
-  TestPartResult(Type a_type,
-                 const char* a_file_name,
-                 int a_line_number,
-                 const char* a_message)
-      : type_(a_type),
-        file_name_(a_file_name == NULL ? "" : a_file_name),
-        line_number_(a_line_number),
-        summary_(ExtractSummary(a_message)),
-        message_(a_message) {
-  }
-
-  // Gets the outcome of the test part.
-  Type type() const { return type_; }
-
-  // Gets the name of the source file where the test part took place, or
-  // NULL if it's unknown.
-  const char* file_name() const {
-    return file_name_.empty() ? NULL : file_name_.c_str();
-  }
-
-  // Gets the line in the source file where the test part took place,
-  // or -1 if it's unknown.
-  int line_number() const { return line_number_; }
-
-  // Gets the summary of the failure message.
-  const char* summary() const { return summary_.c_str(); }
-
-  // Gets the message associated with the test part.
-  const char* message() const { return message_.c_str(); }
-
-  // Returns true iff the test part passed.
-  bool passed() const { return type_ == kSuccess; }
-
-  // Returns true iff the test part failed.
-  bool failed() const { return type_ != kSuccess; }
-
-  // Returns true iff the test part non-fatally failed.
-  bool nonfatally_failed() const { return type_ == kNonFatalFailure; }
-
-  // Returns true iff the test part fatally failed.
-  bool fatally_failed() const { return type_ == kFatalFailure; }
-
- private:
-  Type type_;
-
-  // Gets the summary of the failure message by omitting the stack
-  // trace in it.
-  static std::string ExtractSummary(const char* message);
-
-  // The name of the source file where the test part took place, or
-  // "" if the source file is unknown.
-  std::string file_name_;
-  // The line in the source file where the test part took place, or -1
-  // if the line number is unknown.
-  int line_number_;
-  std::string summary_;  // The test failure summary.
-  std::string message_;  // The test failure message.
-};
-
-// Prints a TestPartResult object.
-std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
-
-// An array of TestPartResult objects.
-//
-// Don't inherit from TestPartResultArray as its destructor is not
-// virtual.
-class GTEST_API_ TestPartResultArray {
- public:
-  TestPartResultArray() {}
-
-  // Appends the given TestPartResult to the array.
-  void Append(const TestPartResult& result);
-
-  // Returns the TestPartResult at the given index (0-based).
-  const TestPartResult& GetTestPartResult(int index) const;
-
-  // Returns the number of TestPartResult objects in the array.
-  int size() const;
+//     : public testing::TestWithParam<std::tuple<bool, bool> > {
+//   virtual void SetUp() {
+//     // Assigns external_flag_1 and external_flag_2 values from the tuple.
+//     std::tie(external_flag_1, external_flag_2) = GetParam();
+//   }
+// };
+//
+// TEST_P(FlagDependentTest, TestFeature1) {
+//   // Test your code using external_flag_1 and external_flag_2 here.
+// }
+// INSTANTIATE_TEST_SUITE_P(TwoBoolSequence, FlagDependentTest,
+//                          Combine(Bool(), Bool()));
+//
+template <typename... Generator>
+internal::CartesianProductHolder<Generator...> Combine(const Generator&... g) {
+  return internal::CartesianProductHolder<Generator...>(g...);
+}
+
+#define TEST_P(test_suite_name, test_name)                                     \
+  class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)                     \
+      : public test_suite_name {                                               \
+   public:                                                                     \
+    GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() {}                    \
+    void TestBody() override;                                                  \
+                                                                               \
+   private:                                                                    \
+    static int AddToRegistry() {                                               \
+      ::testing::UnitTest::GetInstance()                                       \
+          ->parameterized_test_registry()                                      \
+          .GetTestSuitePatternHolder<test_suite_name>(                         \
+              GTEST_STRINGIFY_(test_suite_name),                               \
+              ::testing::internal::CodeLocation(__FILE__, __LINE__))           \
+          ->AddTestPattern(                                                    \
+              GTEST_STRINGIFY_(test_suite_name), GTEST_STRINGIFY_(test_name),  \
+              new ::testing::internal::TestMetaFactory<GTEST_TEST_CLASS_NAME_( \
+                  test_suite_name, test_name)>(),                              \
+              ::testing::internal::CodeLocation(__FILE__, __LINE__));          \
+      return 0;                                                                \
+    }                                                                          \
+    static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_;               \
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name,    \
+                                                           test_name));        \
+  };                                                                           \
+  int GTEST_TEST_CLASS_NAME_(test_suite_name,                                  \
+                             test_name)::gtest_registering_dummy_ =            \
+      GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::AddToRegistry();     \
+  void GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::TestBody()
+
+// The last argument to INSTANTIATE_TEST_SUITE_P allows the user to specify
+// generator and an optional function or functor that generates custom test name
+// suffixes based on the test parameters. Such a function or functor should
+// accept one argument of type testing::TestParamInfo<class ParamType>, and
+// return std::string.
+//
+// testing::PrintToStringParamName is a builtin test suffix generator that
+// returns the value of testing::PrintToString(GetParam()).
+//
+// Note: test names must be non-empty, unique, and may only contain ASCII
+// alphanumeric characters or underscore. Because PrintToString adds quotes
+// to std::string and C strings, it won't work for these types.
+
+#define GTEST_EXPAND_(arg) arg
+#define GTEST_GET_FIRST_(first, ...) first
+#define GTEST_GET_SECOND_(first, second, ...) second
+
+#define INSTANTIATE_TEST_SUITE_P(prefix, test_suite_name, ...)                \
+  static ::testing::internal::ParamGenerator<test_suite_name::ParamType>      \
+      gtest_##prefix##test_suite_name##_EvalGenerator_() {                    \
+    return GTEST_EXPAND_(GTEST_GET_FIRST_(__VA_ARGS__, DUMMY_PARAM_));        \
+  }                                                                           \
+  static ::std::string gtest_##prefix##test_suite_name##_EvalGenerateName_(   \
+      const ::testing::TestParamInfo<test_suite_name::ParamType>& info) {     \
+    if (::testing::internal::AlwaysFalse()) {                                 \
+      ::testing::internal::TestNotEmpty(GTEST_EXPAND_(GTEST_GET_SECOND_(      \
+          __VA_ARGS__,                                                        \
+          ::testing::internal::DefaultParamName<test_suite_name::ParamType>,  \
+          DUMMY_PARAM_)));                                                    \
+      auto t = std::make_tuple(__VA_ARGS__);                                  \
+      static_assert(std::tuple_size<decltype(t)>::value <= 2,                 \
+                    "Too Many Args!");                                        \
+    }                                                                         \
+    return ((GTEST_EXPAND_(GTEST_GET_SECOND_(                                 \
+        __VA_ARGS__,                                                          \
+        ::testing::internal::DefaultParamName<test_suite_name::ParamType>,    \
+        DUMMY_PARAM_))))(info);                                               \
+  }                                                                           \
+  static int gtest_##prefix##test_suite_name##_dummy_                         \
+      GTEST_ATTRIBUTE_UNUSED_ =                                               \
+          ::testing::UnitTest::GetInstance()                                  \
+              ->parameterized_test_registry()                                 \
+              .GetTestSuitePatternHolder<test_suite_name>(                    \
+                  GTEST_STRINGIFY_(test_suite_name),                          \
+                  ::testing::internal::CodeLocation(__FILE__, __LINE__))      \
+              ->AddTestSuiteInstantiation(                                    \
+                  GTEST_STRINGIFY_(prefix),                                   \
+                  &gtest_##prefix##test_suite_name##_EvalGenerator_,          \
+                  &gtest_##prefix##test_suite_name##_EvalGenerateName_,       \
+                  __FILE__, __LINE__)
 
- private:
-  std::vector<TestPartResult> array_;
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
-};
+// Allow Marking a Parameterized test class as not needing to be instantiated.
+#define GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(T)                   \
+  namespace gtest_do_not_use_outside_namespace_scope {}                   \
+  static const ::testing::internal::MarkAsIgnored gtest_allow_ignore_##T( \
+      GTEST_STRINGIFY_(T))
 
-// This interface knows how to report a test part result.
-class TestPartResultReporterInterface {
- public:
-  virtual ~TestPartResultReporterInterface() {}
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define INSTANTIATE_TEST_CASE_P                                            \
+  static_assert(::testing::internal::InstantiateTestCase_P_IsDeprecated(), \
+                "");                                                       \
+  INSTANTIATE_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-  virtual void ReportTestPartResult(const TestPartResult& result) = 0;
-};
+}  // namespace testing
 
-namespace internal {
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
-// statement generates new fatal failures. To do so it registers itself as the
-// current test part result reporter. Besides checking if fatal failures were
-// reported, it only delegates the reporting to the former result reporter.
-// The original result reporter is restored in the destructor.
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-class GTEST_API_ HasNewFatalFailureHelper
-    : public TestPartResultReporterInterface {
- public:
-  HasNewFatalFailureHelper();
-  virtual ~HasNewFatalFailureHelper();
-  virtual void ReportTestPartResult(const TestPartResult& result);
-  bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
- private:
-  bool has_new_fatal_failure_;
-  TestPartResultReporterInterface* original_reporter_;
+//
+// Google C++ Testing and Mocking Framework definitions useful in production code.
+// GOOGLETEST_CM0003 DO NOT DELETE
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
-};
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
 
-}  // namespace internal
+// When you need to test the private or protected members of a class,
+// use the FRIEND_TEST macro to declare your tests as friends of the
+// class.  For example:
+//
+// class MyClass {
+//  private:
+//   void PrivateMethod();
+//   FRIEND_TEST(MyClassTest, PrivateMethodWorks);
+// };
+//
+// class MyClassTest : public testing::Test {
+//   // ...
+// };
+//
+// TEST_F(MyClassTest, PrivateMethodWorks) {
+//   // Can call MyClass::PrivateMethod() here.
+// }
+//
+// Note: The test class must be in the same namespace as the class being tested.
+// For example, putting MyClassTest in an anonymous namespace will not work.
 
-}  // namespace testing
+#define FRIEND_TEST(test_case_name, test_name)\
+friend class test_case_name##_##test_name##_Test
 
-#endif  // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
 // Copyright 2008 Google Inc.
 // All Rights Reserved.
 //
@@ -17252,11 +9296,11 @@ class GTEST_API_ HasNewFatalFailureHelper
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
 
-#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
 
 // This header implements typed tests and type-parameterized tests.
 
@@ -17277,22 +9321,22 @@ class FooTest : public testing::Test {
   T value_;
 };
 
-// Next, associate a list of types with the test case, which will be
+// Next, associate a list of types with the test suite, which will be
 // repeated for each type in the list.  The typedef is necessary for
 // the macro to parse correctly.
 typedef testing::Types<char, int, unsigned int> MyTypes;
-TYPED_TEST_CASE(FooTest, MyTypes);
+TYPED_TEST_SUITE(FooTest, MyTypes);
 
 // If the type list contains only one type, you can write that type
 // directly without Types<...>:
-//   TYPED_TEST_CASE(FooTest, int);
+//   TYPED_TEST_SUITE(FooTest, int);
 
 // Then, use TYPED_TEST() instead of TEST_F() to define as many typed
-// tests for this test case as you want.
+// tests for this test suite as you want.
 TYPED_TEST(FooTest, DoesBlah) {
-  // Inside a test, refer to TypeParam to get the type parameter.
-  // Since we are inside a derived class template, C++ requires use to
-  // visit the members of FooTest via 'this'.
+  // Inside a test, refer to the special name TypeParam to get the type
+  // parameter.  Since we are inside a derived class template, C++ requires
+  // us to visit the members of FooTest via 'this'.
   TypeParam n = this->value_;
 
   // To visit static members of the fixture, add the TestFixture::
@@ -17308,6 +9352,24 @@ TYPED_TEST(FooTest, DoesBlah) {
 
 TYPED_TEST(FooTest, HasPropertyA) { ... }
 
+// TYPED_TEST_SUITE takes an optional third argument which allows to specify a
+// class that generates custom test name suffixes based on the type. This should
+// be a class which has a static template function GetName(int index) returning
+// a string for each type. The provided integer index equals the index of the
+// type in the provided type list. In many cases the index can be ignored.
+//
+// For example:
+//   class MyTypeNames {
+//    public:
+//     template <typename T>
+//     static std::string GetName(int) {
+//       if (std::is_same<T, char>()) return "char";
+//       if (std::is_same<T, int>()) return "int";
+//       if (std::is_same<T, unsigned int>()) return "unsignedInt";
+//     }
+//   };
+//   TYPED_TEST_SUITE(FooTest, MyTypes, MyTypeNames);
+
 #endif  // 0
 
 // Type-parameterized tests are abstract test patterns parameterized
@@ -17333,13 +9395,13 @@ class FooTest : public testing::Test {
   ...
 };
 
-// Next, declare that you will define a type-parameterized test case
+// Next, declare that you will define a type-parameterized test suite
 // (the _P suffix is for "parameterized" or "pattern", whichever you
 // prefer):
-TYPED_TEST_CASE_P(FooTest);
+TYPED_TEST_SUITE_P(FooTest);
 
 // Then, use TYPED_TEST_P() to define as many type-parameterized tests
-// for this type-parameterized test case as you want.
+// for this type-parameterized test suite as you want.
 TYPED_TEST_P(FooTest, DoesBlah) {
   // Inside a test, refer to TypeParam to get the type parameter.
   TypeParam n = 0;
@@ -17350,10 +9412,10 @@ TYPED_TEST_P(FooTest, HasPropertyA) { ... }
 
 // Now the tricky part: you need to register all test patterns before
 // you can instantiate them.  The first argument of the macro is the
-// test case name; the rest are the names of the tests in this test
+// test suite name; the rest are the names of the tests in this test
 // case.
-REGISTER_TYPED_TEST_CASE_P(FooTest,
-                           DoesBlah, HasPropertyA);
+REGISTER_TYPED_TEST_SUITE_P(FooTest,
+                            DoesBlah, HasPropertyA);
 
 // Finally, you are free to instantiate the pattern with the types you
 // want.  If you put the above code in a header file, you can #include
@@ -17361,144 +9423,192 @@ REGISTER_TYPED_TEST_CASE_P(FooTest,
 //
 // To distinguish different instances of the pattern, the first
 // argument to the INSTANTIATE_* macro is a prefix that will be added
-// to the actual test case name.  Remember to pick unique prefixes for
+// to the actual test suite name.  Remember to pick unique prefixes for
 // different instances.
 typedef testing::Types<char, int, unsigned int> MyTypes;
-INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);
+INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
 
 // If the type list contains only one type, you can write that type
 // directly without Types<...>:
-//   INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);
+//   INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, int);
+//
+// Similar to the optional argument of TYPED_TEST_SUITE above,
+// INSTANTIATE_TEST_SUITE_P takes an optional fourth argument which allows to
+// generate custom names.
+//   INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes, MyTypeNames);
 
 #endif  // 0
 
 
 // Implements typed tests.
 
-#if GTEST_HAS_TYPED_TEST
-
 // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
 //
 // Expands to the name of the typedef for the type parameters of the
-// given test case.
-# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_
-
-// The 'Types' template argument below must have spaces around it
-// since some compilers may choke on '>>' when passing a template
-// instance (e.g. Types<int>)
-# define TYPED_TEST_CASE(CaseName, Types) \
-  typedef ::testing::internal::TypeList< Types >::type \
-      GTEST_TYPE_PARAMS_(CaseName)
-
-# define TYPED_TEST(CaseName, TestName) \
-  template <typename gtest_TypeParam_> \
-  class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
-      : public CaseName<gtest_TypeParam_> { \
-   private: \
-    typedef CaseName<gtest_TypeParam_> TestFixture; \
-    typedef gtest_TypeParam_ TypeParam; \
-    virtual void TestBody(); \
-  }; \
-  bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \
-      ::testing::internal::TypeParameterizedTest< \
-          CaseName, \
-          ::testing::internal::TemplateSel< \
-              GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \
-          GTEST_TYPE_PARAMS_(CaseName)>::Register(\
-              "", #CaseName, #TestName, 0); \
-  template <typename gtest_TypeParam_> \
-  void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody()
-
-#endif  // GTEST_HAS_TYPED_TEST
+// given test suite.
+#define GTEST_TYPE_PARAMS_(TestSuiteName) gtest_type_params_##TestSuiteName##_
+
+// Expands to the name of the typedef for the NameGenerator, responsible for
+// creating the suffixes of the name.
+#define GTEST_NAME_GENERATOR_(TestSuiteName) \
+  gtest_type_params_##TestSuiteName##_NameGenerator
+
+#define TYPED_TEST_SUITE(CaseName, Types, ...)                          \
+  typedef ::testing::internal::GenerateTypeList<Types>::type            \
+      GTEST_TYPE_PARAMS_(CaseName);                                     \
+  typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type \
+      GTEST_NAME_GENERATOR_(CaseName)
+
+#define TYPED_TEST(CaseName, TestName)                                        \
+  static_assert(sizeof(GTEST_STRINGIFY_(TestName)) > 1,                       \
+                "test-name must not be empty");                               \
+  template <typename gtest_TypeParam_>                                        \
+  class GTEST_TEST_CLASS_NAME_(CaseName, TestName)                            \
+      : public CaseName<gtest_TypeParam_> {                                   \
+   private:                                                                   \
+    typedef CaseName<gtest_TypeParam_> TestFixture;                           \
+    typedef gtest_TypeParam_ TypeParam;                                       \
+    void TestBody() override;                                                 \
+  };                                                                          \
+  static bool gtest_##CaseName##_##TestName##_registered_                     \
+      GTEST_ATTRIBUTE_UNUSED_ = ::testing::internal::TypeParameterizedTest<   \
+          CaseName,                                                           \
+          ::testing::internal::TemplateSel<GTEST_TEST_CLASS_NAME_(CaseName,   \
+                                                                  TestName)>, \
+          GTEST_TYPE_PARAMS_(                                                 \
+              CaseName)>::Register("",                                        \
+                                   ::testing::internal::CodeLocation(         \
+                                       __FILE__, __LINE__),                   \
+                                   GTEST_STRINGIFY_(CaseName),                \
+                                   GTEST_STRINGIFY_(TestName), 0,             \
+                                   ::testing::internal::GenerateNames<        \
+                                       GTEST_NAME_GENERATOR_(CaseName),       \
+                                       GTEST_TYPE_PARAMS_(CaseName)>());      \
+  template <typename gtest_TypeParam_>                                        \
+  void GTEST_TEST_CLASS_NAME_(CaseName,                                       \
+                              TestName)<gtest_TypeParam_>::TestBody()
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define TYPED_TEST_CASE                                                \
+  static_assert(::testing::internal::TypedTestCaseIsDeprecated(), ""); \
+  TYPED_TEST_SUITE
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 // Implements type-parameterized tests.
 
-#if GTEST_HAS_TYPED_TEST_P
-
 // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
 //
 // Expands to the namespace name that the type-parameterized tests for
-// the given type-parameterized test case are defined in.  The exact
+// the given type-parameterized test suite are defined in.  The exact
 // name of the namespace is subject to change without notice.
-# define GTEST_CASE_NAMESPACE_(TestCaseName) \
-  gtest_case_##TestCaseName##_
+#define GTEST_SUITE_NAMESPACE_(TestSuiteName) gtest_suite_##TestSuiteName##_
 
 // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
 //
 // Expands to the name of the variable used to remember the names of
-// the defined tests in the given test case.
-# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
-  gtest_typed_test_case_p_state_##TestCaseName##_
+// the defined tests in the given test suite.
+#define GTEST_TYPED_TEST_SUITE_P_STATE_(TestSuiteName) \
+  gtest_typed_test_suite_p_state_##TestSuiteName##_
 
 // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
 //
 // Expands to the name of the variable used to remember the names of
-// the registered tests in the given test case.
-# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
-  gtest_registered_test_names_##TestCaseName##_
+// the registered tests in the given test suite.
+#define GTEST_REGISTERED_TEST_NAMES_(TestSuiteName) \
+  gtest_registered_test_names_##TestSuiteName##_
 
 // The variables defined in the type-parameterized test macros are
 // static as typically these macros are used in a .h file that can be
 // #included in multiple translation units linked together.
-# define TYPED_TEST_CASE_P(CaseName) \
-  static ::testing::internal::TypedTestCasePState \
-      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)
-
-# define TYPED_TEST_P(CaseName, TestName) \
-  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
-  template <typename gtest_TypeParam_> \
-  class TestName : public CaseName<gtest_TypeParam_> { \
-   private: \
-    typedef CaseName<gtest_TypeParam_> TestFixture; \
-    typedef gtest_TypeParam_ TypeParam; \
-    virtual void TestBody(); \
-  }; \
-  static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
-      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
-          __FILE__, __LINE__, #CaseName, #TestName); \
-  } \
-  template <typename gtest_TypeParam_> \
-  void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()
-
-# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
-  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
-  typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
-  } \
-  static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \
-      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\
-          __FILE__, __LINE__, #__VA_ARGS__)
-
-// The 'Types' template argument below must have spaces around it
-// since some compilers may choke on '>>' when passing a template
-// instance (e.g. Types<int>)
-# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \
-  bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
-      ::testing::internal::TypeParameterizedTestCase<CaseName, \
-          GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
-          ::testing::internal::TypeList< Types >::type>::Register(\
-              #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName))
-
-#endif  // GTEST_HAS_TYPED_TEST_P
-
-#endif  // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
-
-// Depending on the platform, different string classes are available.
-// On Linux, in addition to ::std::string, Google also makes use of
-// class ::string, which has the same interface as ::std::string, but
-// has a different implementation.
-//
-// The user can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that
-// ::string is available AND is a distinct type to ::std::string, or
-// define it to 0 to indicate otherwise.
-//
-// If the user's ::std::string and ::string are the same class due to
-// aliasing, he should define GTEST_HAS_GLOBAL_STRING to 0.
-//
-// If the user doesn't define GTEST_HAS_GLOBAL_STRING, it is defined
-// heuristically.
+#define TYPED_TEST_SUITE_P(SuiteName)              \
+  static ::testing::internal::TypedTestSuitePState \
+      GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName)
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define TYPED_TEST_CASE_P                                                 \
+  static_assert(::testing::internal::TypedTestCase_P_IsDeprecated(), ""); \
+  TYPED_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+#define TYPED_TEST_P(SuiteName, TestName)                             \
+  namespace GTEST_SUITE_NAMESPACE_(SuiteName) {                       \
+    template <typename gtest_TypeParam_>                              \
+    class TestName : public SuiteName<gtest_TypeParam_> {             \
+     private:                                                         \
+      typedef SuiteName<gtest_TypeParam_> TestFixture;                \
+      typedef gtest_TypeParam_ TypeParam;                             \
+      void TestBody() override;                                       \
+    };                                                                \
+    static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
+        GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).AddTestName(       \
+            __FILE__, __LINE__, GTEST_STRINGIFY_(SuiteName),          \
+            GTEST_STRINGIFY_(TestName));                              \
+  }                                                                   \
+  template <typename gtest_TypeParam_>                                \
+  void GTEST_SUITE_NAMESPACE_(                                        \
+      SuiteName)::TestName<gtest_TypeParam_>::TestBody()
+
+// Note: this won't work correctly if the trailing arguments are macros.
+#define REGISTER_TYPED_TEST_SUITE_P(SuiteName, ...)                         \
+  namespace GTEST_SUITE_NAMESPACE_(SuiteName) {                             \
+    typedef ::testing::internal::Templates<__VA_ARGS__> gtest_AllTests_;    \
+  }                                                                         \
+  static const char* const GTEST_REGISTERED_TEST_NAMES_(                    \
+      SuiteName) GTEST_ATTRIBUTE_UNUSED_ =                                  \
+      GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).VerifyRegisteredTestNames( \
+          GTEST_STRINGIFY_(SuiteName), __FILE__, __LINE__, #__VA_ARGS__)
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define REGISTER_TYPED_TEST_CASE_P                                           \
+  static_assert(::testing::internal::RegisterTypedTestCase_P_IsDeprecated(), \
+                "");                                                         \
+  REGISTER_TYPED_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+#define INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, SuiteName, Types, ...)       \
+  static_assert(sizeof(GTEST_STRINGIFY_(Prefix)) > 1,                       \
+                "test-suit-prefix must not be empty");                      \
+  static bool gtest_##Prefix##_##SuiteName GTEST_ATTRIBUTE_UNUSED_ =        \
+      ::testing::internal::TypeParameterizedTestSuite<                      \
+          SuiteName, GTEST_SUITE_NAMESPACE_(SuiteName)::gtest_AllTests_,    \
+          ::testing::internal::GenerateTypeList<Types>::type>::             \
+          Register(GTEST_STRINGIFY_(Prefix),                                \
+                   ::testing::internal::CodeLocation(__FILE__, __LINE__),   \
+                   &GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName),             \
+                   GTEST_STRINGIFY_(SuiteName),                             \
+                   GTEST_REGISTERED_TEST_NAMES_(SuiteName),                 \
+                   ::testing::internal::GenerateNames<                      \
+                       ::testing::internal::NameGeneratorSelector<          \
+                           __VA_ARGS__>::type,                              \
+                       ::testing::internal::GenerateTypeList<Types>::type>())
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define INSTANTIATE_TYPED_TEST_CASE_P                                      \
+  static_assert(                                                           \
+      ::testing::internal::InstantiateTypedTestCase_P_IsDeprecated(), ""); \
+  INSTANTIATE_TYPED_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
 
 namespace testing {
 
+// Silence C4100 (unreferenced formal parameter) and 4805
+// unsafe mix of type 'const int' and type 'const bool'
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable:4805)
+# pragma warning(disable:4100)
+#endif
+
+
 // Declares the flags.
 
 // This flag temporary enables the disabled tests.
@@ -17516,10 +9626,18 @@ GTEST_DECLARE_bool_(catch_exceptions);
 // to let Google Test decide.
 GTEST_DECLARE_string_(color);
 
+// This flag controls whether the test runner should continue execution past
+// first failure.
+GTEST_DECLARE_bool_(fail_fast);
+
 // This flag sets up the filter to select by name using a glob pattern
 // the tests to run. If the filter is not given all tests are executed.
 GTEST_DECLARE_string_(filter);
 
+// This flag controls whether Google Test installs a signal handler that dumps
+// debugging information when fatal signals are raised.
+GTEST_DECLARE_bool_(install_failure_signal_handler);
+
 // This flag causes the Google Test to list tests. None of the tests listed
 // are actually run if the flag is provided.
 GTEST_DECLARE_bool_(list_tests);
@@ -17528,10 +9646,16 @@ GTEST_DECLARE_bool_(list_tests);
 // in addition to its normal textual output.
 GTEST_DECLARE_string_(output);
 
+// This flags control whether Google Test prints only test failures.
+GTEST_DECLARE_bool_(brief);
+
 // This flags control whether Google Test prints the elapsed time for each
 // test.
 GTEST_DECLARE_bool_(print_time);
 
+// This flags control whether Google Test prints UTF8 characters as text.
+GTEST_DECLARE_bool_(print_utf8);
+
 // This flag specifies the random number seed.
 GTEST_DECLARE_int32_(random_seed);
 
@@ -17552,7 +9676,7 @@ GTEST_DECLARE_int32_(stack_trace_depth);
 
 // When this flag is specified, a failed assertion will throw an
 // exception if exceptions are enabled, or exit the program with a
-// non-zero code otherwise.
+// non-zero code otherwise. For use with an external test framework.
 GTEST_DECLARE_bool_(throw_on_failure);
 
 // When this flag is set with a "host:port" string, on supported
@@ -17560,6 +9684,10 @@ GTEST_DECLARE_bool_(throw_on_failure);
 // the specified host machine.
 GTEST_DECLARE_string_(stream_result_to);
 
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+GTEST_DECLARE_string_(flagfile);
+#endif  // GTEST_USE_OWN_FLAGFILE_FLAG_
+
 // The upper limit for valid stack trace depths.
 const int kMaxStackTraceDepth = 100;
 
@@ -17577,9 +9705,11 @@ class TestEventListenersAccessor;
 class TestEventRepeater;
 class UnitTestRecordPropertyTestHelper;
 class WindowsDeathTest;
+class FuchsiaDeathTest;
 class UnitTestImpl* GetUnitTestImpl();
 void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
                                     const std::string& message);
+std::set<std::string>* GetIgnoredParameterizedTestSuites();
 
 }  // namespace internal
 
@@ -17587,7 +9717,12 @@ void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
 // If we don't forward declare them the compiler might confuse the classes
 // in friendship clauses with same named classes on the scope.
 class Test;
-class TestCase;
+class TestSuite;
+
+// Old API is still available but deprecated
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+using TestCase = TestSuite;
+#endif
 class TestInfo;
 class UnitTest;
 
@@ -17675,2041 +9810,2080 @@ class GTEST_API_ AssertionResult {
   // Copy constructor.
   // Used in EXPECT_TRUE/FALSE(assertion_result).
   AssertionResult(const AssertionResult& other);
-  // Used in the EXPECT_TRUE/FALSE(bool_expression).
-  explicit AssertionResult(bool success) : success_(success) {}
-
-  // Returns true iff the assertion succeeded.
-  operator bool() const { return success_; }  // NOLINT
-
-  // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
-  AssertionResult operator!() const;
-
-  // Returns the text streamed into this AssertionResult. Test assertions
-  // use it when they fail (i.e., the predicate's outcome doesn't match the
-  // assertion's expectation). When nothing has been streamed into the
-  // object, returns an empty string.
-  const char* message() const {
-    return message_.get() != NULL ?  message_->c_str() : "";
-  }
-  // TODO(vladl@google.com): Remove this after making sure no clients use it.
-  // Deprecated; please use message() instead.
-  const char* failure_message() const { return message(); }
-
-  // Streams a custom failure message into this object.
-  template <typename T> AssertionResult& operator<<(const T& value) {
-    AppendMessage(Message() << value);
-    return *this;
-  }
-
-  // Allows streaming basic output manipulators such as endl or flush into
-  // this object.
-  AssertionResult& operator<<(
-      ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
-    AppendMessage(Message() << basic_manipulator);
-    return *this;
-  }
-
- private:
-  // Appends the contents of message to message_.
-  void AppendMessage(const Message& a_message) {
-    if (message_.get() == NULL)
-      message_.reset(new ::std::string);
-    message_->append(a_message.GetString().c_str());
-  }
-
-  // Stores result of the assertion predicate.
-  bool success_;
-  // Stores the message describing the condition in case the expectation
-  // construct is not satisfied with the predicate's outcome.
-  // Referenced via a pointer to avoid taking too much stack frame space
-  // with test assertions.
-  internal::scoped_ptr< ::std::string> message_;
-
-  GTEST_DISALLOW_ASSIGN_(AssertionResult);
-};
-
-// Makes a successful assertion result.
-GTEST_API_ AssertionResult AssertionSuccess();
-
-// Makes a failed assertion result.
-GTEST_API_ AssertionResult AssertionFailure();
-
-// Makes a failed assertion result with the given failure message.
-// Deprecated; use AssertionFailure() << msg.
-GTEST_API_ AssertionResult AssertionFailure(const Message& msg);
-
-// The abstract class that all tests inherit from.
-//
-// In Google Test, a unit test program contains one or many TestCases, and
-// each TestCase contains one or many Tests.
-//
-// When you define a test using the TEST macro, you don't need to
-// explicitly derive from Test - the TEST macro automatically does
-// this for you.
-//
-// The only time you derive from Test is when defining a test fixture
-// to be used a TEST_F.  For example:
-//
-//   class FooTest : public testing::Test {
-//    protected:
-//     virtual void SetUp() { ... }
-//     virtual void TearDown() { ... }
-//     ...
-//   };
-//
-//   TEST_F(FooTest, Bar) { ... }
-//   TEST_F(FooTest, Baz) { ... }
-//
-// Test is not copyable.
-class GTEST_API_ Test {
- public:
-  friend class TestInfo;
-
-  // Defines types for pointers to functions that set up and tear down
-  // a test case.
-  typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc;
-  typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc;
-
-  // The d'tor is virtual as we intend to inherit from Test.
-  virtual ~Test();
-
-  // Sets up the stuff shared by all tests in this test case.
-  //
-  // Google Test will call Foo::SetUpTestCase() before running the first
-  // test in test case Foo.  Hence a sub-class can define its own
-  // SetUpTestCase() method to shadow the one defined in the super
-  // class.
-  static void SetUpTestCase() {}
-
-  // Tears down the stuff shared by all tests in this test case.
-  //
-  // Google Test will call Foo::TearDownTestCase() after running the last
-  // test in test case Foo.  Hence a sub-class can define its own
-  // TearDownTestCase() method to shadow the one defined in the super
-  // class.
-  static void TearDownTestCase() {}
-
-  // Returns true iff the current test has a fatal failure.
-  static bool HasFatalFailure();
-
-  // Returns true iff the current test has a non-fatal failure.
-  static bool HasNonfatalFailure();
-
-  // Returns true iff the current test has a (either fatal or
-  // non-fatal) failure.
-  static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
-
-  // Logs a property for the current test, test case, or for the entire
-  // invocation of the test program when used outside of the context of a
-  // test case.  Only the last value for a given key is remembered.  These
-  // are public static so they can be called from utility functions that are
-  // not members of the test fixture.  Calls to RecordProperty made during
-  // lifespan of the test (from the moment its constructor starts to the
-  // moment its destructor finishes) will be output in XML as attributes of
-  // the <testcase> element.  Properties recorded from fixture's
-  // SetUpTestCase or TearDownTestCase are logged as attributes of the
-  // corresponding <testsuite> element.  Calls to RecordProperty made in the
-  // global context (before or after invocation of RUN_ALL_TESTS and from
-  // SetUp/TearDown method of Environment objects registered with Google
-  // Test) will be output as attributes of the <testsuites> element.
-  static void RecordProperty(const std::string& key, const std::string& value);
-  static void RecordProperty(const std::string& key, int value);
-
- protected:
-  // Creates a Test object.
-  Test();
-
-  // Sets up the test fixture.
-  virtual void SetUp();
 
-  // Tears down the test fixture.
-  virtual void TearDown();
-
- private:
-  // Returns true iff the current test has the same fixture class as
-  // the first test in the current test case.
-  static bool HasSameFixtureClass();
+// C4800 is a level 3 warning in Visual Studio 2015 and earlier.
+// This warning is not emitted in Visual Studio 2017.
+// This warning is off by default starting in Visual Studio 2019 but can be
+// enabled with command-line options.
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
+  GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */)
+#endif
 
-  // Runs the test after the test fixture has been set up.
+  // Used in the EXPECT_TRUE/FALSE(bool_expression).
   //
-  // A sub-class must implement this to define the test logic.
+  // T must be contextually convertible to bool.
   //
-  // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
-  // Instead, use the TEST or TEST_F macro.
-  virtual void TestBody() = 0;
-
-  // Sets up, executes, and tears down the test.
-  void Run();
-
-  // Deletes self.  We deliberately pick an unusual name for this
-  // internal method to avoid clashing with names used in user TESTs.
-  void DeleteSelf_() { delete this; }
-
-  // Uses a GTestFlagSaver to save and restore all Google Test flags.
-  const internal::GTestFlagSaver* const gtest_flag_saver_;
+  // The second parameter prevents this overload from being considered if
+  // the argument is implicitly convertible to AssertionResult. In that case
+  // we want AssertionResult's copy constructor to be used.
+  template <typename T>
+  explicit AssertionResult(
+      const T& success,
+      typename std::enable_if<
+          !std::is_convertible<T, AssertionResult>::value>::type*
+      /*enabler*/
+      = nullptr)
+      : success_(success) {}
+
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
+  GTEST_DISABLE_MSC_WARNINGS_POP_()
+#endif
 
-  // Often a user mis-spells SetUp() as Setup() and spends a long time
-  // wondering why it is never called by Google Test.  The declaration of
-  // the following method is solely for catching such an error at
-  // compile time:
-  //
-  //   - The return type is deliberately chosen to be not void, so it
-  //   will be a conflict if a user declares void Setup() in his test
-  //   fixture.
-  //
-  //   - This method is private, so it will be another compiler error
-  //   if a user calls it from his test fixture.
-  //
-  // DO NOT OVERRIDE THIS FUNCTION.
-  //
-  // If you see an error about overriding the following function or
-  // about it being private, you have mis-spelled SetUp() as Setup().
-  struct Setup_should_be_spelled_SetUp {};
-  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
+  // Assignment operator.
+  AssertionResult& operator=(AssertionResult other) {
+    swap(other);
+    return *this;
+  }
 
-  // We disallow copying Tests.
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
-};
+  // Returns true if and only if the assertion succeeded.
+  operator bool() const { return success_; }  // NOLINT
 
-typedef internal::TimeInMillis TimeInMillis;
+  // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
+  AssertionResult operator!() const;
 
-// A copyable object representing a user specified test property which can be
-// output as a key/value string pair.
-//
-// Don't inherit from TestProperty as its destructor is not virtual.
-class TestProperty {
- public:
-  // C'tor.  TestProperty does NOT have a default constructor.
-  // Always use this constructor (with parameters) to create a
-  // TestProperty object.
-  TestProperty(const std::string& a_key, const std::string& a_value) :
-    key_(a_key), value_(a_value) {
+  // Returns the text streamed into this AssertionResult. Test assertions
+  // use it when they fail (i.e., the predicate's outcome doesn't match the
+  // assertion's expectation). When nothing has been streamed into the
+  // object, returns an empty string.
+  const char* message() const {
+    return message_.get() != nullptr ? message_->c_str() : "";
   }
+  // Deprecated; please use message() instead.
+  const char* failure_message() const { return message(); }
 
-  // Gets the user supplied key.
-  const char* key() const {
-    return key_.c_str();
+  // Streams a custom failure message into this object.
+  template <typename T> AssertionResult& operator<<(const T& value) {
+    AppendMessage(Message() << value);
+    return *this;
   }
 
-  // Gets the user supplied value.
-  const char* value() const {
-    return value_.c_str();
+  // Allows streaming basic output manipulators such as endl or flush into
+  // this object.
+  AssertionResult& operator<<(
+      ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
+    AppendMessage(Message() << basic_manipulator);
+    return *this;
   }
 
-  // Sets a new value, overriding the one supplied in the constructor.
-  void SetValue(const std::string& new_value) {
-    value_ = new_value;
+ private:
+  // Appends the contents of message to message_.
+  void AppendMessage(const Message& a_message) {
+    if (message_.get() == nullptr) message_.reset(new ::std::string);
+    message_->append(a_message.GetString().c_str());
   }
 
- private:
-  // The key supplied by the user.
-  std::string key_;
-  // The value supplied by the user.
-  std::string value_;
+  // Swap the contents of this AssertionResult with other.
+  void swap(AssertionResult& other);
+
+  // Stores result of the assertion predicate.
+  bool success_;
+  // Stores the message describing the condition in case the expectation
+  // construct is not satisfied with the predicate's outcome.
+  // Referenced via a pointer to avoid taking too much stack frame space
+  // with test assertions.
+  std::unique_ptr< ::std::string> message_;
 };
 
-// The result of a single Test.  This includes a list of
-// TestPartResults, a list of TestProperties, a count of how many
-// death tests there are in the Test, and how much time it took to run
-// the Test.
-//
-// TestResult is not copyable.
-class GTEST_API_ TestResult {
- public:
-  // Creates an empty TestResult.
-  TestResult();
+// Makes a successful assertion result.
+GTEST_API_ AssertionResult AssertionSuccess();
 
-  // D'tor.  Do not inherit from TestResult.
-  ~TestResult();
+// Makes a failed assertion result.
+GTEST_API_ AssertionResult AssertionFailure();
 
-  // Gets the number of all test parts.  This is the sum of the number
-  // of successful test parts and the number of failed test parts.
-  int total_part_count() const;
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << msg.
+GTEST_API_ AssertionResult AssertionFailure(const Message& msg);
 
-  // Returns the number of the test properties.
-  int test_property_count() const;
+}  // namespace testing
 
-  // Returns true iff the test passed (i.e. no test part failed).
-  bool Passed() const { return !Failed(); }
+// Includes the auto-generated header that implements a family of generic
+// predicate assertion macros. This include comes late because it relies on
+// APIs declared above.
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-  // Returns true iff the test failed.
-  bool Failed() const;
+// This file is AUTOMATICALLY GENERATED on 01/02/2019 by command
+// 'gen_gtest_pred_impl.py 5'.  DO NOT EDIT BY HAND!
+//
+// Implements a family of generic predicate assertion macros.
+// GOOGLETEST_CM0001 DO NOT DELETE
 
-  // Returns true iff the test fatally failed.
-  bool HasFatalFailure() const;
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
 
-  // Returns true iff the test has a non-fatal failure.
-  bool HasNonfatalFailure() const;
 
-  // Returns the elapsed time, in milliseconds.
-  TimeInMillis elapsed_time() const { return elapsed_time_; }
+namespace testing {
 
-  // Returns the i-th test part result among all the results. i can range
-  // from 0 to test_property_count() - 1. If i is not in that range, aborts
-  // the program.
-  const TestPartResult& GetTestPartResult(int i) const;
+// This header implements a family of generic predicate assertion
+// macros:
+//
+//   ASSERT_PRED_FORMAT1(pred_format, v1)
+//   ASSERT_PRED_FORMAT2(pred_format, v1, v2)
+//   ...
+//
+// where pred_format is a function or functor that takes n (in the
+// case of ASSERT_PRED_FORMATn) values and their source expression
+// text, and returns a testing::AssertionResult.  See the definition
+// of ASSERT_EQ in gtest.h for an example.
+//
+// If you don't care about formatting, you can use the more
+// restrictive version:
+//
+//   ASSERT_PRED1(pred, v1)
+//   ASSERT_PRED2(pred, v1, v2)
+//   ...
+//
+// where pred is an n-ary function or functor that returns bool,
+// and the values v1, v2, ..., must support the << operator for
+// streaming to std::ostream.
+//
+// We also define the EXPECT_* variations.
+//
+// For now we only support predicates whose arity is at most 5.
+// Please email googletestframework@googlegroups.com if you need
+// support for higher arities.
 
-  // Returns the i-th test property. i can range from 0 to
-  // test_property_count() - 1. If i is not in that range, aborts the
-  // program.
-  const TestProperty& GetTestProperty(int i) const;
+// GTEST_ASSERT_ is the basic statement to which all of the assertions
+// in this file reduce.  Don't use this in your code.
 
- private:
-  friend class TestInfo;
-  friend class TestCase;
-  friend class UnitTest;
-  friend class internal::DefaultGlobalTestPartResultReporter;
-  friend class internal::ExecDeathTest;
-  friend class internal::TestResultAccessor;
-  friend class internal::UnitTestImpl;
-  friend class internal::WindowsDeathTest;
+#define GTEST_ASSERT_(expression, on_failure) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (const ::testing::AssertionResult gtest_ar = (expression)) \
+    ; \
+  else \
+    on_failure(gtest_ar.failure_message())
 
-  // Gets the vector of TestPartResults.
-  const std::vector<TestPartResult>& test_part_results() const {
-    return test_part_results_;
-  }
 
-  // Gets the vector of TestProperties.
-  const std::vector<TestProperty>& test_properties() const {
-    return test_properties_;
-  }
+// Helper function for implementing {EXPECT|ASSERT}_PRED1.  Don't use
+// this in your code.
+template <typename Pred,
+          typename T1>
+AssertionResult AssertPred1Helper(const char* pred_text,
+                                  const char* e1,
+                                  Pred pred,
+                                  const T1& v1) {
+  if (pred(v1)) return AssertionSuccess();
 
-  // Sets the elapsed time.
-  void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }
+  return AssertionFailure()
+         << pred_text << "(" << e1 << ") evaluates to false, where"
+         << "\n"
+         << e1 << " evaluates to " << ::testing::PrintToString(v1);
+}
 
-  // Adds a test property to the list. The property is validated and may add
-  // a non-fatal failure if invalid (e.g., if it conflicts with reserved
-  // key names). If a property is already recorded for the same key, the
-  // value will be updated, rather than storing multiple values for the same
-  // key.  xml_element specifies the element for which the property is being
-  // recorded and is used for validation.
-  void RecordProperty(const std::string& xml_element,
-                      const TestProperty& test_property);
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, v1), \
+                on_failure)
 
-  // Adds a failure if the key is a reserved attribute of Google Test
-  // testcase tags.  Returns true if the property is valid.
-  // TODO(russr): Validate attribute names are legal and human readable.
-  static bool ValidateTestProperty(const std::string& xml_element,
-                                   const TestProperty& test_property);
+// Internal macro for implementing {EXPECT|ASSERT}_PRED1.  Don't use
+// this in your code.
+#define GTEST_PRED1_(pred, v1, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
+                                             #v1, \
+                                             pred, \
+                                             v1), on_failure)
 
-  // Adds a test part result to the list.
-  void AddTestPartResult(const TestPartResult& test_part_result);
+// Unary predicate assertion macros.
+#define EXPECT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
 
-  // Returns the death test count.
-  int death_test_count() const { return death_test_count_; }
 
-  // Increments the death test count, returning the new count.
-  int increment_death_test_count() { return ++death_test_count_; }
 
-  // Clears the test part results.
-  void ClearTestPartResults();
+// Helper function for implementing {EXPECT|ASSERT}_PRED2.  Don't use
+// this in your code.
+template <typename Pred,
+          typename T1,
+          typename T2>
+AssertionResult AssertPred2Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2) {
+  if (pred(v1, v2)) return AssertionSuccess();
+
+  return AssertionFailure()
+         << pred_text << "(" << e1 << ", " << e2
+         << ") evaluates to false, where"
+         << "\n"
+         << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+         << e2 << " evaluates to " << ::testing::PrintToString(v2);
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED2.  Don't use
+// this in your code.
+#define GTEST_PRED2_(pred, v1, v2, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             pred, \
+                                             v1, \
+                                             v2), on_failure)
 
-  // Clears the object.
-  void Clear();
+// Binary predicate assertion macros.
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
 
-  // Protects mutable state of the property vector and of owned
-  // properties, whose values may be updated.
-  internal::Mutex test_properites_mutex_;
 
-  // The vector of TestPartResults
-  std::vector<TestPartResult> test_part_results_;
-  // The vector of TestProperties
-  std::vector<TestProperty> test_properties_;
-  // Running count of death tests.
-  int death_test_count_;
-  // The elapsed time, in milliseconds.
-  TimeInMillis elapsed_time_;
 
-  // We disallow copying TestResult.
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult);
-};  // class TestResult
+// Helper function for implementing {EXPECT|ASSERT}_PRED3.  Don't use
+// this in your code.
+template <typename Pred,
+          typename T1,
+          typename T2,
+          typename T3>
+AssertionResult AssertPred3Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3) {
+  if (pred(v1, v2, v3)) return AssertionSuccess();
 
-// A TestInfo object stores the following information about a test:
-//
-//   Test case name
-//   Test name
-//   Whether the test should be run
-//   A function pointer that creates the test object when invoked
-//   Test result
-//
-// The constructor of TestInfo registers itself with the UnitTest
-// singleton such that the RUN_ALL_TESTS() macro knows which tests to
-// run.
-class GTEST_API_ TestInfo {
- public:
-  // Destructs a TestInfo object.  This function is not virtual, so
-  // don't inherit from TestInfo.
-  ~TestInfo();
+  return AssertionFailure()
+         << pred_text << "(" << e1 << ", " << e2 << ", " << e3
+         << ") evaluates to false, where"
+         << "\n"
+         << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+         << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n"
+         << e3 << " evaluates to " << ::testing::PrintToString(v3);
+}
 
-  // Returns the test case name.
-  const char* test_case_name() const { return test_case_name_.c_str(); }
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
+                on_failure)
 
-  // Returns the test name.
-  const char* name() const { return name_.c_str(); }
+// Internal macro for implementing {EXPECT|ASSERT}_PRED3.  Don't use
+// this in your code.
+#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3), on_failure)
 
-  // Returns the name of the parameter type, or NULL if this is not a typed
-  // or a type-parameterized test.
-  const char* type_param() const {
-    if (type_param_.get() != NULL)
-      return type_param_->c_str();
-    return NULL;
-  }
+// Ternary predicate assertion macros.
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
 
-  // Returns the text representation of the value parameter, or NULL if this
-  // is not a value-parameterized test.
-  const char* value_param() const {
-    if (value_param_.get() != NULL)
-      return value_param_->c_str();
-    return NULL;
-  }
 
-  // Returns true if this test should run, that is if the test is not
-  // disabled (or it is disabled but the also_run_disabled_tests flag has
-  // been specified) and its full name matches the user-specified filter.
-  //
-  // Google Test allows the user to filter the tests by their full names.
-  // The full name of a test Bar in test case Foo is defined as
-  // "Foo.Bar".  Only the tests that match the filter will run.
-  //
-  // A filter is a colon-separated list of glob (not regex) patterns,
-  // optionally followed by a '-' and a colon-separated list of
-  // negative patterns (tests to exclude).  A test is run if it
-  // matches one of the positive patterns and does not match any of
-  // the negative patterns.
-  //
-  // For example, *A*:Foo.* is a filter that matches any string that
-  // contains the character 'A' or starts with "Foo.".
-  bool should_run() const { return should_run_; }
 
-  // Returns true iff this test will appear in the XML report.
-  bool is_reportable() const {
-    // For now, the XML report includes all tests matching the filter.
-    // In the future, we may trim tests that are excluded because of
-    // sharding.
-    return matches_filter_;
-  }
+// Helper function for implementing {EXPECT|ASSERT}_PRED4.  Don't use
+// this in your code.
+template <typename Pred,
+          typename T1,
+          typename T2,
+          typename T3,
+          typename T4>
+AssertionResult AssertPred4Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4) {
+  if (pred(v1, v2, v3, v4)) return AssertionSuccess();
 
-  // Returns the result of the test.
-  const TestResult* result() const { return &result_; }
+  return AssertionFailure()
+         << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+         << ") evaluates to false, where"
+         << "\n"
+         << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+         << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n"
+         << e3 << " evaluates to " << ::testing::PrintToString(v3) << "\n"
+         << e4 << " evaluates to " << ::testing::PrintToString(v4);
+}
 
- private:
-#if GTEST_HAS_DEATH_TEST
-  friend class internal::DefaultDeathTestFactory;
-#endif  // GTEST_HAS_DEATH_TEST
-  friend class Test;
-  friend class TestCase;
-  friend class internal::UnitTestImpl;
-  friend class internal::StreamingListenerTest;
-  friend TestInfo* internal::MakeAndRegisterTestInfo(
-      const char* test_case_name,
-      const char* name,
-      const char* type_param,
-      const char* value_param,
-      internal::TypeId fixture_class_id,
-      Test::SetUpTestCaseFunc set_up_tc,
-      Test::TearDownTestCaseFunc tear_down_tc,
-      internal::TestFactoryBase* factory);
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
+                on_failure)
 
-  // Constructs a TestInfo object. The newly constructed instance assumes
-  // ownership of the factory object.
-  TestInfo(const std::string& test_case_name,
-           const std::string& name,
-           const char* a_type_param,   // NULL if not a type-parameterized test
-           const char* a_value_param,  // NULL if not a value-parameterized test
-           internal::TypeId fixture_class_id,
-           internal::TestFactoryBase* factory);
+// Internal macro for implementing {EXPECT|ASSERT}_PRED4.  Don't use
+// this in your code.
+#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4), on_failure)
 
-  // Increments the number of death tests encountered in this test so
-  // far.
-  int increment_death_test_count() {
-    return result_.increment_death_test_count();
-  }
+// 4-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
 
-  // Creates the test object, runs it, records its result, and then
-  // deletes it.
-  void Run();
 
-  static void ClearTestResult(TestInfo* test_info) {
-    test_info->result_.Clear();
-  }
 
-  // These fields are immutable properties of the test.
-  const std::string test_case_name_;     // Test case name
-  const std::string name_;               // Test name
-  // Name of the parameter type, or NULL if this is not a typed or a
-  // type-parameterized test.
-  const internal::scoped_ptr<const ::std::string> type_param_;
-  // Text representation of the value parameter, or NULL if this is not a
-  // value-parameterized test.
-  const internal::scoped_ptr<const ::std::string> value_param_;
-  const internal::TypeId fixture_class_id_;   // ID of the test fixture class
-  bool should_run_;                 // True iff this test should run
-  bool is_disabled_;                // True iff this test is disabled
-  bool matches_filter_;             // True if this test matches the
-                                    // user-specified filter.
-  internal::TestFactoryBase* const factory_;  // The factory that creates
-                                              // the test object
+// Helper function for implementing {EXPECT|ASSERT}_PRED5.  Don't use
+// this in your code.
+template <typename Pred,
+          typename T1,
+          typename T2,
+          typename T3,
+          typename T4,
+          typename T5>
+AssertionResult AssertPred5Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  const char* e5,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4,
+                                  const T5& v5) {
+  if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
 
-  // This field is mutable and needs to be reset before running the
-  // test for the second time.
-  TestResult result_;
+  return AssertionFailure()
+         << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+         << ", " << e5 << ") evaluates to false, where"
+         << "\n"
+         << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+         << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n"
+         << e3 << " evaluates to " << ::testing::PrintToString(v3) << "\n"
+         << e4 << " evaluates to " << ::testing::PrintToString(v4) << "\n"
+         << e5 << " evaluates to " << ::testing::PrintToString(v5);
+}
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
-};
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
+                on_failure)
 
-// A test case, which consists of a vector of TestInfos.
-//
-// TestCase is not copyable.
-class GTEST_API_ TestCase {
- public:
-  // Creates a TestCase with the given name.
-  //
-  // TestCase does NOT have a default constructor.  Always use this
-  // constructor to create a TestCase object.
-  //
-  // Arguments:
-  //
-  //   name:         name of the test case
-  //   a_type_param: the name of the test's type parameter, or NULL if
-  //                 this is not a type-parameterized test.
-  //   set_up_tc:    pointer to the function that sets up the test case
-  //   tear_down_tc: pointer to the function that tears down the test case
-  TestCase(const char* name, const char* a_type_param,
-           Test::SetUpTestCaseFunc set_up_tc,
-           Test::TearDownTestCaseFunc tear_down_tc);
+// Internal macro for implementing {EXPECT|ASSERT}_PRED5.  Don't use
+// this in your code.
+#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             #v5, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4, \
+                                             v5), on_failure)
 
-  // Destructor of TestCase.
-  virtual ~TestCase();
+// 5-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
 
-  // Gets the name of the TestCase.
-  const char* name() const { return name_.c_str(); }
 
-  // Returns the name of the parameter type, or NULL if this is not a
-  // type-parameterized test case.
-  const char* type_param() const {
-    if (type_param_.get() != NULL)
-      return type_param_->c_str();
-    return NULL;
-  }
 
-  // Returns true if any test in this test case should run.
-  bool should_run() const { return should_run_; }
+}  // namespace testing
 
-  // Gets the number of successful tests in this test case.
-  int successful_test_count() const;
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
 
-  // Gets the number of failed tests in this test case.
-  int failed_test_count() const;
+namespace testing {
 
-  // Gets the number of disabled tests that will be reported in the XML report.
-  int reportable_disabled_test_count() const;
+// The abstract class that all tests inherit from.
+//
+// In Google Test, a unit test program contains one or many TestSuites, and
+// each TestSuite contains one or many Tests.
+//
+// When you define a test using the TEST macro, you don't need to
+// explicitly derive from Test - the TEST macro automatically does
+// this for you.
+//
+// The only time you derive from Test is when defining a test fixture
+// to be used in a TEST_F.  For example:
+//
+//   class FooTest : public testing::Test {
+//    protected:
+//     void SetUp() override { ... }
+//     void TearDown() override { ... }
+//     ...
+//   };
+//
+//   TEST_F(FooTest, Bar) { ... }
+//   TEST_F(FooTest, Baz) { ... }
+//
+// Test is not copyable.
+class GTEST_API_ Test {
+ public:
+  friend class TestInfo;
 
-  // Gets the number of disabled tests in this test case.
-  int disabled_test_count() const;
+  // The d'tor is virtual as we intend to inherit from Test.
+  virtual ~Test();
 
-  // Gets the number of tests to be printed in the XML report.
-  int reportable_test_count() const;
+  // Sets up the stuff shared by all tests in this test suite.
+  //
+  // Google Test will call Foo::SetUpTestSuite() before running the first
+  // test in test suite Foo.  Hence a sub-class can define its own
+  // SetUpTestSuite() method to shadow the one defined in the super
+  // class.
+  static void SetUpTestSuite() {}
 
-  // Get the number of tests in this test case that should run.
-  int test_to_run_count() const;
+  // Tears down the stuff shared by all tests in this test suite.
+  //
+  // Google Test will call Foo::TearDownTestSuite() after running the last
+  // test in test suite Foo.  Hence a sub-class can define its own
+  // TearDownTestSuite() method to shadow the one defined in the super
+  // class.
+  static void TearDownTestSuite() {}
 
-  // Gets the number of all tests in this test case.
-  int total_test_count() const;
+  // Legacy API is deprecated but still available. Use SetUpTestSuite and
+  // TearDownTestSuite instead.
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  static void TearDownTestCase() {}
+  static void SetUpTestCase() {}
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-  // Returns true iff the test case passed.
-  bool Passed() const { return !Failed(); }
+  // Returns true if and only if the current test has a fatal failure.
+  static bool HasFatalFailure();
 
-  // Returns true iff the test case failed.
-  bool Failed() const { return failed_test_count() > 0; }
+  // Returns true if and only if the current test has a non-fatal failure.
+  static bool HasNonfatalFailure();
 
-  // Returns the elapsed time, in milliseconds.
-  TimeInMillis elapsed_time() const { return elapsed_time_; }
+  // Returns true if and only if the current test was skipped.
+  static bool IsSkipped();
 
-  // Returns the i-th test among all the tests. i can range from 0 to
-  // total_test_count() - 1. If i is not in that range, returns NULL.
-  const TestInfo* GetTestInfo(int i) const;
+  // Returns true if and only if the current test has a (either fatal or
+  // non-fatal) failure.
+  static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
 
-  // Returns the TestResult that holds test properties recorded during
-  // execution of SetUpTestCase and TearDownTestCase.
-  const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
+  // Logs a property for the current test, test suite, or for the entire
+  // invocation of the test program when used outside of the context of a
+  // test suite.  Only the last value for a given key is remembered.  These
+  // are public static so they can be called from utility functions that are
+  // not members of the test fixture.  Calls to RecordProperty made during
+  // lifespan of the test (from the moment its constructor starts to the
+  // moment its destructor finishes) will be output in XML as attributes of
+  // the <testcase> element.  Properties recorded from fixture's
+  // SetUpTestSuite or TearDownTestSuite are logged as attributes of the
+  // corresponding <testsuite> element.  Calls to RecordProperty made in the
+  // global context (before or after invocation of RUN_ALL_TESTS and from
+  // SetUp/TearDown method of Environment objects registered with Google
+  // Test) will be output as attributes of the <testsuites> element.
+  static void RecordProperty(const std::string& key, const std::string& value);
+  static void RecordProperty(const std::string& key, int value);
 
- private:
-  friend class Test;
-  friend class internal::UnitTestImpl;
+ protected:
+  // Creates a Test object.
+  Test();
 
-  // Gets the (mutable) vector of TestInfos in this TestCase.
-  std::vector<TestInfo*>& test_info_list() { return test_info_list_; }
+  // Sets up the test fixture.
+  virtual void SetUp();
 
-  // Gets the (immutable) vector of TestInfos in this TestCase.
-  const std::vector<TestInfo*>& test_info_list() const {
-    return test_info_list_;
-  }
+  // Tears down the test fixture.
+  virtual void TearDown();
 
-  // Returns the i-th test among all the tests. i can range from 0 to
-  // total_test_count() - 1. If i is not in that range, returns NULL.
-  TestInfo* GetMutableTestInfo(int i);
+ private:
+  // Returns true if and only if the current test has the same fixture class
+  // as the first test in the current test suite.
+  static bool HasSameFixtureClass();
 
-  // Sets the should_run member.
-  void set_should_run(bool should) { should_run_ = should; }
+  // Runs the test after the test fixture has been set up.
+  //
+  // A sub-class must implement this to define the test logic.
+  //
+  // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
+  // Instead, use the TEST or TEST_F macro.
+  virtual void TestBody() = 0;
 
-  // Adds a TestInfo to this test case.  Will delete the TestInfo upon
-  // destruction of the TestCase object.
-  void AddTestInfo(TestInfo * test_info);
+  // Sets up, executes, and tears down the test.
+  void Run();
 
-  // Clears the results of all tests in this test case.
-  void ClearResult();
+  // Deletes self.  We deliberately pick an unusual name for this
+  // internal method to avoid clashing with names used in user TESTs.
+  void DeleteSelf_() { delete this; }
 
-  // Clears the results of all tests in the given test case.
-  static void ClearTestCaseResult(TestCase* test_case) {
-    test_case->ClearResult();
-  }
+  const std::unique_ptr<GTEST_FLAG_SAVER_> gtest_flag_saver_;
 
-  // Runs every test in this TestCase.
-  void Run();
+  // Often a user misspells SetUp() as Setup() and spends a long time
+  // wondering why it is never called by Google Test.  The declaration of
+  // the following method is solely for catching such an error at
+  // compile time:
+  //
+  //   - The return type is deliberately chosen to be not void, so it
+  //   will be a conflict if void Setup() is declared in the user's
+  //   test fixture.
+  //
+  //   - This method is private, so it will be another compiler error
+  //   if the method is called from the user's test fixture.
+  //
+  // DO NOT OVERRIDE THIS FUNCTION.
+  //
+  // If you see an error about overriding the following function or
+  // about it being private, you have mis-spelled SetUp() as Setup().
+  struct Setup_should_be_spelled_SetUp {};
+  virtual Setup_should_be_spelled_SetUp* Setup() { return nullptr; }
 
-  // Runs SetUpTestCase() for this TestCase.  This wrapper is needed
-  // for catching exceptions thrown from SetUpTestCase().
-  void RunSetUpTestCase() { (*set_up_tc_)(); }
+  // We disallow copying Tests.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
+};
 
-  // Runs TearDownTestCase() for this TestCase.  This wrapper is
-  // needed for catching exceptions thrown from TearDownTestCase().
-  void RunTearDownTestCase() { (*tear_down_tc_)(); }
+typedef internal::TimeInMillis TimeInMillis;
 
-  // Returns true iff test passed.
-  static bool TestPassed(const TestInfo* test_info) {
-    return test_info->should_run() && test_info->result()->Passed();
+// A copyable object representing a user specified test property which can be
+// output as a key/value string pair.
+//
+// Don't inherit from TestProperty as its destructor is not virtual.
+class TestProperty {
+ public:
+  // C'tor.  TestProperty does NOT have a default constructor.
+  // Always use this constructor (with parameters) to create a
+  // TestProperty object.
+  TestProperty(const std::string& a_key, const std::string& a_value) :
+    key_(a_key), value_(a_value) {
   }
 
-  // Returns true iff test failed.
-  static bool TestFailed(const TestInfo* test_info) {
-    return test_info->should_run() && test_info->result()->Failed();
+  // Gets the user supplied key.
+  const char* key() const {
+    return key_.c_str();
   }
 
-  // Returns true iff the test is disabled and will be reported in the XML
-  // report.
-  static bool TestReportableDisabled(const TestInfo* test_info) {
-    return test_info->is_reportable() && test_info->is_disabled_;
+  // Gets the user supplied value.
+  const char* value() const {
+    return value_.c_str();
   }
 
-  // Returns true iff test is disabled.
-  static bool TestDisabled(const TestInfo* test_info) {
-    return test_info->is_disabled_;
+  // Sets a new value, overriding the one supplied in the constructor.
+  void SetValue(const std::string& new_value) {
+    value_ = new_value;
   }
 
-  // Returns true iff this test will appear in the XML report.
-  static bool TestReportable(const TestInfo* test_info) {
-    return test_info->is_reportable();
-  }
+ private:
+  // The key supplied by the user.
+  std::string key_;
+  // The value supplied by the user.
+  std::string value_;
+};
 
-  // Returns true if the given test should run.
-  static bool ShouldRunTest(const TestInfo* test_info) {
-    return test_info->should_run();
-  }
+// The result of a single Test.  This includes a list of
+// TestPartResults, a list of TestProperties, a count of how many
+// death tests there are in the Test, and how much time it took to run
+// the Test.
+//
+// TestResult is not copyable.
+class GTEST_API_ TestResult {
+ public:
+  // Creates an empty TestResult.
+  TestResult();
 
-  // Shuffles the tests in this test case.
-  void ShuffleTests(internal::Random* random);
+  // D'tor.  Do not inherit from TestResult.
+  ~TestResult();
 
-  // Restores the test order to before the first shuffle.
-  void UnshuffleTests();
+  // Gets the number of all test parts.  This is the sum of the number
+  // of successful test parts and the number of failed test parts.
+  int total_part_count() const;
 
-  // Name of the test case.
-  std::string name_;
-  // Name of the parameter type, or NULL if this is not a typed or a
-  // type-parameterized test.
-  const internal::scoped_ptr<const ::std::string> type_param_;
-  // The vector of TestInfos in their original order.  It owns the
-  // elements in the vector.
-  std::vector<TestInfo*> test_info_list_;
-  // Provides a level of indirection for the test list to allow easy
-  // shuffling and restoring the test order.  The i-th element in this
-  // vector is the index of the i-th test in the shuffled test list.
-  std::vector<int> test_indices_;
-  // Pointer to the function that sets up the test case.
-  Test::SetUpTestCaseFunc set_up_tc_;
-  // Pointer to the function that tears down the test case.
-  Test::TearDownTestCaseFunc tear_down_tc_;
-  // True iff any test in this test case should run.
-  bool should_run_;
-  // Elapsed time, in milliseconds.
-  TimeInMillis elapsed_time_;
-  // Holds test properties recorded during execution of SetUpTestCase and
-  // TearDownTestCase.
-  TestResult ad_hoc_test_result_;
+  // Returns the number of the test properties.
+  int test_property_count() const;
 
-  // We disallow copying TestCases.
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase);
-};
+  // Returns true if and only if the test passed (i.e. no test part failed).
+  bool Passed() const { return !Skipped() && !Failed(); }
 
-// An Environment object is capable of setting up and tearing down an
-// environment.  The user should subclass this to define his own
-// environment(s).
-//
-// An Environment object does the set-up and tear-down in virtual
-// methods SetUp() and TearDown() instead of the constructor and the
-// destructor, as:
-//
-//   1. You cannot safely throw from a destructor.  This is a problem
-//      as in some cases Google Test is used where exceptions are enabled, and
-//      we may want to implement ASSERT_* using exceptions where they are
-//      available.
-//   2. You cannot use ASSERT_* directly in a constructor or
-//      destructor.
-class Environment {
- public:
-  // The d'tor is virtual as we need to subclass Environment.
-  virtual ~Environment() {}
+  // Returns true if and only if the test was skipped.
+  bool Skipped() const;
 
-  // Override this to define how to set up the environment.
-  virtual void SetUp() {}
+  // Returns true if and only if the test failed.
+  bool Failed() const;
 
-  // Override this to define how to tear down the environment.
-  virtual void TearDown() {}
- private:
-  // If you see an error about overriding the following function or
-  // about it being private, you have mis-spelled SetUp() as Setup().
-  struct Setup_should_be_spelled_SetUp {};
-  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
-};
+  // Returns true if and only if the test fatally failed.
+  bool HasFatalFailure() const;
 
-// The interface for tracing execution of tests. The methods are organized in
-// the order the corresponding events are fired.
-class TestEventListener {
- public:
-  virtual ~TestEventListener() {}
+  // Returns true if and only if the test has a non-fatal failure.
+  bool HasNonfatalFailure() const;
+
+  // Returns the elapsed time, in milliseconds.
+  TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+  // Gets the time of the test case start, in ms from the start of the
+  // UNIX epoch.
+  TimeInMillis start_timestamp() const { return start_timestamp_; }
+
+  // Returns the i-th test part result among all the results. i can range from 0
+  // to total_part_count() - 1. If i is not in that range, aborts the program.
+  const TestPartResult& GetTestPartResult(int i) const;
+
+  // Returns the i-th test property. i can range from 0 to
+  // test_property_count() - 1. If i is not in that range, aborts the
+  // program.
+  const TestProperty& GetTestProperty(int i) const;
+
+ private:
+  friend class TestInfo;
+  friend class TestSuite;
+  friend class UnitTest;
+  friend class internal::DefaultGlobalTestPartResultReporter;
+  friend class internal::ExecDeathTest;
+  friend class internal::TestResultAccessor;
+  friend class internal::UnitTestImpl;
+  friend class internal::WindowsDeathTest;
+  friend class internal::FuchsiaDeathTest;
 
-  // Fired before any test activity starts.
-  virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;
+  // Gets the vector of TestPartResults.
+  const std::vector<TestPartResult>& test_part_results() const {
+    return test_part_results_;
+  }
 
-  // Fired before each iteration of tests starts.  There may be more than
-  // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
-  // index, starting from 0.
-  virtual void OnTestIterationStart(const UnitTest& unit_test,
-                                    int iteration) = 0;
+  // Gets the vector of TestProperties.
+  const std::vector<TestProperty>& test_properties() const {
+    return test_properties_;
+  }
 
-  // Fired before environment set-up for each iteration of tests starts.
-  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;
+  // Sets the start time.
+  void set_start_timestamp(TimeInMillis start) { start_timestamp_ = start; }
 
-  // Fired after environment set-up for each iteration of tests ends.
-  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;
+  // Sets the elapsed time.
+  void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }
 
-  // Fired before the test case starts.
-  virtual void OnTestCaseStart(const TestCase& test_case) = 0;
+  // Adds a test property to the list. The property is validated and may add
+  // a non-fatal failure if invalid (e.g., if it conflicts with reserved
+  // key names). If a property is already recorded for the same key, the
+  // value will be updated, rather than storing multiple values for the same
+  // key.  xml_element specifies the element for which the property is being
+  // recorded and is used for validation.
+  void RecordProperty(const std::string& xml_element,
+                      const TestProperty& test_property);
 
-  // Fired before the test starts.
-  virtual void OnTestStart(const TestInfo& test_info) = 0;
+  // Adds a failure if the key is a reserved attribute of Google Test
+  // testsuite tags.  Returns true if the property is valid.
+  // FIXME: Validate attribute names are legal and human readable.
+  static bool ValidateTestProperty(const std::string& xml_element,
+                                   const TestProperty& test_property);
 
-  // Fired after a failed assertion or a SUCCEED() invocation.
-  virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;
+  // Adds a test part result to the list.
+  void AddTestPartResult(const TestPartResult& test_part_result);
 
-  // Fired after the test ends.
-  virtual void OnTestEnd(const TestInfo& test_info) = 0;
+  // Returns the death test count.
+  int death_test_count() const { return death_test_count_; }
 
-  // Fired after the test case ends.
-  virtual void OnTestCaseEnd(const TestCase& test_case) = 0;
+  // Increments the death test count, returning the new count.
+  int increment_death_test_count() { return ++death_test_count_; }
 
-  // Fired before environment tear-down for each iteration of tests starts.
-  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;
+  // Clears the test part results.
+  void ClearTestPartResults();
 
-  // Fired after environment tear-down for each iteration of tests ends.
-  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;
+  // Clears the object.
+  void Clear();
 
-  // Fired after each iteration of tests finishes.
-  virtual void OnTestIterationEnd(const UnitTest& unit_test,
-                                  int iteration) = 0;
+  // Protects mutable state of the property vector and of owned
+  // properties, whose values may be updated.
+  internal::Mutex test_properties_mutex_;
 
-  // Fired after all test activities have ended.
-  virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
-};
+  // The vector of TestPartResults
+  std::vector<TestPartResult> test_part_results_;
+  // The vector of TestProperties
+  std::vector<TestProperty> test_properties_;
+  // Running count of death tests.
+  int death_test_count_;
+  // The start time, in milliseconds since UNIX Epoch.
+  TimeInMillis start_timestamp_;
+  // The elapsed time, in milliseconds.
+  TimeInMillis elapsed_time_;
 
-// The convenience class for users who need to override just one or two
-// methods and are not concerned that a possible change to a signature of
-// the methods they override will not be caught during the build.  For
-// comments about each method please see the definition of TestEventListener
-// above.
-class EmptyTestEventListener : public TestEventListener {
- public:
-  virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
-  virtual void OnTestIterationStart(const UnitTest& /*unit_test*/,
-                                    int /*iteration*/) {}
-  virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {}
-  virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
-  virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
-  virtual void OnTestStart(const TestInfo& /*test_info*/) {}
-  virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {}
-  virtual void OnTestEnd(const TestInfo& /*test_info*/) {}
-  virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
-  virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {}
-  virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
-  virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/,
-                                  int /*iteration*/) {}
-  virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
-};
+  // We disallow copying TestResult.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult);
+};  // class TestResult
 
-// TestEventListeners lets users add listeners to track events in Google Test.
-class GTEST_API_ TestEventListeners {
+// A TestInfo object stores the following information about a test:
+//
+//   Test suite name
+//   Test name
+//   Whether the test should be run
+//   A function pointer that creates the test object when invoked
+//   Test result
+//
+// The constructor of TestInfo registers itself with the UnitTest
+// singleton such that the RUN_ALL_TESTS() macro knows which tests to
+// run.
+class GTEST_API_ TestInfo {
  public:
-  TestEventListeners();
-  ~TestEventListeners();
+  // Destructs a TestInfo object.  This function is not virtual, so
+  // don't inherit from TestInfo.
+  ~TestInfo();
 
-  // Appends an event listener to the end of the list. Google Test assumes
-  // the ownership of the listener (i.e. it will delete the listener when
-  // the test program finishes).
-  void Append(TestEventListener* listener);
+  // Returns the test suite name.
+  const char* test_suite_name() const { return test_suite_name_.c_str(); }
 
-  // Removes the given event listener from the list and returns it.  It then
-  // becomes the caller's responsibility to delete the listener. Returns
-  // NULL if the listener is not found in the list.
-  TestEventListener* Release(TestEventListener* listener);
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  const char* test_case_name() const { return test_suite_name(); }
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-  // Returns the standard listener responsible for the default console
-  // output.  Can be removed from the listeners list to shut down default
-  // console output.  Note that removing this object from the listener list
-  // with Release transfers its ownership to the caller and makes this
-  // function return NULL the next time.
-  TestEventListener* default_result_printer() const {
-    return default_result_printer_;
+  // Returns the test name.
+  const char* name() const { return name_.c_str(); }
+
+  // Returns the name of the parameter type, or NULL if this is not a typed
+  // or a type-parameterized test.
+  const char* type_param() const {
+    if (type_param_.get() != nullptr) return type_param_->c_str();
+    return nullptr;
   }
 
-  // Returns the standard listener responsible for the default XML output
-  // controlled by the --gtest_output=xml flag.  Can be removed from the
-  // listeners list by users who want to shut down the default XML output
-  // controlled by this flag and substitute it with custom one.  Note that
-  // removing this object from the listener list with Release transfers its
-  // ownership to the caller and makes this function return NULL the next
-  // time.
-  TestEventListener* default_xml_generator() const {
-    return default_xml_generator_;
+  // Returns the text representation of the value parameter, or NULL if this
+  // is not a value-parameterized test.
+  const char* value_param() const {
+    if (value_param_.get() != nullptr) return value_param_->c_str();
+    return nullptr;
   }
 
- private:
-  friend class TestCase;
-  friend class TestInfo;
-  friend class internal::DefaultGlobalTestPartResultReporter;
-  friend class internal::NoExecDeathTest;
-  friend class internal::TestEventListenersAccessor;
-  friend class internal::UnitTestImpl;
+  // Returns the file name where this test is defined.
+  const char* file() const { return location_.file.c_str(); }
 
-  // Returns repeater that broadcasts the TestEventListener events to all
-  // subscribers.
-  TestEventListener* repeater();
+  // Returns the line where this test is defined.
+  int line() const { return location_.line; }
 
-  // Sets the default_result_printer attribute to the provided listener.
-  // The listener is also added to the listener list and previous
-  // default_result_printer is removed from it and deleted. The listener can
-  // also be NULL in which case it will not be added to the list. Does
-  // nothing if the previous and the current listener objects are the same.
-  void SetDefaultResultPrinter(TestEventListener* listener);
+  // Return true if this test should not be run because it's in another shard.
+  bool is_in_another_shard() const { return is_in_another_shard_; }
 
-  // Sets the default_xml_generator attribute to the provided listener.  The
-  // listener is also added to the listener list and previous
-  // default_xml_generator is removed from it and deleted. The listener can
-  // also be NULL in which case it will not be added to the list. Does
-  // nothing if the previous and the current listener objects are the same.
-  void SetDefaultXmlGenerator(TestEventListener* listener);
+  // Returns true if this test should run, that is if the test is not
+  // disabled (or it is disabled but the also_run_disabled_tests flag has
+  // been specified) and its full name matches the user-specified filter.
+  //
+  // Google Test allows the user to filter the tests by their full names.
+  // The full name of a test Bar in test suite Foo is defined as
+  // "Foo.Bar".  Only the tests that match the filter will run.
+  //
+  // A filter is a colon-separated list of glob (not regex) patterns,
+  // optionally followed by a '-' and a colon-separated list of
+  // negative patterns (tests to exclude).  A test is run if it
+  // matches one of the positive patterns and does not match any of
+  // the negative patterns.
+  //
+  // For example, *A*:Foo.* is a filter that matches any string that
+  // contains the character 'A' or starts with "Foo.".
+  bool should_run() const { return should_run_; }
 
-  // Controls whether events will be forwarded by the repeater to the
-  // listeners in the list.
-  bool EventForwardingEnabled() const;
-  void SuppressEventForwarding();
+  // Returns true if and only if this test will appear in the XML report.
+  bool is_reportable() const {
+    // The XML report includes tests matching the filter, excluding those
+    // run in other shards.
+    return matches_filter_ && !is_in_another_shard_;
+  }
 
-  // The actual list of listeners.
-  internal::TestEventRepeater* repeater_;
-  // Listener responsible for the standard result output.
-  TestEventListener* default_result_printer_;
-  // Listener responsible for the creation of the XML output file.
-  TestEventListener* default_xml_generator_;
+  // Returns the result of the test.
+  const TestResult* result() const { return &result_; }
 
-  // We disallow copying TestEventListeners.
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners);
-};
+ private:
+#if GTEST_HAS_DEATH_TEST
+  friend class internal::DefaultDeathTestFactory;
+#endif  // GTEST_HAS_DEATH_TEST
+  friend class Test;
+  friend class TestSuite;
+  friend class internal::UnitTestImpl;
+  friend class internal::StreamingListenerTest;
+  friend TestInfo* internal::MakeAndRegisterTestInfo(
+      const char* test_suite_name, const char* name, const char* type_param,
+      const char* value_param, internal::CodeLocation code_location,
+      internal::TypeId fixture_class_id, internal::SetUpTestSuiteFunc set_up_tc,
+      internal::TearDownTestSuiteFunc tear_down_tc,
+      internal::TestFactoryBase* factory);
 
-// A UnitTest consists of a vector of TestCases.
-//
-// This is a singleton class.  The only instance of UnitTest is
-// created when UnitTest::GetInstance() is first called.  This
-// instance is never deleted.
-//
-// UnitTest is not copyable.
-//
-// This class is thread-safe as long as the methods are called
-// according to their specification.
-class GTEST_API_ UnitTest {
- public:
-  // Gets the singleton UnitTest object.  The first time this method
-  // is called, a UnitTest object is constructed and returned.
-  // Consecutive calls will return the same object.
-  static UnitTest* GetInstance();
+  // Constructs a TestInfo object. The newly constructed instance assumes
+  // ownership of the factory object.
+  TestInfo(const std::string& test_suite_name, const std::string& name,
+           const char* a_type_param,   // NULL if not a type-parameterized test
+           const char* a_value_param,  // NULL if not a value-parameterized test
+           internal::CodeLocation a_code_location,
+           internal::TypeId fixture_class_id,
+           internal::TestFactoryBase* factory);
 
-  // Runs all tests in this UnitTest object and prints the result.
-  // Returns 0 if successful, or 1 otherwise.
-  //
-  // This method can only be called from the main thread.
-  //
-  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-  int Run() GTEST_MUST_USE_RESULT_;
+  // Increments the number of death tests encountered in this test so
+  // far.
+  int increment_death_test_count() {
+    return result_.increment_death_test_count();
+  }
 
-  // Returns the working directory when the first TEST() or TEST_F()
-  // was executed.  The UnitTest object owns the string.
-  const char* original_working_dir() const;
+  // Creates the test object, runs it, records its result, and then
+  // deletes it.
+  void Run();
 
-  // Returns the TestCase object for the test that's currently running,
-  // or NULL if no test is running.
-  const TestCase* current_test_case() const
-      GTEST_LOCK_EXCLUDED_(mutex_);
+  // Skip and records the test result for this object.
+  void Skip();
 
-  // Returns the TestInfo object for the test that's currently running,
-  // or NULL if no test is running.
-  const TestInfo* current_test_info() const
-      GTEST_LOCK_EXCLUDED_(mutex_);
+  static void ClearTestResult(TestInfo* test_info) {
+    test_info->result_.Clear();
+  }
+
+  // These fields are immutable properties of the test.
+  const std::string test_suite_name_;    // test suite name
+  const std::string name_;               // Test name
+  // Name of the parameter type, or NULL if this is not a typed or a
+  // type-parameterized test.
+  const std::unique_ptr<const ::std::string> type_param_;
+  // Text representation of the value parameter, or NULL if this is not a
+  // value-parameterized test.
+  const std::unique_ptr<const ::std::string> value_param_;
+  internal::CodeLocation location_;
+  const internal::TypeId fixture_class_id_;  // ID of the test fixture class
+  bool should_run_;           // True if and only if this test should run
+  bool is_disabled_;          // True if and only if this test is disabled
+  bool matches_filter_;       // True if this test matches the
+                              // user-specified filter.
+  bool is_in_another_shard_;  // Will be run in another shard.
+  internal::TestFactoryBase* const factory_;  // The factory that creates
+                                              // the test object
+
+  // This field is mutable and needs to be reset before running the
+  // test for the second time.
+  TestResult result_;
 
-  // Returns the random seed used at the start of the current test run.
-  int random_seed() const;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
+};
 
-#if GTEST_HAS_PARAM_TEST
-  // Returns the ParameterizedTestCaseRegistry object used to keep track of
-  // value-parameterized tests and instantiate and register them.
+// A test suite, which consists of a vector of TestInfos.
+//
+// TestSuite is not copyable.
+class GTEST_API_ TestSuite {
+ public:
+  // Creates a TestSuite with the given name.
   //
-  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-  internal::ParameterizedTestCaseRegistry& parameterized_test_registry()
-      GTEST_LOCK_EXCLUDED_(mutex_);
-#endif  // GTEST_HAS_PARAM_TEST
+  // TestSuite does NOT have a default constructor.  Always use this
+  // constructor to create a TestSuite object.
+  //
+  // Arguments:
+  //
+  //   name:         name of the test suite
+  //   a_type_param: the name of the test's type parameter, or NULL if
+  //                 this is not a type-parameterized test.
+  //   set_up_tc:    pointer to the function that sets up the test suite
+  //   tear_down_tc: pointer to the function that tears down the test suite
+  TestSuite(const char* name, const char* a_type_param,
+            internal::SetUpTestSuiteFunc set_up_tc,
+            internal::TearDownTestSuiteFunc tear_down_tc);
 
-  // Gets the number of successful test cases.
-  int successful_test_case_count() const;
+  // Destructor of TestSuite.
+  virtual ~TestSuite();
 
-  // Gets the number of failed test cases.
-  int failed_test_case_count() const;
+  // Gets the name of the TestSuite.
+  const char* name() const { return name_.c_str(); }
 
-  // Gets the number of all test cases.
-  int total_test_case_count() const;
+  // Returns the name of the parameter type, or NULL if this is not a
+  // type-parameterized test suite.
+  const char* type_param() const {
+    if (type_param_.get() != nullptr) return type_param_->c_str();
+    return nullptr;
+  }
 
-  // Gets the number of all test cases that contain at least one test
-  // that should run.
-  int test_case_to_run_count() const;
+  // Returns true if any test in this test suite should run.
+  bool should_run() const { return should_run_; }
 
-  // Gets the number of successful tests.
+  // Gets the number of successful tests in this test suite.
   int successful_test_count() const;
 
-  // Gets the number of failed tests.
+  // Gets the number of skipped tests in this test suite.
+  int skipped_test_count() const;
+
+  // Gets the number of failed tests in this test suite.
   int failed_test_count() const;
 
   // Gets the number of disabled tests that will be reported in the XML report.
   int reportable_disabled_test_count() const;
 
-  // Gets the number of disabled tests.
+  // Gets the number of disabled tests in this test suite.
   int disabled_test_count() const;
 
   // Gets the number of tests to be printed in the XML report.
   int reportable_test_count() const;
 
-  // Gets the number of all tests.
+  // Get the number of tests in this test suite that should run.
+  int test_to_run_count() const;
+
+  // Gets the number of all tests in this test suite.
   int total_test_count() const;
 
-  // Gets the number of tests that should run.
-  int test_to_run_count() const;
+  // Returns true if and only if the test suite passed.
+  bool Passed() const { return !Failed(); }
 
-  // Gets the time of the test program start, in ms from the start of the
+  // Returns true if and only if the test suite failed.
+  bool Failed() const {
+    return failed_test_count() > 0 || ad_hoc_test_result().Failed();
+  }
+
+  // Returns the elapsed time, in milliseconds.
+  TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+  // Gets the time of the test suite start, in ms from the start of the
   // UNIX epoch.
-  TimeInMillis start_timestamp() const;
+  TimeInMillis start_timestamp() const { return start_timestamp_; }
 
-  // Gets the elapsed time, in milliseconds.
-  TimeInMillis elapsed_time() const;
+  // Returns the i-th test among all the tests. i can range from 0 to
+  // total_test_count() - 1. If i is not in that range, returns NULL.
+  const TestInfo* GetTestInfo(int i) const;
 
-  // Returns true iff the unit test passed (i.e. all test cases passed).
-  bool Passed() const;
+  // Returns the TestResult that holds test properties recorded during
+  // execution of SetUpTestSuite and TearDownTestSuite.
+  const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
 
-  // Returns true iff the unit test failed (i.e. some test case failed
-  // or something outside of all tests failed).
-  bool Failed() const;
+ private:
+  friend class Test;
+  friend class internal::UnitTestImpl;
 
-  // Gets the i-th test case among all the test cases. i can range from 0 to
-  // total_test_case_count() - 1. If i is not in that range, returns NULL.
-  const TestCase* GetTestCase(int i) const;
+  // Gets the (mutable) vector of TestInfos in this TestSuite.
+  std::vector<TestInfo*>& test_info_list() { return test_info_list_; }
 
-  // Returns the TestResult containing information on test failures and
-  // properties logged outside of individual test cases.
-  const TestResult& ad_hoc_test_result() const;
+  // Gets the (immutable) vector of TestInfos in this TestSuite.
+  const std::vector<TestInfo*>& test_info_list() const {
+    return test_info_list_;
+  }
 
-  // Returns the list of event listeners that can be used to track events
-  // inside Google Test.
-  TestEventListeners& listeners();
+  // Returns the i-th test among all the tests. i can range from 0 to
+  // total_test_count() - 1. If i is not in that range, returns NULL.
+  TestInfo* GetMutableTestInfo(int i);
 
- private:
-  // Registers and returns a global test environment.  When a test
-  // program is run, all global test environments will be set-up in
-  // the order they were registered.  After all tests in the program
-  // have finished, all global test environments will be torn-down in
-  // the *reverse* order they were registered.
-  //
-  // The UnitTest object takes ownership of the given environment.
-  //
-  // This method can only be called from the main thread.
-  Environment* AddEnvironment(Environment* env);
+  // Sets the should_run member.
+  void set_should_run(bool should) { should_run_ = should; }
 
-  // Adds a TestPartResult to the current TestResult object.  All
-  // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
-  // eventually call this to report their results.  The user code
-  // should use the assertion macros instead of calling this directly.
-  void AddTestPartResult(TestPartResult::Type result_type,
-                         const char* file_name,
-                         int line_number,
-                         const std::string& message,
-                         const std::string& os_stack_trace)
-      GTEST_LOCK_EXCLUDED_(mutex_);
+  // Adds a TestInfo to this test suite.  Will delete the TestInfo upon
+  // destruction of the TestSuite object.
+  void AddTestInfo(TestInfo * test_info);
 
-  // Adds a TestProperty to the current TestResult object when invoked from
-  // inside a test, to current TestCase's ad_hoc_test_result_ when invoked
-  // from SetUpTestCase or TearDownTestCase, or to the global property set
-  // when invoked elsewhere.  If the result already contains a property with
-  // the same key, the value will be updated.
-  void RecordProperty(const std::string& key, const std::string& value);
+  // Clears the results of all tests in this test suite.
+  void ClearResult();
 
-  // Gets the i-th test case among all the test cases. i can range from 0 to
-  // total_test_case_count() - 1. If i is not in that range, returns NULL.
-  TestCase* GetMutableTestCase(int i);
+  // Clears the results of all tests in the given test suite.
+  static void ClearTestSuiteResult(TestSuite* test_suite) {
+    test_suite->ClearResult();
+  }
 
-  // Accessors for the implementation object.
-  internal::UnitTestImpl* impl() { return impl_; }
-  const internal::UnitTestImpl* impl() const { return impl_; }
+  // Runs every test in this TestSuite.
+  void Run();
 
-  // These classes and functions are friends as they need to access private
-  // members of UnitTest.
-  friend class Test;
-  friend class internal::AssertHelper;
-  friend class internal::ScopedTrace;
-  friend class internal::StreamingListenerTest;
-  friend class internal::UnitTestRecordPropertyTestHelper;
-  friend Environment* AddGlobalTestEnvironment(Environment* env);
-  friend internal::UnitTestImpl* internal::GetUnitTestImpl();
-  friend void internal::ReportFailureInUnknownLocation(
-      TestPartResult::Type result_type,
-      const std::string& message);
+  // Skips the execution of tests under this TestSuite
+  void Skip();
 
-  // Creates an empty UnitTest.
-  UnitTest();
+  // Runs SetUpTestSuite() for this TestSuite.  This wrapper is needed
+  // for catching exceptions thrown from SetUpTestSuite().
+  void RunSetUpTestSuite() {
+    if (set_up_tc_ != nullptr) {
+      (*set_up_tc_)();
+    }
+  }
 
-  // D'tor
-  virtual ~UnitTest();
+  // Runs TearDownTestSuite() for this TestSuite.  This wrapper is
+  // needed for catching exceptions thrown from TearDownTestSuite().
+  void RunTearDownTestSuite() {
+    if (tear_down_tc_ != nullptr) {
+      (*tear_down_tc_)();
+    }
+  }
 
-  // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
-  // Google Test trace stack.
-  void PushGTestTrace(const internal::TraceInfo& trace)
-      GTEST_LOCK_EXCLUDED_(mutex_);
+  // Returns true if and only if test passed.
+  static bool TestPassed(const TestInfo* test_info) {
+    return test_info->should_run() && test_info->result()->Passed();
+  }
 
-  // Pops a trace from the per-thread Google Test trace stack.
-  void PopGTestTrace()
-      GTEST_LOCK_EXCLUDED_(mutex_);
+  // Returns true if and only if test skipped.
+  static bool TestSkipped(const TestInfo* test_info) {
+    return test_info->should_run() && test_info->result()->Skipped();
+  }
 
-  // Protects mutable state in *impl_.  This is mutable as some const
-  // methods need to lock it too.
-  mutable internal::Mutex mutex_;
+  // Returns true if and only if test failed.
+  static bool TestFailed(const TestInfo* test_info) {
+    return test_info->should_run() && test_info->result()->Failed();
+  }
 
-  // Opaque implementation object.  This field is never changed once
-  // the object is constructed.  We don't mark it as const here, as
-  // doing so will cause a warning in the constructor of UnitTest.
-  // Mutable state in *impl_ is protected by mutex_.
-  internal::UnitTestImpl* impl_;
+  // Returns true if and only if the test is disabled and will be reported in
+  // the XML report.
+  static bool TestReportableDisabled(const TestInfo* test_info) {
+    return test_info->is_reportable() && test_info->is_disabled_;
+  }
 
-  // We disallow copying UnitTest.
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest);
+  // Returns true if and only if test is disabled.
+  static bool TestDisabled(const TestInfo* test_info) {
+    return test_info->is_disabled_;
+  }
+
+  // Returns true if and only if this test will appear in the XML report.
+  static bool TestReportable(const TestInfo* test_info) {
+    return test_info->is_reportable();
+  }
+
+  // Returns true if the given test should run.
+  static bool ShouldRunTest(const TestInfo* test_info) {
+    return test_info->should_run();
+  }
+
+  // Shuffles the tests in this test suite.
+  void ShuffleTests(internal::Random* random);
+
+  // Restores the test order to before the first shuffle.
+  void UnshuffleTests();
+
+  // Name of the test suite.
+  std::string name_;
+  // Name of the parameter type, or NULL if this is not a typed or a
+  // type-parameterized test.
+  const std::unique_ptr<const ::std::string> type_param_;
+  // The vector of TestInfos in their original order.  It owns the
+  // elements in the vector.
+  std::vector<TestInfo*> test_info_list_;
+  // Provides a level of indirection for the test list to allow easy
+  // shuffling and restoring the test order.  The i-th element in this
+  // vector is the index of the i-th test in the shuffled test list.
+  std::vector<int> test_indices_;
+  // Pointer to the function that sets up the test suite.
+  internal::SetUpTestSuiteFunc set_up_tc_;
+  // Pointer to the function that tears down the test suite.
+  internal::TearDownTestSuiteFunc tear_down_tc_;
+  // True if and only if any test in this test suite should run.
+  bool should_run_;
+  // The start time, in milliseconds since UNIX Epoch.
+  TimeInMillis start_timestamp_;
+  // Elapsed time, in milliseconds.
+  TimeInMillis elapsed_time_;
+  // Holds test properties recorded during execution of SetUpTestSuite and
+  // TearDownTestSuite.
+  TestResult ad_hoc_test_result_;
+
+  // We disallow copying TestSuites.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestSuite);
 };
 
-// A convenient wrapper for adding an environment for the test
-// program.
-//
-// You should call this before RUN_ALL_TESTS() is called, probably in
-// main().  If you use gtest_main, you need to call this before main()
-// starts for it to take effect.  For example, you can define a global
-// variable like this:
+// An Environment object is capable of setting up and tearing down an
+// environment.  You should subclass this to define your own
+// environment(s).
 //
-//   testing::Environment* const foo_env =
-//       testing::AddGlobalTestEnvironment(new FooEnvironment);
+// An Environment object does the set-up and tear-down in virtual
+// methods SetUp() and TearDown() instead of the constructor and the
+// destructor, as:
 //
-// However, we strongly recommend you to write your own main() and
-// call AddGlobalTestEnvironment() there, as relying on initialization
-// of global variables makes the code harder to read and may cause
-// problems when you register multiple environments from different
-// translation units and the environments have dependencies among them
-// (remember that the compiler doesn't guarantee the order in which
-// global variables from different translation units are initialized).
-inline Environment* AddGlobalTestEnvironment(Environment* env) {
-  return UnitTest::GetInstance()->AddEnvironment(env);
-}
+//   1. You cannot safely throw from a destructor.  This is a problem
+//      as in some cases Google Test is used where exceptions are enabled, and
+//      we may want to implement ASSERT_* using exceptions where they are
+//      available.
+//   2. You cannot use ASSERT_* directly in a constructor or
+//      destructor.
+class Environment {
+ public:
+  // The d'tor is virtual as we need to subclass Environment.
+  virtual ~Environment() {}
+
+  // Override this to define how to set up the environment.
+  virtual void SetUp() {}
+
+  // Override this to define how to tear down the environment.
+  virtual void TearDown() {}
+ private:
+  // If you see an error about overriding the following function or
+  // about it being private, you have mis-spelled SetUp() as Setup().
+  struct Setup_should_be_spelled_SetUp {};
+  virtual Setup_should_be_spelled_SetUp* Setup() { return nullptr; }
+};
+
+#if GTEST_HAS_EXCEPTIONS
+
+// Exception which can be thrown from TestEventListener::OnTestPartResult.
+class GTEST_API_ AssertionException
+    : public internal::GoogleTestFailureException {
+ public:
+  explicit AssertionException(const TestPartResult& result)
+      : GoogleTestFailureException(result) {}
+};
+
+#endif  // GTEST_HAS_EXCEPTIONS
+
+// The interface for tracing execution of tests. The methods are organized in
+// the order the corresponding events are fired.
+class TestEventListener {
+ public:
+  virtual ~TestEventListener() {}
+
+  // Fired before any test activity starts.
+  virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;
+
+  // Fired before each iteration of tests starts.  There may be more than
+  // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
+  // index, starting from 0.
+  virtual void OnTestIterationStart(const UnitTest& unit_test,
+                                    int iteration) = 0;
+
+  // Fired before environment set-up for each iteration of tests starts.
+  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;
+
+  // Fired after environment set-up for each iteration of tests ends.
+  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;
+
+  // Fired before the test suite starts.
+  virtual void OnTestSuiteStart(const TestSuite& /*test_suite*/) {}
 
-// Initializes Google Test.  This must be called before calling
-// RUN_ALL_TESTS().  In particular, it parses a command line for the
-// flags that Google Test recognizes.  Whenever a Google Test flag is
-// seen, it is removed from argv, and *argc is decremented.
-//
-// No value is returned.  Instead, the Google Test flag variables are
-// updated.
-//
-// Calling the function for the second time has no user-visible effect.
-GTEST_API_ void InitGoogleTest(int* argc, char** argv);
+  //  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-// This overloaded version can be used in Windows programs compiled in
-// UNICODE mode.
-GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);
+  // Fired before the test starts.
+  virtual void OnTestStart(const TestInfo& test_info) = 0;
 
-namespace internal {
+  // Fired after a failed assertion or a SUCCEED() invocation.
+  // If you want to throw an exception from this function to skip to the next
+  // TEST, it must be AssertionException defined above, or inherited from it.
+  virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;
 
-// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
-// value of type ToPrint that is an operand of a comparison assertion
-// (e.g. ASSERT_EQ).  OtherOperand is the type of the other operand in
-// the comparison, and is used to help determine the best way to
-// format the value.  In particular, when the value is a C string
-// (char pointer) and the other operand is an STL string object, we
-// want to format the C string as a string, since we know it is
-// compared by value with the string object.  If the value is a char
-// pointer but the other operand is not an STL string object, we don't
-// know whether the pointer is supposed to point to a NUL-terminated
-// string, and thus want to print it as a pointer to be safe.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+  // Fired after the test ends.
+  virtual void OnTestEnd(const TestInfo& test_info) = 0;
 
-// The default case.
-template <typename ToPrint, typename OtherOperand>
-class FormatForComparison {
- public:
-  static ::std::string Format(const ToPrint& value) {
-    return ::testing::PrintToString(value);
-  }
+  // Fired after the test suite ends.
+  virtual void OnTestSuiteEnd(const TestSuite& /*test_suite*/) {}
+
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  // Fired before environment tear-down for each iteration of tests starts.
+  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;
+
+  // Fired after environment tear-down for each iteration of tests ends.
+  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;
+
+  // Fired after each iteration of tests finishes.
+  virtual void OnTestIterationEnd(const UnitTest& unit_test,
+                                  int iteration) = 0;
+
+  // Fired after all test activities have ended.
+  virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
 };
 
-// Array.
-template <typename ToPrint, size_t N, typename OtherOperand>
-class FormatForComparison<ToPrint[N], OtherOperand> {
+// The convenience class for users who need to override just one or two
+// methods and are not concerned that a possible change to a signature of
+// the methods they override will not be caught during the build.  For
+// comments about each method please see the definition of TestEventListener
+// above.
+class EmptyTestEventListener : public TestEventListener {
  public:
-  static ::std::string Format(const ToPrint* value) {
-    return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
-  }
+  void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationStart(const UnitTest& /*unit_test*/,
+                            int /*iteration*/) override {}
+  void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) override {}
+  void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+  void OnTestSuiteStart(const TestSuite& /*test_suite*/) override {}
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseStart(const TestCase& /*test_case*/) override {}
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  void OnTestStart(const TestInfo& /*test_info*/) override {}
+  void OnTestPartResult(const TestPartResult& /*test_part_result*/) override {}
+  void OnTestEnd(const TestInfo& /*test_info*/) override {}
+  void OnTestSuiteEnd(const TestSuite& /*test_suite*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseEnd(const TestCase& /*test_case*/) override {}
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {}
+  void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationEnd(const UnitTest& /*unit_test*/,
+                          int /*iteration*/) override {}
+  void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
 };
 
-// By default, print C string as pointers to be safe, as we don't know
-// whether they actually point to a NUL-terminated string.
-
-#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType)                \
-  template <typename OtherOperand>                                      \
-  class FormatForComparison<CharType*, OtherOperand> {                  \
-   public:                                                              \
-    static ::std::string Format(CharType* value) {                      \
-      return ::testing::PrintToString(static_cast<const void*>(value)); \
-    }                                                                   \
-  }
+// TestEventListeners lets users add listeners to track events in Google Test.
+class GTEST_API_ TestEventListeners {
+ public:
+  TestEventListeners();
+  ~TestEventListeners();
 
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
+  // Appends an event listener to the end of the list. Google Test assumes
+  // the ownership of the listener (i.e. it will delete the listener when
+  // the test program finishes).
+  void Append(TestEventListener* listener);
 
-#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
+  // Removes the given event listener from the list and returns it.  It then
+  // becomes the caller's responsibility to delete the listener. Returns
+  // NULL if the listener is not found in the list.
+  TestEventListener* Release(TestEventListener* listener);
 
-// If a C string is compared with an STL string object, we know it's meant
-// to point to a NUL-terminated string, and thus can print it as a string.
+  // Returns the standard listener responsible for the default console
+  // output.  Can be removed from the listeners list to shut down default
+  // console output.  Note that removing this object from the listener list
+  // with Release transfers its ownership to the caller and makes this
+  // function return NULL the next time.
+  TestEventListener* default_result_printer() const {
+    return default_result_printer_;
+  }
 
-#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
-  template <>                                                           \
-  class FormatForComparison<CharType*, OtherStringType> {               \
-   public:                                                              \
-    static ::std::string Format(CharType* value) {                      \
-      return ::testing::PrintToString(value);                           \
-    }                                                                   \
+  // Returns the standard listener responsible for the default XML output
+  // controlled by the --gtest_output=xml flag.  Can be removed from the
+  // listeners list by users who want to shut down the default XML output
+  // controlled by this flag and substitute it with custom one.  Note that
+  // removing this object from the listener list with Release transfers its
+  // ownership to the caller and makes this function return NULL the next
+  // time.
+  TestEventListener* default_xml_generator() const {
+    return default_xml_generator_;
   }
 
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
+ private:
+  friend class TestSuite;
+  friend class TestInfo;
+  friend class internal::DefaultGlobalTestPartResultReporter;
+  friend class internal::NoExecDeathTest;
+  friend class internal::TestEventListenersAccessor;
+  friend class internal::UnitTestImpl;
 
-#if GTEST_HAS_GLOBAL_STRING
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string);
-#endif
+  // Returns repeater that broadcasts the TestEventListener events to all
+  // subscribers.
+  TestEventListener* repeater();
 
-#if GTEST_HAS_GLOBAL_WSTRING
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring);
-#endif
+  // Sets the default_result_printer attribute to the provided listener.
+  // The listener is also added to the listener list and previous
+  // default_result_printer is removed from it and deleted. The listener can
+  // also be NULL in which case it will not be added to the list. Does
+  // nothing if the previous and the current listener objects are the same.
+  void SetDefaultResultPrinter(TestEventListener* listener);
 
-#if GTEST_HAS_STD_WSTRING
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
-#endif
+  // Sets the default_xml_generator attribute to the provided listener.  The
+  // listener is also added to the listener list and previous
+  // default_xml_generator is removed from it and deleted. The listener can
+  // also be NULL in which case it will not be added to the list. Does
+  // nothing if the previous and the current listener objects are the same.
+  void SetDefaultXmlGenerator(TestEventListener* listener);
 
-#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_
+  // Controls whether events will be forwarded by the repeater to the
+  // listeners in the list.
+  bool EventForwardingEnabled() const;
+  void SuppressEventForwarding();
 
-// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc)
-// operand to be used in a failure message.  The type (but not value)
-// of the other operand may affect the format.  This allows us to
-// print a char* as a raw pointer when it is compared against another
-// char* or void*, and print it as a C string when it is compared
-// against an std::string object, for example.
+  // The actual list of listeners.
+  internal::TestEventRepeater* repeater_;
+  // Listener responsible for the standard result output.
+  TestEventListener* default_result_printer_;
+  // Listener responsible for the creation of the XML output file.
+  TestEventListener* default_xml_generator_;
+
+  // We disallow copying TestEventListeners.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners);
+};
+
+// A UnitTest consists of a vector of TestSuites.
 //
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-template <typename T1, typename T2>
-std::string FormatForComparisonFailureMessage(
-    const T1& value, const T2& /* other_operand */) {
-  return FormatForComparison<T1, T2>::Format(value);
-}
+// This is a singleton class.  The only instance of UnitTest is
+// created when UnitTest::GetInstance() is first called.  This
+// instance is never deleted.
+//
+// UnitTest is not copyable.
+//
+// This class is thread-safe as long as the methods are called
+// according to their specification.
+class GTEST_API_ UnitTest {
+ public:
+  // Gets the singleton UnitTest object.  The first time this method
+  // is called, a UnitTest object is constructed and returned.
+  // Consecutive calls will return the same object.
+  static UnitTest* GetInstance();
 
-// The helper function for {ASSERT|EXPECT}_EQ.
-template <typename T1, typename T2>
-AssertionResult CmpHelperEQ(const char* expected_expression,
-                            const char* actual_expression,
-                            const T1& expected,
-                            const T2& actual) {
-#ifdef _MSC_VER
-# pragma warning(push)          // Saves the current warning state.
-# pragma warning(disable:4389)  // Temporarily disables warning on
-                                // signed/unsigned mismatch.
-#endif
+  // Runs all tests in this UnitTest object and prints the result.
+  // Returns 0 if successful, or 1 otherwise.
+  //
+  // This method can only be called from the main thread.
+  //
+  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+  int Run() GTEST_MUST_USE_RESULT_;
 
-  if (expected == actual) {
-    return AssertionSuccess();
-  }
+  // Returns the working directory when the first TEST() or TEST_F()
+  // was executed.  The UnitTest object owns the string.
+  const char* original_working_dir() const;
 
-#ifdef _MSC_VER
-# pragma warning(pop)          // Restores the warning state.
+  // Returns the TestSuite object for the test that's currently running,
+  // or NULL if no test is running.
+  const TestSuite* current_test_suite() const GTEST_LOCK_EXCLUDED_(mutex_);
+
+// Legacy API is still available but deprecated
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  const TestCase* current_test_case() const GTEST_LOCK_EXCLUDED_(mutex_);
 #endif
 
-  return EqFailure(expected_expression,
-                   actual_expression,
-                   FormatForComparisonFailureMessage(expected, actual),
-                   FormatForComparisonFailureMessage(actual, expected),
-                   false);
-}
+  // Returns the TestInfo object for the test that's currently running,
+  // or NULL if no test is running.
+  const TestInfo* current_test_info() const
+      GTEST_LOCK_EXCLUDED_(mutex_);
 
-// With this overloaded version, we allow anonymous enums to be used
-// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
-// can be implicitly cast to BiggestInt.
-GTEST_API_ AssertionResult CmpHelperEQ(const char* expected_expression,
-                                       const char* actual_expression,
-                                       BiggestInt expected,
-                                       BiggestInt actual);
-
-// The helper class for {ASSERT|EXPECT}_EQ.  The template argument
-// lhs_is_null_literal is true iff the first argument to ASSERT_EQ()
-// is a null pointer literal.  The following default implementation is
-// for lhs_is_null_literal being false.
-template <bool lhs_is_null_literal>
-class EqHelper {
- public:
-  // This templatized version is for the general case.
-  template <typename T1, typename T2>
-  static AssertionResult Compare(const char* expected_expression,
-                                 const char* actual_expression,
-                                 const T1& expected,
-                                 const T2& actual) {
-    return CmpHelperEQ(expected_expression, actual_expression, expected,
-                       actual);
-  }
+  // Returns the random seed used at the start of the current test run.
+  int random_seed() const;
 
-  // With this overloaded version, we allow anonymous enums to be used
-  // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
-  // enums can be implicitly cast to BiggestInt.
+  // Returns the ParameterizedTestSuiteRegistry object used to keep track of
+  // value-parameterized tests and instantiate and register them.
   //
-  // Even though its body looks the same as the above version, we
-  // cannot merge the two, as it will make anonymous enums unhappy.
-  static AssertionResult Compare(const char* expected_expression,
-                                 const char* actual_expression,
-                                 BiggestInt expected,
-                                 BiggestInt actual) {
-    return CmpHelperEQ(expected_expression, actual_expression, expected,
-                       actual);
-  }
-};
+  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+  internal::ParameterizedTestSuiteRegistry& parameterized_test_registry()
+      GTEST_LOCK_EXCLUDED_(mutex_);
 
-// This specialization is used when the first argument to ASSERT_EQ()
-// is a null pointer literal, like NULL, false, or 0.
-template <>
-class EqHelper<true> {
- public:
-  // We define two overloaded versions of Compare().  The first
-  // version will be picked when the second argument to ASSERT_EQ() is
-  // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or
-  // EXPECT_EQ(false, a_bool).
-  template <typename T1, typename T2>
-  static AssertionResult Compare(
-      const char* expected_expression,
-      const char* actual_expression,
-      const T1& expected,
-      const T2& actual,
-      // The following line prevents this overload from being considered if T2
-      // is not a pointer type.  We need this because ASSERT_EQ(NULL, my_ptr)
-      // expands to Compare("", "", NULL, my_ptr), which requires a conversion
-      // to match the Secret* in the other overload, which would otherwise make
-      // this template match better.
-      typename EnableIf<!is_pointer<T2>::value>::type* = 0) {
-    return CmpHelperEQ(expected_expression, actual_expression, expected,
-                       actual);
-  }
+  // Gets the number of successful test suites.
+  int successful_test_suite_count() const;
 
-  // This version will be picked when the second argument to ASSERT_EQ() is a
-  // pointer, e.g. ASSERT_EQ(NULL, a_pointer).
-  template <typename T>
-  static AssertionResult Compare(
-      const char* expected_expression,
-      const char* actual_expression,
-      // We used to have a second template parameter instead of Secret*.  That
-      // template parameter would deduce to 'long', making this a better match
-      // than the first overload even without the first overload's EnableIf.
-      // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to
-      // non-pointer argument" (even a deduced integral argument), so the old
-      // implementation caused warnings in user code.
-      Secret* /* expected (NULL) */,
-      T* actual) {
-    // We already know that 'expected' is a null pointer.
-    return CmpHelperEQ(expected_expression, actual_expression,
-                       static_cast<T*>(NULL), actual);
-  }
-};
+  // Gets the number of failed test suites.
+  int failed_test_suite_count() const;
+
+  // Gets the number of all test suites.
+  int total_test_suite_count() const;
+
+  // Gets the number of all test suites that contain at least one test
+  // that should run.
+  int test_suite_to_run_count() const;
+
+  //  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  int successful_test_case_count() const;
+  int failed_test_case_count() const;
+  int total_test_case_count() const;
+  int test_case_to_run_count() const;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  // Gets the number of successful tests.
+  int successful_test_count() const;
+
+  // Gets the number of skipped tests.
+  int skipped_test_count() const;
+
+  // Gets the number of failed tests.
+  int failed_test_count() const;
 
-// A macro for implementing the helper functions needed to implement
-// ASSERT_?? and EXPECT_??.  It is here just to avoid copy-and-paste
-// of similar code.
-//
-// For each templatized helper function, we also define an overloaded
-// version for BiggestInt in order to reduce code bloat and allow
-// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled
-// with gcc 4.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
-template <typename T1, typename T2>\
-AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
-                                   const T1& val1, const T2& val2) {\
-  if (val1 op val2) {\
-    return AssertionSuccess();\
-  } else {\
-    return AssertionFailure() \
-        << "Expected: (" << expr1 << ") " #op " (" << expr2\
-        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
-        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
-  }\
-}\
-GTEST_API_ AssertionResult CmpHelper##op_name(\
-    const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2)
+  // Gets the number of disabled tests that will be reported in the XML report.
+  int reportable_disabled_test_count() const;
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+  // Gets the number of disabled tests.
+  int disabled_test_count() const;
 
-// Implements the helper function for {ASSERT|EXPECT}_NE
-GTEST_IMPL_CMP_HELPER_(NE, !=);
-// Implements the helper function for {ASSERT|EXPECT}_LE
-GTEST_IMPL_CMP_HELPER_(LE, <=);
-// Implements the helper function for {ASSERT|EXPECT}_LT
-GTEST_IMPL_CMP_HELPER_(LT, <);
-// Implements the helper function for {ASSERT|EXPECT}_GE
-GTEST_IMPL_CMP_HELPER_(GE, >=);
-// Implements the helper function for {ASSERT|EXPECT}_GT
-GTEST_IMPL_CMP_HELPER_(GT, >);
+  // Gets the number of tests to be printed in the XML report.
+  int reportable_test_count() const;
 
-#undef GTEST_IMPL_CMP_HELPER_
+  // Gets the number of all tests.
+  int total_test_count() const;
 
-// The helper function for {ASSERT|EXPECT}_STREQ.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
-                                          const char* actual_expression,
-                                          const char* expected,
-                                          const char* actual);
+  // Gets the number of tests that should run.
+  int test_to_run_count() const;
 
-// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
-                                              const char* actual_expression,
-                                              const char* expected,
-                                              const char* actual);
+  // Gets the time of the test program start, in ms from the start of the
+  // UNIX epoch.
+  TimeInMillis start_timestamp() const;
 
-// The helper function for {ASSERT|EXPECT}_STRNE.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
-                                          const char* s2_expression,
-                                          const char* s1,
-                                          const char* s2);
+  // Gets the elapsed time, in milliseconds.
+  TimeInMillis elapsed_time() const;
 
-// The helper function for {ASSERT|EXPECT}_STRCASENE.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
-                                              const char* s2_expression,
-                                              const char* s1,
-                                              const char* s2);
+  // Returns true if and only if the unit test passed (i.e. all test suites
+  // passed).
+  bool Passed() const;
 
+  // Returns true if and only if the unit test failed (i.e. some test suite
+  // failed or something outside of all tests failed).
+  bool Failed() const;
 
-// Helper function for *_STREQ on wide strings.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
-                                          const char* actual_expression,
-                                          const wchar_t* expected,
-                                          const wchar_t* actual);
+  // Gets the i-th test suite among all the test suites. i can range from 0 to
+  // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+  const TestSuite* GetTestSuite(int i) const;
 
-// Helper function for *_STRNE on wide strings.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
-                                          const char* s2_expression,
-                                          const wchar_t* s1,
-                                          const wchar_t* s2);
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  const TestCase* GetTestCase(int i) const;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-}  // namespace internal
+  // Returns the TestResult containing information on test failures and
+  // properties logged outside of individual test suites.
+  const TestResult& ad_hoc_test_result() const;
 
-// IsSubstring() and IsNotSubstring() are intended to be used as the
-// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
-// themselves.  They check whether needle is a substring of haystack
-// (NULL is considered a substring of itself only), and return an
-// appropriate error message when they fail.
-//
-// The {needle,haystack}_expr arguments are the stringified
-// expressions that generated the two real arguments.
-GTEST_API_ AssertionResult IsSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const char* needle, const char* haystack);
-GTEST_API_ AssertionResult IsSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const wchar_t* needle, const wchar_t* haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const char* needle, const char* haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const wchar_t* needle, const wchar_t* haystack);
-GTEST_API_ AssertionResult IsSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const ::std::string& needle, const ::std::string& haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const ::std::string& needle, const ::std::string& haystack);
+  // Returns the list of event listeners that can be used to track events
+  // inside Google Test.
+  TestEventListeners& listeners();
 
-#if GTEST_HAS_STD_WSTRING
-GTEST_API_ AssertionResult IsSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const ::std::wstring& needle, const ::std::wstring& haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
-    const char* needle_expr, const char* haystack_expr,
-    const ::std::wstring& needle, const ::std::wstring& haystack);
-#endif  // GTEST_HAS_STD_WSTRING
+ private:
+  // Registers and returns a global test environment.  When a test
+  // program is run, all global test environments will be set-up in
+  // the order they were registered.  After all tests in the program
+  // have finished, all global test environments will be torn-down in
+  // the *reverse* order they were registered.
+  //
+  // The UnitTest object takes ownership of the given environment.
+  //
+  // This method can only be called from the main thread.
+  Environment* AddEnvironment(Environment* env);
 
-namespace internal {
+  // Adds a TestPartResult to the current TestResult object.  All
+  // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
+  // eventually call this to report their results.  The user code
+  // should use the assertion macros instead of calling this directly.
+  void AddTestPartResult(TestPartResult::Type result_type,
+                         const char* file_name,
+                         int line_number,
+                         const std::string& message,
+                         const std::string& os_stack_trace)
+      GTEST_LOCK_EXCLUDED_(mutex_);
 
-// Helper template function for comparing floating-points.
-//
-// Template parameter:
-//
-//   RawType: the raw floating-point type (either float or double)
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-template <typename RawType>
-AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression,
-                                         const char* actual_expression,
-                                         RawType expected,
-                                         RawType actual) {
-  const FloatingPoint<RawType> lhs(expected), rhs(actual);
+  // Adds a TestProperty to the current TestResult object when invoked from
+  // inside a test, to current TestSuite's ad_hoc_test_result_ when invoked
+  // from SetUpTestSuite or TearDownTestSuite, or to the global property set
+  // when invoked elsewhere.  If the result already contains a property with
+  // the same key, the value will be updated.
+  void RecordProperty(const std::string& key, const std::string& value);
 
-  if (lhs.AlmostEquals(rhs)) {
-    return AssertionSuccess();
-  }
+  // Gets the i-th test suite among all the test suites. i can range from 0 to
+  // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+  TestSuite* GetMutableTestSuite(int i);
 
-  ::std::stringstream expected_ss;
-  expected_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
-              << expected;
+  // Accessors for the implementation object.
+  internal::UnitTestImpl* impl() { return impl_; }
+  const internal::UnitTestImpl* impl() const { return impl_; }
 
-  ::std::stringstream actual_ss;
-  actual_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
-            << actual;
+  // These classes and functions are friends as they need to access private
+  // members of UnitTest.
+  friend class ScopedTrace;
+  friend class Test;
+  friend class internal::AssertHelper;
+  friend class internal::StreamingListenerTest;
+  friend class internal::UnitTestRecordPropertyTestHelper;
+  friend Environment* AddGlobalTestEnvironment(Environment* env);
+  friend std::set<std::string>* internal::GetIgnoredParameterizedTestSuites();
+  friend internal::UnitTestImpl* internal::GetUnitTestImpl();
+  friend void internal::ReportFailureInUnknownLocation(
+      TestPartResult::Type result_type,
+      const std::string& message);
 
-  return EqFailure(expected_expression,
-                   actual_expression,
-                   StringStreamToString(&expected_ss),
-                   StringStreamToString(&actual_ss),
-                   false);
-}
+  // Creates an empty UnitTest.
+  UnitTest();
 
-// Helper function for implementing ASSERT_NEAR.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
-                                                const char* expr2,
-                                                const char* abs_error_expr,
-                                                double val1,
-                                                double val2,
-                                                double abs_error);
+  // D'tor
+  virtual ~UnitTest();
 
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-// A class that enables one to stream messages to assertion macros
-class GTEST_API_ AssertHelper {
- public:
-  // Constructor.
-  AssertHelper(TestPartResult::Type type,
-               const char* file,
-               int line,
-               const char* message);
-  ~AssertHelper();
+  // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
+  // Google Test trace stack.
+  void PushGTestTrace(const internal::TraceInfo& trace)
+      GTEST_LOCK_EXCLUDED_(mutex_);
 
-  // Message assignment is a semantic trick to enable assertion
-  // streaming; see the GTEST_MESSAGE_ macro below.
-  void operator=(const Message& message) const;
+  // Pops a trace from the per-thread Google Test trace stack.
+  void PopGTestTrace()
+      GTEST_LOCK_EXCLUDED_(mutex_);
 
- private:
-  // We put our data in a struct so that the size of the AssertHelper class can
-  // be as small as possible.  This is important because gcc is incapable of
-  // re-using stack space even for temporary variables, so every EXPECT_EQ
-  // reserves stack space for another AssertHelper.
-  struct AssertHelperData {
-    AssertHelperData(TestPartResult::Type t,
-                     const char* srcfile,
-                     int line_num,
-                     const char* msg)
-        : type(t), file(srcfile), line(line_num), message(msg) { }
+  // Protects mutable state in *impl_.  This is mutable as some const
+  // methods need to lock it too.
+  mutable internal::Mutex mutex_;
 
-    TestPartResult::Type const type;
-    const char* const file;
-    int const line;
-    std::string const message;
+  // Opaque implementation object.  This field is never changed once
+  // the object is constructed.  We don't mark it as const here, as
+  // doing so will cause a warning in the constructor of UnitTest.
+  // Mutable state in *impl_ is protected by mutex_.
+  internal::UnitTestImpl* impl_;
 
-   private:
-    GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
-  };
+  // We disallow copying UnitTest.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest);
+};
 
-  AssertHelperData* const data_;
+// A convenient wrapper for adding an environment for the test
+// program.
+//
+// You should call this before RUN_ALL_TESTS() is called, probably in
+// main().  If you use gtest_main, you need to call this before main()
+// starts for it to take effect.  For example, you can define a global
+// variable like this:
+//
+//   testing::Environment* const foo_env =
+//       testing::AddGlobalTestEnvironment(new FooEnvironment);
+//
+// However, we strongly recommend you to write your own main() and
+// call AddGlobalTestEnvironment() there, as relying on initialization
+// of global variables makes the code harder to read and may cause
+// problems when you register multiple environments from different
+// translation units and the environments have dependencies among them
+// (remember that the compiler doesn't guarantee the order in which
+// global variables from different translation units are initialized).
+inline Environment* AddGlobalTestEnvironment(Environment* env) {
+  return UnitTest::GetInstance()->AddEnvironment(env);
+}
+
+// Initializes Google Test.  This must be called before calling
+// RUN_ALL_TESTS().  In particular, it parses a command line for the
+// flags that Google Test recognizes.  Whenever a Google Test flag is
+// seen, it is removed from argv, and *argc is decremented.
+//
+// No value is returned.  Instead, the Google Test flag variables are
+// updated.
+//
+// Calling the function for the second time has no user-visible effect.
+GTEST_API_ void InitGoogleTest(int* argc, char** argv);
 
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
-};
+// This overloaded version can be used in Windows programs compiled in
+// UNICODE mode.
+GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);
 
-}  // namespace internal
+// This overloaded version can be used on Arduino/embedded platforms where
+// there is no argc/argv.
+GTEST_API_ void InitGoogleTest();
 
-#if GTEST_HAS_PARAM_TEST
-// The pure interface class that all value-parameterized tests inherit from.
-// A value-parameterized class must inherit from both ::testing::Test and
-// ::testing::WithParamInterface. In most cases that just means inheriting
-// from ::testing::TestWithParam, but more complicated test hierarchies
-// may need to inherit from Test and WithParamInterface at different levels.
-//
-// This interface has support for accessing the test parameter value via
-// the GetParam() method.
-//
-// Use it with one of the parameter generator defining functions, like Range(),
-// Values(), ValuesIn(), Bool(), and Combine().
-//
-// class FooTest : public ::testing::TestWithParam<int> {
-//  protected:
-//   FooTest() {
-//     // Can use GetParam() here.
-//   }
-//   virtual ~FooTest() {
-//     // Can use GetParam() here.
-//   }
-//   virtual void SetUp() {
-//     // Can use GetParam() here.
-//   }
-//   virtual void TearDown {
-//     // Can use GetParam() here.
-//   }
-// };
-// TEST_P(FooTest, DoesBar) {
-//   // Can use GetParam() method here.
-//   Foo foo;
-//   ASSERT_TRUE(foo.DoesBar(GetParam()));
-// }
-// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));
+namespace internal {
 
-template <typename T>
-class WithParamInterface {
- public:
-  typedef T ParamType;
-  virtual ~WithParamInterface() {}
+// Separate the error generating code from the code path to reduce the stack
+// frame size of CmpHelperEQ. This helps reduce the overhead of some sanitizers
+// when calling EXPECT_* in a tight loop.
+template <typename T1, typename T2>
+AssertionResult CmpHelperEQFailure(const char* lhs_expression,
+                                   const char* rhs_expression,
+                                   const T1& lhs, const T2& rhs) {
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   FormatForComparisonFailureMessage(lhs, rhs),
+                   FormatForComparisonFailureMessage(rhs, lhs),
+                   false);
+}
 
-  // The current parameter value. Is also available in the test fixture's
-  // constructor. This member function is non-static, even though it only
-  // references static data, to reduce the opportunity for incorrect uses
-  // like writing 'WithParamInterface<bool>::GetParam()' for a test that
-  // uses a fixture whose parameter type is int.
-  const ParamType& GetParam() const {
-    GTEST_CHECK_(parameter_ != NULL)
-        << "GetParam() can only be called inside a value-parameterized test "
-        << "-- did you intend to write TEST_P instead of TEST_F?";
-    return *parameter_;
-  }
+// This block of code defines operator==/!=
+// to block lexical scope lookup.
+// It prevents using invalid operator==/!= defined at namespace scope.
+struct faketype {};
+inline bool operator==(faketype, faketype) { return true; }
+inline bool operator!=(faketype, faketype) { return false; }
 
- private:
-  // Sets parameter value. The caller is responsible for making sure the value
-  // remains alive and unchanged throughout the current test.
-  static void SetParam(const ParamType* parameter) {
-    parameter_ = parameter;
+// The helper function for {ASSERT|EXPECT}_EQ.
+template <typename T1, typename T2>
+AssertionResult CmpHelperEQ(const char* lhs_expression,
+                            const char* rhs_expression,
+                            const T1& lhs,
+                            const T2& rhs) {
+  if (lhs == rhs) {
+    return AssertionSuccess();
   }
 
-  // Static value used for accessing parameter during a test lifetime.
-  static const ParamType* parameter_;
-
-  // TestClass must be a subclass of WithParamInterface<T> and Test.
-  template <class TestClass> friend class internal::ParameterizedTestFactory;
-};
+  return CmpHelperEQFailure(lhs_expression, rhs_expression, lhs, rhs);
+}
 
-template <typename T>
-const T* WithParamInterface<T>::parameter_ = NULL;
+class EqHelper {
+ public:
+  // This templatized version is for the general case.
+  template <
+      typename T1, typename T2,
+      // Disable this overload for cases where one argument is a pointer
+      // and the other is the null pointer constant.
+      typename std::enable_if<!std::is_integral<T1>::value ||
+                              !std::is_pointer<T2>::value>::type* = nullptr>
+  static AssertionResult Compare(const char* lhs_expression,
+                                 const char* rhs_expression, const T1& lhs,
+                                 const T2& rhs) {
+    return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
+  }
 
-// Most value-parameterized classes can ignore the existence of
-// WithParamInterface, and can just inherit from ::testing::TestWithParam.
+  // With this overloaded version, we allow anonymous enums to be used
+  // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
+  // enums can be implicitly cast to BiggestInt.
+  //
+  // Even though its body looks the same as the above version, we
+  // cannot merge the two, as it will make anonymous enums unhappy.
+  static AssertionResult Compare(const char* lhs_expression,
+                                 const char* rhs_expression,
+                                 BiggestInt lhs,
+                                 BiggestInt rhs) {
+    return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
+  }
 
-template <typename T>
-class TestWithParam : public Test, public WithParamInterface<T> {
+  template <typename T>
+  static AssertionResult Compare(
+      const char* lhs_expression, const char* rhs_expression,
+      // Handle cases where '0' is used as a null pointer literal.
+      std::nullptr_t /* lhs */, T* rhs) {
+    // We already know that 'lhs' is a null pointer.
+    return CmpHelperEQ(lhs_expression, rhs_expression, static_cast<T*>(nullptr),
+                       rhs);
+  }
 };
 
-#endif  // GTEST_HAS_PARAM_TEST
-
-// Macros for indicating success/failure in test code.
+// Separate the error generating code from the code path to reduce the stack
+// frame size of CmpHelperOP. This helps reduce the overhead of some sanitizers
+// when calling EXPECT_OP in a tight loop.
+template <typename T1, typename T2>
+AssertionResult CmpHelperOpFailure(const char* expr1, const char* expr2,
+                                   const T1& val1, const T2& val2,
+                                   const char* op) {
+  return AssertionFailure()
+         << "Expected: (" << expr1 << ") " << op << " (" << expr2
+         << "), actual: " << FormatForComparisonFailureMessage(val1, val2)
+         << " vs " << FormatForComparisonFailureMessage(val2, val1);
+}
 
-// ADD_FAILURE unconditionally adds a failure to the current test.
-// SUCCEED generates a success - it doesn't automatically make the
-// current test successful, as a test is only successful when it has
-// no failure.
-//
-// EXPECT_* verifies that a certain condition is satisfied.  If not,
-// it behaves like ADD_FAILURE.  In particular:
-//
-//   EXPECT_TRUE  verifies that a Boolean condition is true.
-//   EXPECT_FALSE verifies that a Boolean condition is false.
+// A macro for implementing the helper functions needed to implement
+// ASSERT_?? and EXPECT_??.  It is here just to avoid copy-and-paste
+// of similar code.
 //
-// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
-// that they will also abort the current function on failure.  People
-// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
-// writing data-driven tests often find themselves using ADD_FAILURE
-// and EXPECT_* more.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
 
-// Generates a nonfatal failure with a generic message.
-#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")
+#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
+template <typename T1, typename T2>\
+AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+                                   const T1& val1, const T2& val2) {\
+  if (val1 op val2) {\
+    return AssertionSuccess();\
+  } else {\
+    return CmpHelperOpFailure(expr1, expr2, val1, val2, #op);\
+  }\
+}
 
-// Generates a nonfatal failure at the given source file location with
-// a generic message.
-#define ADD_FAILURE_AT(file, line) \
-  GTEST_MESSAGE_AT_(file, line, "Failed", \
-                    ::testing::TestPartResult::kNonFatalFailure)
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
 
-// Generates a fatal failure with a generic message.
-#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")
+// Implements the helper function for {ASSERT|EXPECT}_NE
+GTEST_IMPL_CMP_HELPER_(NE, !=)
+// Implements the helper function for {ASSERT|EXPECT}_LE
+GTEST_IMPL_CMP_HELPER_(LE, <=)
+// Implements the helper function for {ASSERT|EXPECT}_LT
+GTEST_IMPL_CMP_HELPER_(LT, <)
+// Implements the helper function for {ASSERT|EXPECT}_GE
+GTEST_IMPL_CMP_HELPER_(GE, >=)
+// Implements the helper function for {ASSERT|EXPECT}_GT
+GTEST_IMPL_CMP_HELPER_(GT, >)
 
-// Define this macro to 1 to omit the definition of FAIL(), which is a
-// generic name and clashes with some other libraries.
-#if !GTEST_DONT_DEFINE_FAIL
-# define FAIL() GTEST_FAIL()
-#endif
+#undef GTEST_IMPL_CMP_HELPER_
 
-// Generates a success with a generic message.
-#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")
+// The helper function for {ASSERT|EXPECT}_STREQ.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
+                                          const char* s2_expression,
+                                          const char* s1,
+                                          const char* s2);
 
-// Define this macro to 1 to omit the definition of SUCCEED(), which
-// is a generic name and clashes with some other libraries.
-#if !GTEST_DONT_DEFINE_SUCCEED
-# define SUCCEED() GTEST_SUCCEED()
-#endif
+// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* s1_expression,
+                                              const char* s2_expression,
+                                              const char* s1,
+                                              const char* s2);
 
-// Macros for testing exceptions.
+// The helper function for {ASSERT|EXPECT}_STRNE.
 //
-//    * {ASSERT|EXPECT}_THROW(statement, expected_exception):
-//         Tests that the statement throws the expected exception.
-//    * {ASSERT|EXPECT}_NO_THROW(statement):
-//         Tests that the statement doesn't throw any exception.
-//    * {ASSERT|EXPECT}_ANY_THROW(statement):
-//         Tests that the statement throws an exception.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+                                          const char* s2_expression,
+                                          const char* s1,
+                                          const char* s2);
 
-#define EXPECT_THROW(statement, expected_exception) \
-  GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_NO_THROW(statement) \
-  GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_ANY_THROW(statement) \
-  GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_THROW(statement, expected_exception) \
-  GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
-#define ASSERT_NO_THROW(statement) \
-  GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
-#define ASSERT_ANY_THROW(statement) \
-  GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)
+// The helper function for {ASSERT|EXPECT}_STRCASENE.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+                                              const char* s2_expression,
+                                              const char* s1,
+                                              const char* s2);
 
-// Boolean assertions. Condition can be either a Boolean expression or an
-// AssertionResult. For more information on how to use AssertionResult with
-// these macros see comments on that class.
-#define EXPECT_TRUE(condition) \
-  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
-                      GTEST_NONFATAL_FAILURE_)
-#define EXPECT_FALSE(condition) \
-  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
-                      GTEST_NONFATAL_FAILURE_)
-#define ASSERT_TRUE(condition) \
-  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
-                      GTEST_FATAL_FAILURE_)
-#define ASSERT_FALSE(condition) \
-  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
-                      GTEST_FATAL_FAILURE_)
 
-// Includes the auto-generated header that implements a family of
-// generic predicate assertion macros.
-// Copyright 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
+// Helper function for *_STREQ on wide strings.
 //
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
+                                          const char* s2_expression,
+                                          const wchar_t* s1,
+                                          const wchar_t* s2);
+
+// Helper function for *_STRNE on wide strings.
 //
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+                                          const char* s2_expression,
+                                          const wchar_t* s1,
+                                          const wchar_t* s2);
 
-// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
-// 'gen_gtest_pred_impl.py 5'.  DO NOT EDIT BY HAND!
+}  // namespace internal
+
+// IsSubstring() and IsNotSubstring() are intended to be used as the
+// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
+// themselves.  They check whether needle is a substring of haystack
+// (NULL is considered a substring of itself only), and return an
+// appropriate error message when they fail.
 //
-// Implements a family of generic predicate assertion macros.
+// The {needle,haystack}_expr arguments are the stringified
+// expressions that generated the two real arguments.
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const char* needle, const char* haystack);
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const wchar_t* needle, const wchar_t* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const char* needle, const char* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const wchar_t* needle, const wchar_t* haystack);
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::string& needle, const ::std::string& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::string& needle, const ::std::string& haystack);
 
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::wstring& needle, const ::std::wstring& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::wstring& needle, const ::std::wstring& haystack);
+#endif  // GTEST_HAS_STD_WSTRING
 
-// Makes sure this header is not included before gtest.h.
-#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
-# error Do not include gtest_pred_impl.h directly.  Include gtest.h instead.
-#endif  // GTEST_INCLUDE_GTEST_GTEST_H_
+namespace internal {
 
-// This header implements a family of generic predicate assertion
-// macros:
-//
-//   ASSERT_PRED_FORMAT1(pred_format, v1)
-//   ASSERT_PRED_FORMAT2(pred_format, v1, v2)
-//   ...
-//
-// where pred_format is a function or functor that takes n (in the
-// case of ASSERT_PRED_FORMATn) values and their source expression
-// text, and returns a testing::AssertionResult.  See the definition
-// of ASSERT_EQ in gtest.h for an example.
-//
-// If you don't care about formatting, you can use the more
-// restrictive version:
-//
-//   ASSERT_PRED1(pred, v1)
-//   ASSERT_PRED2(pred, v1, v2)
-//   ...
+// Helper template function for comparing floating-points.
 //
-// where pred is an n-ary function or functor that returns bool,
-// and the values v1, v2, ..., must support the << operator for
-// streaming to std::ostream.
+// Template parameter:
 //
-// We also define the EXPECT_* variations.
+//   RawType: the raw floating-point type (either float or double)
 //
-// For now we only support predicates whose arity is at most 5.
-// Please email googletestframework@googlegroups.com if you need
-// support for higher arities.
-
-// GTEST_ASSERT_ is the basic statement to which all of the assertions
-// in this file reduce.  Don't use this in your code.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename RawType>
+AssertionResult CmpHelperFloatingPointEQ(const char* lhs_expression,
+                                         const char* rhs_expression,
+                                         RawType lhs_value,
+                                         RawType rhs_value) {
+  const FloatingPoint<RawType> lhs(lhs_value), rhs(rhs_value);
 
-#define GTEST_ASSERT_(expression, on_failure) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (const ::testing::AssertionResult gtest_ar = (expression)) \
-    ; \
-  else \
-    on_failure(gtest_ar.failure_message())
+  if (lhs.AlmostEquals(rhs)) {
+    return AssertionSuccess();
+  }
 
+  ::std::stringstream lhs_ss;
+  lhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+         << lhs_value;
 
-// Helper function for implementing {EXPECT|ASSERT}_PRED1.  Don't use
-// this in your code.
-template <typename Pred,
-          typename T1>
-AssertionResult AssertPred1Helper(const char* pred_text,
-                                  const char* e1,
-                                  Pred pred,
-                                  const T1& v1) {
-  if (pred(v1)) return AssertionSuccess();
+  ::std::stringstream rhs_ss;
+  rhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+         << rhs_value;
 
-  return AssertionFailure() << pred_text << "("
-                            << e1 << ") evaluates to false, where"
-                            << "\n" << e1 << " evaluates to " << v1;
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   StringStreamToString(&lhs_ss),
+                   StringStreamToString(&rhs_ss),
+                   false);
 }
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
-  GTEST_ASSERT_(pred_format(#v1, v1), \
-                on_failure)
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED1.  Don't use
-// this in your code.
-#define GTEST_PRED1_(pred, v1, on_failure)\
-  GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
-                                             #v1, \
-                                             pred, \
-                                             v1), on_failure)
+// Helper function for implementing ASSERT_NEAR.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
+                                                const char* expr2,
+                                                const char* abs_error_expr,
+                                                double val1,
+                                                double val2,
+                                                double abs_error);
 
-// Unary predicate assertion macros.
-#define EXPECT_PRED_FORMAT1(pred_format, v1) \
-  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED1(pred, v1) \
-  GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT1(pred_format, v1) \
-  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED1(pred, v1) \
-  GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// A class that enables one to stream messages to assertion macros
+class GTEST_API_ AssertHelper {
+ public:
+  // Constructor.
+  AssertHelper(TestPartResult::Type type,
+               const char* file,
+               int line,
+               const char* message);
+  ~AssertHelper();
 
+  // Message assignment is a semantic trick to enable assertion
+  // streaming; see the GTEST_MESSAGE_ macro below.
+  void operator=(const Message& message) const;
 
+ private:
+  // We put our data in a struct so that the size of the AssertHelper class can
+  // be as small as possible.  This is important because gcc is incapable of
+  // re-using stack space even for temporary variables, so every EXPECT_EQ
+  // reserves stack space for another AssertHelper.
+  struct AssertHelperData {
+    AssertHelperData(TestPartResult::Type t,
+                     const char* srcfile,
+                     int line_num,
+                     const char* msg)
+        : type(t), file(srcfile), line(line_num), message(msg) { }
 
-// Helper function for implementing {EXPECT|ASSERT}_PRED2.  Don't use
-// this in your code.
-template <typename Pred,
-          typename T1,
-          typename T2>
-AssertionResult AssertPred2Helper(const char* pred_text,
-                                  const char* e1,
-                                  const char* e2,
-                                  Pred pred,
-                                  const T1& v1,
-                                  const T2& v2) {
-  if (pred(v1, v2)) return AssertionSuccess();
+    TestPartResult::Type const type;
+    const char* const file;
+    int const line;
+    std::string const message;
 
-  return AssertionFailure() << pred_text << "("
-                            << e1 << ", "
-                            << e2 << ") evaluates to false, where"
-                            << "\n" << e1 << " evaluates to " << v1
-                            << "\n" << e2 << " evaluates to " << v2;
-}
+   private:
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
+  };
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
-  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
-                on_failure)
+  AssertHelperData* const data_;
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED2.  Don't use
-// this in your code.
-#define GTEST_PRED2_(pred, v1, v2, on_failure)\
-  GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
-                                             #v1, \
-                                             #v2, \
-                                             pred, \
-                                             v1, \
-                                             v2), on_failure)
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
+};
 
-// Binary predicate assertion macros.
-#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
-  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED2(pred, v1, v2) \
-  GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
-  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED2(pred, v1, v2) \
-  GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
+}  // namespace internal
 
+// The pure interface class that all value-parameterized tests inherit from.
+// A value-parameterized class must inherit from both ::testing::Test and
+// ::testing::WithParamInterface. In most cases that just means inheriting
+// from ::testing::TestWithParam, but more complicated test hierarchies
+// may need to inherit from Test and WithParamInterface at different levels.
+//
+// This interface has support for accessing the test parameter value via
+// the GetParam() method.
+//
+// Use it with one of the parameter generator defining functions, like Range(),
+// Values(), ValuesIn(), Bool(), and Combine().
+//
+// class FooTest : public ::testing::TestWithParam<int> {
+//  protected:
+//   FooTest() {
+//     // Can use GetParam() here.
+//   }
+//   ~FooTest() override {
+//     // Can use GetParam() here.
+//   }
+//   void SetUp() override {
+//     // Can use GetParam() here.
+//   }
+//   void TearDown override {
+//     // Can use GetParam() here.
+//   }
+// };
+// TEST_P(FooTest, DoesBar) {
+//   // Can use GetParam() method here.
+//   Foo foo;
+//   ASSERT_TRUE(foo.DoesBar(GetParam()));
+// }
+// INSTANTIATE_TEST_SUITE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));
 
+template <typename T>
+class WithParamInterface {
+ public:
+  typedef T ParamType;
+  virtual ~WithParamInterface() {}
 
-// Helper function for implementing {EXPECT|ASSERT}_PRED3.  Don't use
-// this in your code.
-template <typename Pred,
-          typename T1,
-          typename T2,
-          typename T3>
-AssertionResult AssertPred3Helper(const char* pred_text,
-                                  const char* e1,
-                                  const char* e2,
-                                  const char* e3,
-                                  Pred pred,
-                                  const T1& v1,
-                                  const T2& v2,
-                                  const T3& v3) {
-  if (pred(v1, v2, v3)) return AssertionSuccess();
+  // The current parameter value. Is also available in the test fixture's
+  // constructor.
+  static const ParamType& GetParam() {
+    GTEST_CHECK_(parameter_ != nullptr)
+        << "GetParam() can only be called inside a value-parameterized test "
+        << "-- did you intend to write TEST_P instead of TEST_F?";
+    return *parameter_;
+  }
 
-  return AssertionFailure() << pred_text << "("
-                            << e1 << ", "
-                            << e2 << ", "
-                            << e3 << ") evaluates to false, where"
-                            << "\n" << e1 << " evaluates to " << v1
-                            << "\n" << e2 << " evaluates to " << v2
-                            << "\n" << e3 << " evaluates to " << v3;
-}
+ private:
+  // Sets parameter value. The caller is responsible for making sure the value
+  // remains alive and unchanged throughout the current test.
+  static void SetParam(const ParamType* parameter) {
+    parameter_ = parameter;
+  }
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
-  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
-                on_failure)
+  // Static value used for accessing parameter during a test lifetime.
+  static const ParamType* parameter_;
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED3.  Don't use
-// this in your code.
-#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
-  GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
-                                             #v1, \
-                                             #v2, \
-                                             #v3, \
-                                             pred, \
-                                             v1, \
-                                             v2, \
-                                             v3), on_failure)
+  // TestClass must be a subclass of WithParamInterface<T> and Test.
+  template <class TestClass> friend class internal::ParameterizedTestFactory;
+};
 
-// Ternary predicate assertion macros.
-#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
-  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED3(pred, v1, v2, v3) \
-  GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
-  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED3(pred, v1, v2, v3) \
-  GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
+template <typename T>
+const T* WithParamInterface<T>::parameter_ = nullptr;
 
+// Most value-parameterized classes can ignore the existence of
+// WithParamInterface, and can just inherit from ::testing::TestWithParam.
 
+template <typename T>
+class TestWithParam : public Test, public WithParamInterface<T> {
+};
 
-// Helper function for implementing {EXPECT|ASSERT}_PRED4.  Don't use
-// this in your code.
-template <typename Pred,
-          typename T1,
-          typename T2,
-          typename T3,
-          typename T4>
-AssertionResult AssertPred4Helper(const char* pred_text,
-                                  const char* e1,
-                                  const char* e2,
-                                  const char* e3,
-                                  const char* e4,
-                                  Pred pred,
-                                  const T1& v1,
-                                  const T2& v2,
-                                  const T3& v3,
-                                  const T4& v4) {
-  if (pred(v1, v2, v3, v4)) return AssertionSuccess();
+// Macros for indicating success/failure in test code.
 
-  return AssertionFailure() << pred_text << "("
-                            << e1 << ", "
-                            << e2 << ", "
-                            << e3 << ", "
-                            << e4 << ") evaluates to false, where"
-                            << "\n" << e1 << " evaluates to " << v1
-                            << "\n" << e2 << " evaluates to " << v2
-                            << "\n" << e3 << " evaluates to " << v3
-                            << "\n" << e4 << " evaluates to " << v4;
-}
+// Skips test in runtime.
+// Skipping test aborts current function.
+// Skipped tests are neither successful nor failed.
+#define GTEST_SKIP() GTEST_SKIP_("")
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
-  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
-                on_failure)
+// ADD_FAILURE unconditionally adds a failure to the current test.
+// SUCCEED generates a success - it doesn't automatically make the
+// current test successful, as a test is only successful when it has
+// no failure.
+//
+// EXPECT_* verifies that a certain condition is satisfied.  If not,
+// it behaves like ADD_FAILURE.  In particular:
+//
+//   EXPECT_TRUE  verifies that a Boolean condition is true.
+//   EXPECT_FALSE verifies that a Boolean condition is false.
+//
+// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
+// that they will also abort the current function on failure.  People
+// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
+// writing data-driven tests often find themselves using ADD_FAILURE
+// and EXPECT_* more.
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED4.  Don't use
-// this in your code.
-#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
-  GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
-                                             #v1, \
-                                             #v2, \
-                                             #v3, \
-                                             #v4, \
-                                             pred, \
-                                             v1, \
-                                             v2, \
-                                             v3, \
-                                             v4), on_failure)
+// Generates a nonfatal failure with a generic message.
+#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")
 
-// 4-ary predicate assertion macros.
-#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
-  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
-  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
-  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
-  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+// Generates a nonfatal failure at the given source file location with
+// a generic message.
+#define ADD_FAILURE_AT(file, line) \
+  GTEST_MESSAGE_AT_(file, line, "Failed", \
+                    ::testing::TestPartResult::kNonFatalFailure)
 
+// Generates a fatal failure with a generic message.
+#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")
 
+// Like GTEST_FAIL(), but at the given source file location.
+#define GTEST_FAIL_AT(file, line)         \
+  GTEST_MESSAGE_AT_(file, line, "Failed", \
+                    ::testing::TestPartResult::kFatalFailure)
 
-// Helper function for implementing {EXPECT|ASSERT}_PRED5.  Don't use
-// this in your code.
-template <typename Pred,
-          typename T1,
-          typename T2,
-          typename T3,
-          typename T4,
-          typename T5>
-AssertionResult AssertPred5Helper(const char* pred_text,
-                                  const char* e1,
-                                  const char* e2,
-                                  const char* e3,
-                                  const char* e4,
-                                  const char* e5,
-                                  Pred pred,
-                                  const T1& v1,
-                                  const T2& v2,
-                                  const T3& v3,
-                                  const T4& v4,
-                                  const T5& v5) {
-  if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
+// Define this macro to 1 to omit the definition of FAIL(), which is a
+// generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_FAIL
+# define FAIL() GTEST_FAIL()
+#endif
 
-  return AssertionFailure() << pred_text << "("
-                            << e1 << ", "
-                            << e2 << ", "
-                            << e3 << ", "
-                            << e4 << ", "
-                            << e5 << ") evaluates to false, where"
-                            << "\n" << e1 << " evaluates to " << v1
-                            << "\n" << e2 << " evaluates to " << v2
-                            << "\n" << e3 << " evaluates to " << v3
-                            << "\n" << e4 << " evaluates to " << v4
-                            << "\n" << e5 << " evaluates to " << v5;
-}
+// Generates a success with a generic message.
+#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
-  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
-                on_failure)
+// Define this macro to 1 to omit the definition of SUCCEED(), which
+// is a generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_SUCCEED
+# define SUCCEED() GTEST_SUCCEED()
+#endif
 
-// Internal macro for implementing {EXPECT|ASSERT}_PRED5.  Don't use
-// this in your code.
-#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
-  GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
-                                             #v1, \
-                                             #v2, \
-                                             #v3, \
-                                             #v4, \
-                                             #v5, \
-                                             pred, \
-                                             v1, \
-                                             v2, \
-                                             v3, \
-                                             v4, \
-                                             v5), on_failure)
+// Macros for testing exceptions.
+//
+//    * {ASSERT|EXPECT}_THROW(statement, expected_exception):
+//         Tests that the statement throws the expected exception.
+//    * {ASSERT|EXPECT}_NO_THROW(statement):
+//         Tests that the statement doesn't throw any exception.
+//    * {ASSERT|EXPECT}_ANY_THROW(statement):
+//         Tests that the statement throws an exception.
 
-// 5-ary predicate assertion macros.
-#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
-  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
-  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
-  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
-  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+#define EXPECT_THROW(statement, expected_exception) \
+  GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_NO_THROW(statement) \
+  GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_ANY_THROW(statement) \
+  GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_THROW(statement, expected_exception) \
+  GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
+#define ASSERT_NO_THROW(statement) \
+  GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
+#define ASSERT_ANY_THROW(statement) \
+  GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)
+
+// Boolean assertions. Condition can be either a Boolean expression or an
+// AssertionResult. For more information on how to use AssertionResult with
+// these macros see comments on that class.
+#define GTEST_EXPECT_TRUE(condition) \
+  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
+                      GTEST_NONFATAL_FAILURE_)
+#define GTEST_EXPECT_FALSE(condition) \
+  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+                      GTEST_NONFATAL_FAILURE_)
+#define GTEST_ASSERT_TRUE(condition) \
+  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
+                      GTEST_FATAL_FAILURE_)
+#define GTEST_ASSERT_FALSE(condition) \
+  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+                      GTEST_FATAL_FAILURE_)
+
+// Define these macros to 1 to omit the definition of the corresponding
+// EXPECT or ASSERT, which clashes with some users' own code.
+
+#if !GTEST_DONT_DEFINE_EXPECT_TRUE
+#define EXPECT_TRUE(condition) GTEST_EXPECT_TRUE(condition)
+#endif
 
+#if !GTEST_DONT_DEFINE_EXPECT_FALSE
+#define EXPECT_FALSE(condition) GTEST_EXPECT_FALSE(condition)
+#endif
 
+#if !GTEST_DONT_DEFINE_ASSERT_TRUE
+#define ASSERT_TRUE(condition) GTEST_ASSERT_TRUE(condition)
+#endif
 
-#endif  // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#if !GTEST_DONT_DEFINE_ASSERT_FALSE
+#define ASSERT_FALSE(condition) GTEST_ASSERT_FALSE(condition)
+#endif
 
 // Macros for testing equalities and inequalities.
 //
-//    * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual
-//    * {ASSERT|EXPECT}_NE(v1, v2):           Tests that v1 != v2
-//    * {ASSERT|EXPECT}_LT(v1, v2):           Tests that v1 < v2
-//    * {ASSERT|EXPECT}_LE(v1, v2):           Tests that v1 <= v2
-//    * {ASSERT|EXPECT}_GT(v1, v2):           Tests that v1 > v2
-//    * {ASSERT|EXPECT}_GE(v1, v2):           Tests that v1 >= v2
+//    * {ASSERT|EXPECT}_EQ(v1, v2): Tests that v1 == v2
+//    * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2
+//    * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2
+//    * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2
+//    * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2
+//    * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2
 //
 // When they are not, Google Test prints both the tested expressions and
 // their actual values.  The values must be compatible built-in types,
@@ -19731,8 +11905,8 @@ AssertionResult AssertPred5Helper(const char* pred_text,
 //   are related, not how their content is related.  To compare two C
 //   strings by content, use {ASSERT|EXPECT}_STR*().
 //
-//   3. {ASSERT|EXPECT}_EQ(expected, actual) is preferred to
-//   {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you
+//   3. {ASSERT|EXPECT}_EQ(v1, v2) is preferred to
+//   {ASSERT|EXPECT}_TRUE(v1 == v2), as the former tells you
 //   what the actual value is when it fails, and similarly for the
 //   other comparisons.
 //
@@ -19743,17 +11917,15 @@ AssertionResult AssertPred5Helper(const char* pred_text,
 //
 // Examples:
 //
-//   EXPECT_NE(5, Foo());
-//   EXPECT_EQ(NULL, a_pointer);
+//   EXPECT_NE(Foo(), 5);
+//   EXPECT_EQ(a_pointer, NULL);
 //   ASSERT_LT(i, array_size);
 //   ASSERT_GT(records.size(), 0) << "There is no record left.";
 
-#define EXPECT_EQ(expected, actual) \
-  EXPECT_PRED_FORMAT2(::testing::internal:: \
-                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
-                      expected, actual)
-#define EXPECT_NE(expected, actual) \
-  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, expected, actual)
+#define EXPECT_EQ(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::EqHelper::Compare, val1, val2)
+#define EXPECT_NE(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
 #define EXPECT_LE(val1, val2) \
   EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
 #define EXPECT_LT(val1, val2) \
@@ -19763,10 +11935,8 @@ AssertionResult AssertPred5Helper(const char* pred_text,
 #define EXPECT_GT(val1, val2) \
   EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
 
-#define GTEST_ASSERT_EQ(expected, actual) \
-  ASSERT_PRED_FORMAT2(::testing::internal:: \
-                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
-                      expected, actual)
+#define GTEST_ASSERT_EQ(val1, val2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::EqHelper::Compare, val1, val2)
 #define GTEST_ASSERT_NE(val1, val2) \
   ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
 #define GTEST_ASSERT_LE(val1, val2) \
@@ -19821,29 +11991,29 @@ AssertionResult AssertPred5Helper(const char* pred_text,
 //
 // These macros evaluate their arguments exactly once.
 
-#define EXPECT_STREQ(expected, actual) \
-  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
+#define EXPECT_STREQ(s1, s2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
 #define EXPECT_STRNE(s1, s2) \
   EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
-#define EXPECT_STRCASEEQ(expected, actual) \
-  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
+#define EXPECT_STRCASEEQ(s1, s2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
 #define EXPECT_STRCASENE(s1, s2)\
   EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
 
-#define ASSERT_STREQ(expected, actual) \
-  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
+#define ASSERT_STREQ(s1, s2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
 #define ASSERT_STRNE(s1, s2) \
   ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
-#define ASSERT_STRCASEEQ(expected, actual) \
-  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
+#define ASSERT_STRCASEEQ(s1, s2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
 #define ASSERT_STRCASENE(s1, s2)\
   ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
 
 // Macros for comparing floating-point numbers.
 //
-//    * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual):
+//    * {ASSERT|EXPECT}_FLOAT_EQ(val1, val2):
 //         Tests that two float values are almost equal.
-//    * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual):
+//    * {ASSERT|EXPECT}_DOUBLE_EQ(val1, val2):
 //         Tests that two double values are almost equal.
 //    * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
 //         Tests that v1 and v2 are within the given distance to each other.
@@ -19853,21 +12023,21 @@ AssertionResult AssertPred5Helper(const char* pred_text,
 // FloatingPoint template class in gtest-internal.h if you are
 // interested in the implementation details.
 
-#define EXPECT_FLOAT_EQ(expected, actual)\
+#define EXPECT_FLOAT_EQ(val1, val2)\
   EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
-                      expected, actual)
+                      val1, val2)
 
-#define EXPECT_DOUBLE_EQ(expected, actual)\
+#define EXPECT_DOUBLE_EQ(val1, val2)\
   EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
-                      expected, actual)
+                      val1, val2)
 
-#define ASSERT_FLOAT_EQ(expected, actual)\
+#define ASSERT_FLOAT_EQ(val1, val2)\
   ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
-                      expected, actual)
+                      val1, val2)
 
-#define ASSERT_DOUBLE_EQ(expected, actual)\
+#define ASSERT_DOUBLE_EQ(val1, val2)\
   ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
-                      expected, actual)
+                      val1, val2)
 
 #define EXPECT_NEAR(val1, val2, abs_error)\
   EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
@@ -19930,6 +12100,51 @@ GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
 #define EXPECT_NO_FATAL_FAILURE(statement) \
     GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)
 
+// Causes a trace (including the given source file path and line number,
+// and the given message) to be included in every test failure message generated
+// by code in the scope of the lifetime of an instance of this class. The effect
+// is undone with the destruction of the instance.
+//
+// The message argument can be anything streamable to std::ostream.
+//
+// Example:
+//   testing::ScopedTrace trace("file.cc", 123, "message");
+//
+class GTEST_API_ ScopedTrace {
+ public:
+  // The c'tor pushes the given source file location and message onto
+  // a trace stack maintained by Google Test.
+
+  // Template version. Uses Message() to convert the values into strings.
+  // Slow, but flexible.
+  template <typename T>
+  ScopedTrace(const char* file, int line, const T& message) {
+    PushTrace(file, line, (Message() << message).GetString());
+  }
+
+  // Optimize for some known types.
+  ScopedTrace(const char* file, int line, const char* message) {
+    PushTrace(file, line, message ? message : "(null)");
+  }
+
+  ScopedTrace(const char* file, int line, const std::string& message) {
+    PushTrace(file, line, message);
+  }
+
+  // The d'tor pops the info pushed by the c'tor.
+  //
+  // Note that the d'tor is not virtual in order to be efficient.
+  // Don't inherit from ScopedTrace!
+  ~ScopedTrace();
+
+ private:
+  void PushTrace(const char* file, int line, std::string message);
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
+} GTEST_ATTRIBUTE_UNUSED_;  // A ScopedTrace object does its job in its
+                            // c'tor and d'tor.  Therefore it doesn't
+                            // need to be used otherwise.
+
 // Causes a trace (including the source file path, the current line
 // number, and the given message) to be included in every test failure
 // message generated by code in the current scope.  The effect is
@@ -19941,13 +12156,17 @@ GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
 // of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
 // to appear in the same block - as long as they are on different
 // lines.
+//
+// Assuming that each thread maintains its own stack of traces.
+// Therefore, a SCOPED_TRACE() would (correctly) only affect the
+// assertions in its own thread.
 #define SCOPED_TRACE(message) \
-  ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
-    __FILE__, __LINE__, ::testing::Message() << (message))
+  ::testing::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
+    __FILE__, __LINE__, (message))
 
 // Compile-time assertion for type equality.
-// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
-// the same type.  The value it returns is not interesting.
+// StaticAssertTypeEq<type1, type2>() compiles if and only if type1 and type2
+// are the same type.  The value it returns is not interesting.
 //
 // Instead of making StaticAssertTypeEq a class template, we make it a
 // function template that invokes a helper class template.  This
@@ -19976,21 +12195,21 @@ GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
 //
 // to cause a compiler error.
 template <typename T1, typename T2>
-bool StaticAssertTypeEq() {
-  (void)internal::StaticAssertTypeEqHelper<T1, T2>();
+constexpr bool StaticAssertTypeEq() noexcept {
+  static_assert(std::is_same<T1, T2>::value, "T1 and T2 are not the same type");
   return true;
 }
 
 // Defines a test.
 //
-// The first parameter is the name of the test case, and the second
-// parameter is the name of the test within the test case.
+// The first parameter is the name of the test suite, and the second
+// parameter is the name of the test within the test suite.
 //
-// The convention is to end the test case name with "Test".  For
-// example, a test case for the Foo class can be named FooTest.
+// The convention is to end the test suite name with "Test".  For
+// example, a test suite for the Foo class can be named FooTest.
 //
-// The user should put his test code between braces after using this
-// macro.  Example:
+// Test code should appear between braces after an invocation of
+// this macro.  Example:
 //
 //   TEST(FooTest, InitializesCorrectly) {
 //     Foo foo;
@@ -20006,28 +12225,28 @@ bool StaticAssertTypeEq() {
 // code.  GetTestTypeId() is guaranteed to always return the same
 // value, as it always calls GetTypeId<>() from the Google Test
 // framework.
-#define GTEST_TEST(test_case_name, test_name)\
-  GTEST_TEST_(test_case_name, test_name, \
-              ::testing::Test, ::testing::internal::GetTestTypeId())
+#define GTEST_TEST(test_suite_name, test_name)             \
+  GTEST_TEST_(test_suite_name, test_name, ::testing::Test, \
+              ::testing::internal::GetTestTypeId())
 
 // Define this macro to 1 to omit the definition of TEST(), which
 // is a generic name and clashes with some other libraries.
 #if !GTEST_DONT_DEFINE_TEST
-# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name)
+#define TEST(test_suite_name, test_name) GTEST_TEST(test_suite_name, test_name)
 #endif
 
 // Defines a test that uses a test fixture.
 //
 // The first parameter is the name of the test fixture class, which
-// also doubles as the test case name.  The second parameter is the
-// name of the test within the test case.
+// also doubles as the test suite name.  The second parameter is the
+// name of the test within the test suite.
 //
 // A test fixture class must be declared earlier.  The user should put
-// his test code between braces after using this macro.  Example:
+// the test code between braces after using this macro.  Example:
 //
 //   class FooTest : public testing::Test {
 //    protected:
-//     virtual void SetUp() { b_.AddElement(3); }
+//     void SetUp() override { b_.AddElement(3); }
 //
 //     Foo a_;
 //     Foo b_;
@@ -20038,13 +12257,104 @@ bool StaticAssertTypeEq() {
 //   }
 //
 //   TEST_F(FooTest, ReturnsElementCountCorrectly) {
-//     EXPECT_EQ(0, a_.size());
-//     EXPECT_EQ(1, b_.size());
+//     EXPECT_EQ(a_.size(), 0);
+//     EXPECT_EQ(b_.size(), 1);
 //   }
-
+//
+// GOOGLETEST_CM0011 DO NOT DELETE
+#if !GTEST_DONT_DEFINE_TEST
 #define TEST_F(test_fixture, test_name)\
   GTEST_TEST_(test_fixture, test_name, test_fixture, \
               ::testing::internal::GetTypeId<test_fixture>())
+#endif  // !GTEST_DONT_DEFINE_TEST
+
+// Returns a path to temporary directory.
+// Tries to determine an appropriate directory for the platform.
+GTEST_API_ std::string TempDir();
+
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+// Dynamically registers a test with the framework.
+//
+// This is an advanced API only to be used when the `TEST` macros are
+// insufficient. The macros should be preferred when possible, as they avoid
+// most of the complexity of calling this function.
+//
+// The `factory` argument is a factory callable (move-constructible) object or
+// function pointer that creates a new instance of the Test object. It
+// handles ownership to the caller. The signature of the callable is
+// `Fixture*()`, where `Fixture` is the test fixture class for the test. All
+// tests registered with the same `test_suite_name` must return the same
+// fixture type. This is checked at runtime.
+//
+// The framework will infer the fixture class from the factory and will call
+// the `SetUpTestSuite` and `TearDownTestSuite` for it.
+//
+// Must be called before `RUN_ALL_TESTS()` is invoked, otherwise behavior is
+// undefined.
+//
+// Use case example:
+//
+// class MyFixture : public ::testing::Test {
+//  public:
+//   // All of these optional, just like in regular macro usage.
+//   static void SetUpTestSuite() { ... }
+//   static void TearDownTestSuite() { ... }
+//   void SetUp() override { ... }
+//   void TearDown() override { ... }
+// };
+//
+// class MyTest : public MyFixture {
+//  public:
+//   explicit MyTest(int data) : data_(data) {}
+//   void TestBody() override { ... }
+//
+//  private:
+//   int data_;
+// };
+//
+// void RegisterMyTests(const std::vector<int>& values) {
+//   for (int v : values) {
+//     ::testing::RegisterTest(
+//         "MyFixture", ("Test" + std::to_string(v)).c_str(), nullptr,
+//         std::to_string(v).c_str(),
+//         __FILE__, __LINE__,
+//         // Important to use the fixture type as the return type here.
+//         [=]() -> MyFixture* { return new MyTest(v); });
+//   }
+// }
+// ...
+// int main(int argc, char** argv) {
+//   std::vector<int> values_to_test = LoadValuesFromConfig();
+//   RegisterMyTests(values_to_test);
+//   ...
+//   return RUN_ALL_TESTS();
+// }
+//
+template <int&... ExplicitParameterBarrier, typename Factory>
+TestInfo* RegisterTest(const char* test_suite_name, const char* test_name,
+                       const char* type_param, const char* value_param,
+                       const char* file, int line, Factory factory) {
+  using TestT = typename std::remove_pointer<decltype(factory())>::type;
+
+  class FactoryImpl : public internal::TestFactoryBase {
+   public:
+    explicit FactoryImpl(Factory f) : factory_(std::move(f)) {}
+    Test* CreateTest() override { return factory_(); }
+
+   private:
+    Factory factory_;
+  };
+
+  return internal::MakeAndRegisterTestInfo(
+      test_suite_name, test_name, type_param, value_param,
+      internal::CodeLocation(file, line), internal::GetTypeId<TestT>(),
+      internal::SuiteApiResolver<TestT>::GetSetUpCaseOrSuite(file, line),
+      internal::SuiteApiResolver<TestT>::GetTearDownCaseOrSuite(file, line),
+      new FactoryImpl{std::move(factory)});
+}
 
 }  // namespace testing
 
@@ -20062,4 +12372,6 @@ inline int RUN_ALL_TESTS() {
   return ::testing::UnitTest::GetInstance()->Run();
 }
 
-#endif  // GTEST_INCLUDE_GTEST_GTEST_H_
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_H_