diff --git a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml
index 0e5f523ccf77014b18a034659b450f7036901747..a9dc0ec86cd77c49cb6958d01608ca47e5e0dab9 100644
--- a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml
+++ b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml
@@ -19,16 +19,19 @@ jobs:
             cxx: 'icpc'
             cmake_build_type: 'Debug'
             openmp: 'ON'
-          - distro: 'fedora:intel-oneapi'
+          - distro: 'fedora:intel'
             cxx: 'icpx'
             cmake_build_type: 'Release'
             openmp: 'ON'
-          - distro: 'fedora:intel-oneapi'
+          - distro: 'fedora:intel'
             cxx: 'icpx'
             cmake_build_type: 'Debug'
             openmp: 'ON'
     runs-on: ubuntu-latest
-    container: ghcr.io/kokkos/ci-containers/${{ matrix.distro }}
+    container:
+      image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }}
+      # see https://github.com/actions/virtual-environments/issues/3812
+      options: --security-opt seccomp=unconfined
     steps:
       - name: Checkout code
         uses: actions/checkout@v2.2.0
diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo
index 6dd4101e5bdf1210d26ef2ff0a34f557416c532b..85e71521db3fbaa780bb77fb42f5c3e74ae26800 100644
--- a/packages/kokkos/.gitrepo
+++ b/packages/kokkos/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = git@github.com:kokkos/kokkos.git
 	branch = master
-	commit = 4b97a22ff7be7635116930bb97173058d6079202
-	parent = f2fc77ba9037b2a2032ab980fb445175441f6d1f
+	commit = c28a8b03288b185f846ddfb1b7c08213e12e2634
+	parent = 2c8a5742df289f02f5ca31bce1e293dbfdb1701e
 	method = merge
 	cmdver = 0.4.3
diff --git a/packages/kokkos/BUILD.md b/packages/kokkos/BUILD.md
index e1f0e3e472762fa7c78a68350da9e9bc74f41de1..bb1a31f266ec487c27daced9971d481e41d81c2d 100644
--- a/packages/kokkos/BUILD.md
+++ b/packages/kokkos/BUILD.md
@@ -262,6 +262,9 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
 * Kokkos_ARCH_ZEN2
     * Whether to optimize for the Zen2 architecture
     * BOOL Default: OFF
+* Kokkos_ARCH_ZEN3
+    * Whether to optimize for the Zen3 architecture
+    * BOOL Default: OFF
 * Kokkos_ARCH_HSW
     * Whether to optimize for the HSW architecture
     * BOOL Default: OFF
diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md
index 3ce38c37d866dacc25528f5597461e7629175e00..7bb6de4cd924051c621bec0ac2cca5f734960e9e 100644
--- a/packages/kokkos/CHANGELOG.md
+++ b/packages/kokkos/CHANGELOG.md
@@ -1,5 +1,20 @@
 # Change Log
 
+## [3.4.01](https://github.com/kokkos/kokkos/tree/3.4.01) (2021-05-19)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.00...3.4.01)
+
+**Bug Fixes:**
+- Windows: Remove atomic_compare_exchange_strong overload conflicts with Windows [\#4024](https://github.com/kokkos/kokkos/pull/4024)
+- OpenMPTarget: Fixup allocation headers with OpenMPTarget backend [\#4020](https://github.com/kokkos/kokkos/pull/4020)
+- OpenMPTarget: Add missing specialization for OMPT to Kokkos Random [\#4022](https://github.com/kokkos/kokkos/pull/4022)
+- AMD: Add support for AMD Zen3 CPU architecture [\#4021](https://github.com/kokkos/kokkos/pull/4021)
+- SYCL: Implement SYCL::print_configuration [\#4012](https://github.com/kokkos/kokkos/pull/4012)
+- Containers: staticcsrgraph: use device type instead of execution space to construct views [\#3998](https://github.com/kokkos/kokkos/pull/3998)
+- nvcc_wrapper: fix errors in argument handling, suppress duplicates of GPU architecture and RDC flags [\#4006](https://github.com/kokkos/kokkos/pull/4006)
+- CI: Add icpx testing to intel container [\#4004](https://github.com/kokkos/kokkos/pull/4004)
+- CMake/TRIBITS: Keep quoted compiler flags when passing to Trilinos [\#4007](https://github.com/kokkos/kokkos/pull/4007)
+- CMake: Rename IntelClang to IntelLLVM [\#3945](https://github.com/kokkos/kokkos/pull/3945)
+
 ## [3.4.00](https://github.com/kokkos/kokkos/tree/3.4.00) (2021-04-25)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.01...3.4.00)
 
diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt
index 6fc1bf7d2f7fd3b02a785b1184923cde07b438b2..9452027d8ee99592293cba43b7a60a7d2c0c3bbc 100644
--- a/packages/kokkos/CMakeLists.txt
+++ b/packages/kokkos/CMakeLists.txt
@@ -112,7 +112,7 @@ ENDIF()
 
 set(Kokkos_VERSION_MAJOR 3)
 set(Kokkos_VERSION_MINOR 4)
-set(Kokkos_VERSION_PATCH 00)
+set(Kokkos_VERSION_PATCH 01)
 set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
 math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
 
@@ -206,8 +206,13 @@ ENDIF()
 IF (KOKKOS_HAS_TRILINOS)
   # Overwrite the old flags at the top-level
   # Because Tribits doesn't use lists, it uses spaces for the list of CXX flags
-  # we have to match the annoying behavior
-  STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}")
+  # we have to match the annoying behavior, also we have to preserve quotes
+  # which needs another workaround.
+  SET(KOKKOS_COMPILE_OPTIONS_TMP)
+  FOREACH(OPTION ${KOKKOS_COMPILE_OPTIONS})
+    LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP \"${OPTION}\")
+  ENDFOREACH()
+  STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS_TMP}")
   LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
   IF (KOKKOS_ENABLE_CUDA)
     LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS})
diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos
index 2599121d70ada48567c61fdc63ba94925a402267..bda8572073a326320dab54084080bd57115ce791 100644
--- a/packages/kokkos/Makefile.kokkos
+++ b/packages/kokkos/Makefile.kokkos
@@ -2,7 +2,7 @@
 
 KOKKOS_VERSION_MAJOR = 3
 KOKKOS_VERSION_MINOR = 4
-KOKKOS_VERSION_PATCH = 00
+KOKKOS_VERSION_PATCH = 01
 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
 
 # Options: Cuda,HIP,OpenMP,Pthread,Serial
@@ -14,7 +14,7 @@ KOKKOS_DEVICES ?= "Pthread"
 # ARM:      ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
 # IBM:      BGQ,Power7,Power8,Power9
 # AMD-GPUS: Vega900,Vega906,Vega908
-# AMD-CPUS: AMDAVX,Zen,Zen2
+# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
 KOKKOS_ARCH ?= ""
 # Options: yes,no
 KOKKOS_DEBUG ?= "no"
@@ -372,6 +372,7 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
 
 # AMD based.
 KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
+KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
 KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
 KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
 KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
@@ -381,12 +382,12 @@ KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega
 # Any AVX?
 KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
 KOKKOS_INTERNAL_USE_ARCH_AVX        := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
-KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
+KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
 KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
 KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
 
 # Decide what ISA level we are able to support.
-KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
+KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
 KOKKOS_INTERNAL_USE_ISA_KNC       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
 KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@@ -780,6 +781,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
   endif
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
+
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+    KOKKOS_CXXFLAGS += -mavx2
+    KOKKOS_LDFLAGS += -mavx2
+  else
+    KOKKOS_CXXFLAGS += -march=znver3 -mtune=znver3
+    KOKKOS_LDFLAGS += -march=znver3 -mtune=znver3
+  endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
diff --git a/packages/kokkos/algorithms/CMakeLists.txt b/packages/kokkos/algorithms/CMakeLists.txt
index fd099054ba457e2b1a0557fd08be835f50eef939..4df76a1dbbd17eb269694b8bd801184ccc02e047 100644
--- a/packages/kokkos/algorithms/CMakeLists.txt
+++ b/packages/kokkos/algorithms/CMakeLists.txt
@@ -5,10 +5,12 @@ KOKKOS_SUBPACKAGE(Algorithms)
 IF (NOT Kokkos_INSTALL_TESTING)
   ADD_SUBDIRECTORY(src)
 ENDIF()
-
-KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
+IF(NOT (KOKKOS_ENABLE_OPENMPTARGET
+        AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR
+             KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)))
+  KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
+ENDIF()
 
 KOKKOS_SUBPACKAGE_POSTPROCESS()
 
 
-
diff --git a/packages/kokkos/algorithms/src/Kokkos_Random.hpp b/packages/kokkos/algorithms/src/Kokkos_Random.hpp
index 904cf5ccb967037d94ac9b4a06144a4f7333dd3d..55ce19971faf576483da1ec66cedea0735bc8c7a 100644
--- a/packages/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/packages/kokkos/algorithms/src/Kokkos_Random.hpp
@@ -687,6 +687,24 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
 };
 #endif
 
+#ifdef KOKKOS_ENABLE_OPENMPTARGET
+template <>
+struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> {
+  using locks_view_type = View<int*, Kokkos::Experimental::OpenMPTarget>;
+  KOKKOS_FUNCTION
+  static int get_state_idx(const locks_view_type& locks) {
+    const int team_size = omp_get_num_threads();
+    int i               = omp_get_team_num() * team_size + omp_get_thread_num();
+    const int lock_size = locks.extent_int(0);
+
+    while (Kokkos::atomic_compare_exchange(&locks(i), 0, 1)) {
+      i = (i + 1) % lock_size;
+    }
+    return i;
+  }
+};
+#endif
+
 }  // namespace Impl
 
 template <class DeviceType>
diff --git a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt
index 9109837985a91ad14245133682af15aca59be503..50f8f0a332a6e528bcb12c26a4c60d5599f02c1d 100644
--- a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt
+++ b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt
@@ -44,7 +44,7 @@ IF(Kokkos_ENABLE_OPENMP)
   )
 ENDIF()
 
-foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL)
+foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
   # Because there is always an exception to the rule
   if(Tag STREQUAL "Threads")
     set(DEVICE "PTHREAD")
diff --git a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp
index 1f14875096dd2fbd0bebf4feea796d4c6ccd79f0..c37e779c9927b21b2add67236124f3821341d968 100644
--- a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp
+++ b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp
@@ -109,6 +109,16 @@ struct RandomProperties {
   }
 };
 
+// FIXME_OPENMPTARGET: Needed for OpenMPTarget because, contrary to the
+// standard, LLVM requires the binary operator+ to be defined, not just +=.
+KOKKOS_INLINE_FUNCTION
+RandomProperties operator+(const RandomProperties& org,
+                           const RandomProperties& add) {
+  RandomProperties val = org;
+  val += add;
+  return val;
+}
+
 template <class GeneratorPool, class Scalar>
 struct test_random_functor {
   using rnd_type = typename GeneratorPool::generator_type;
diff --git a/packages/kokkos/algorithms/unit_tests/TestSort.hpp b/packages/kokkos/algorithms/unit_tests/TestSort.hpp
index a3c362ec201bae07df05867d07136e26e73204d0..9c6308c84347e2229ad083805db3d05918baa4f8 100644
--- a/packages/kokkos/algorithms/unit_tests/TestSort.hpp
+++ b/packages/kokkos/algorithms/unit_tests/TestSort.hpp
@@ -370,7 +370,10 @@ template <class ExecutionSpace, typename KeyType>
 void test_sort(unsigned int N) {
   test_1D_sort<ExecutionSpace, KeyType>(N);
   test_3D_sort<ExecutionSpace, KeyType>(N);
+// FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet.
+#ifndef KOKKOS_ENABLE_OPENMPTARGET
   test_dynamic_view_sort<ExecutionSpace, KeyType>(N);
+#endif
   test_issue_1160_sort<ExecutionSpace>();
 }
 }  // namespace Impl
diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper
index 5556e888e34b2f7c2dd18bdb6f47071abde0574b..4e52e4d09f4f86668ccd322b8ab2fe1093f31996 100755
--- a/packages/kokkos/bin/nvcc_wrapper
+++ b/packages/kokkos/bin/nvcc_wrapper
@@ -67,6 +67,11 @@ shared_versioned_libraries=""
 
 # Does the User set the architecture
 arch_set=0
+arch_flag=""
+
+# Does the user set RDC?
+rdc_set=0
+rdc_flag=""
 
 # Does the user overwrite the host compiler
 ccbin_set=0
@@ -190,8 +195,34 @@ do
     host_only_args="$host_only_args $1 $2"
     shift
     ;;
+  # Handle nvcc args controlling whether to generate relocatable device code
+  --relocatable-device-code=*|-rdc=*)
+    if [ "$rdc_set" -eq 0 ]; then
+        rdc_set=1
+        rdc_flag="$1"
+        cuda_args="$cuda_args $rdc_flag"
+    elif [  "$rdc_flag" != "$1" ]; then
+        echo "RDC is being set twice with different flags, which is not handled"
+        echo "$rdc_flag"
+        echo "$1"
+        exit 1
+    fi
+    ;;
+  -rdc)
+    if [ "$rdc_set" -eq 0 ]; then
+        rdc_set=1
+        rdc_flag="$1 $2"
+        cuda_args="$cuda_args $rdc_flag"
+    elif [ "$rdc_flag" != "$1 $2" ]; then
+        echo "RDC is being set twice with different flags, which is not handled"
+        echo "$rdc_flag"
+        echo "$1 $2"
+        exit 1
+    fi
+    shift  # always consume the value, even when an identical duplicate is dropped
+    ;;
   #Handle known nvcc args
-  --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
+  --dryrun|--verbose|--keep|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
     cuda_args="$cuda_args $1"
     ;;
   #Handle more known nvcc args
@@ -199,13 +230,13 @@ do
     cuda_args="$cuda_args $1"
     ;;
   #Handle known nvcc args that have an argument
-  -rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include)
+  -maxrregcount=*|--maxrregcount=*)
+    cuda_args="$cuda_args $1"
+    ;;
+  -maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include)
     cuda_args="$cuda_args $1 $2"
     shift
     ;;
-  -rdc=*|-maxrregcount*|--maxrregcount*)
-    cuda_args="$cuda_args $1"
-    ;;
   #Handle unsupported standard flags
   --std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
     fallback_std_flag="-std=c++14"
@@ -323,20 +354,36 @@ do
     ;;
 
   #Handle -arch argument (if its not set use a default) this is the version with = sign
-  -arch*|-gencode*)
-    cuda_args="$cuda_args $1"
-    arch_set=1
+  -arch=*|-gencode=*)
+    if [ "$arch_set" -eq 0 ]; then
+        arch_set=1
+        arch_flag="$1"
+        cuda_args="$cuda_args $arch_flag"
+    elif [  "$arch_flag" != "$1" ]; then
+        echo "ARCH is being set twice with different flags, which is not handled"
+        echo "$arch_flag"
+        echo "$1"
+        exit 1
+    fi
+    ;;
+  #Handle -arch argument (if its not set use a default) this is the version without = sign
+  -arch|-gencode)
+    if [ "$arch_set" -eq 0 ]; then
+        arch_set=1
+        arch_flag="$1 $2"
+        cuda_args="$cuda_args $arch_flag"
+    elif [ "$arch_flag" != "$1 $2" ]; then
+        echo "ARCH is being set twice with different flags, which is not handled"
+        echo "$arch_flag"
+        echo "$1 $2"
+        exit 1
+    fi
+    shift  # always consume the value, even when an identical duplicate is dropped
+    ;;
   #Handle -code argument (if its not set use a default) this is the version with = sign
   -code*)
     cuda_args="$cuda_args $1"
     ;;
-  #Handle -arch argument (if its not set use a default) this is the version without = sign
-  -arch|-gencode)
-    cuda_args="$cuda_args $1 $2"
-    arch_set=1
-    shift
-    ;;
   #Handle -code argument (if its not set use a default) this is the version without = sign
   -code)
     cuda_args="$cuda_args $1 $2"
diff --git a/packages/kokkos/cmake/KokkosCore_config.h.in b/packages/kokkos/cmake/KokkosCore_config.h.in
index fbfae3711ec14573b4c3067aea4a8625d6b2ad8c..3455b0cb42e78c7e17286c70edd9f19274b8dcfb 100644
--- a/packages/kokkos/cmake/KokkosCore_config.h.in
+++ b/packages/kokkos/cmake/KokkosCore_config.h.in
@@ -99,5 +99,6 @@
 #cmakedefine KOKKOS_ARCH_AMPERE86
 #cmakedefine KOKKOS_ARCH_AMD_ZEN
 #cmakedefine KOKKOS_ARCH_AMD_ZEN2
+#cmakedefine KOKKOS_ARCH_AMD_ZEN3
 
 #cmakedefine KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF
diff --git a/packages/kokkos/cmake/kokkos_arch.cmake b/packages/kokkos/cmake/kokkos_arch.cmake
index ec18e70a36a34dbecc305f978e0d7b84c482da37..e8b85542c633eaea6f63c32ada79c7d7b2402794 100644
--- a/packages/kokkos/cmake/kokkos_arch.cmake
+++ b/packages/kokkos/cmake/kokkos_arch.cmake
@@ -63,6 +63,7 @@ KOKKOS_ARCH_OPTION(AMPERE80        GPU  "NVIDIA Ampere generation CC 8.0")
 KOKKOS_ARCH_OPTION(AMPERE86        GPU  "NVIDIA Ampere generation CC 8.6")
 KOKKOS_ARCH_OPTION(ZEN             HOST "AMD Zen architecture")
 KOKKOS_ARCH_OPTION(ZEN2            HOST "AMD Zen2 architecture")
+KOKKOS_ARCH_OPTION(ZEN3            HOST "AMD Zen3 architecture")
 KOKKOS_ARCH_OPTION(VEGA900         GPU  "AMD GPU MI25 GFX900")
 KOKKOS_ARCH_OPTION(VEGA906         GPU  "AMD GPU MI50/MI60 GFX906")
 KOKKOS_ARCH_OPTION(VEGA908         GPU  "AMD GPU MI100 GFX908")
@@ -215,6 +216,15 @@ IF (KOKKOS_ARCH_ZEN2)
   SET(KOKKOS_ARCH_AMD_AVX2 ON)
 ENDIF()
 
+IF (KOKKOS_ARCH_ZEN3)
+  COMPILER_SPECIFIC_FLAGS(
+    Intel   -mavx2
+    DEFAULT -march=znver3 -mtune=znver3
+  )
+  SET(KOKKOS_ARCH_AMD_ZEN3 ON)
+  SET(KOKKOS_ARCH_AMD_AVX2 ON)
+ENDIF()
+
 IF (KOKKOS_ARCH_WSM)
   COMPILER_SPECIFIC_FLAGS(
     Intel   -xSSE4.2
@@ -284,7 +294,7 @@ IF (KOKKOS_ARCH_SKX)
   )
 ENDIF()
 
-IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2)
+IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2 OR KOKKOS_ARCH_ZEN3)
   SET(KOKKOS_USE_ISA_X86_64 ON)
 ENDIF()
 
@@ -457,7 +467,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
   ENDIF()
   IF (KOKKOS_ARCH_INTEL_GEN)
     COMPILER_SPECIFIC_FLAGS(
-      IntelClang -fopenmp-targets=spir64 -D__STRICT_ANSI__
+      IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__
     )
   ENDIF()
 ENDIF()
diff --git a/packages/kokkos/cmake/kokkos_compiler_id.cmake b/packages/kokkos/cmake/kokkos_compiler_id.cmake
index 4434d6928f46429ad7525c944a0c1c6c351c4cdd..23847263a952ce0e94fe48c58dbdfc50b228b314 100644
--- a/packages/kokkos/cmake/kokkos_compiler_id.cmake
+++ b/packages/kokkos/cmake/kokkos_compiler_id.cmake
@@ -101,7 +101,7 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
                   OUTPUT_STRIP_TRAILING_WHITESPACE)
   IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang
     SET(KOKKOS_CLANG_IS_INTEL TRUE)
-    SET(KOKKOS_CXX_COMPILER_ID IntelClang CACHE STRING INTERNAL FORCE)
+    SET(KOKKOS_CXX_COMPILER_ID IntelLLVM CACHE STRING INTERNAL FORCE)
   ENDIF()
 ENDIF()
 
diff --git a/packages/kokkos/cmake/kokkos_enable_devices.cmake b/packages/kokkos/cmake/kokkos_enable_devices.cmake
index 445dad47ce561979037bf5b1622413ddda05f3b3..d7f83ddbdf877b672cfc196f89d6b3f61d109087 100644
--- a/packages/kokkos/cmake/kokkos_enable_devices.cmake
+++ b/packages/kokkos/cmake/kokkos_enable_devices.cmake
@@ -61,7 +61,7 @@ IF(KOKKOS_ENABLE_OPENMP)
     COMPILER_SPECIFIC_FLAGS(
       COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
       Clang      -Xcompiler ${ClangOpenMPFlag}
-      IntelClang -Xcompiler -fiopenmp
+      IntelLLVM  -Xcompiler -fiopenmp
       PGI        -Xcompiler -mp
       Cray       NO-VALUE-SPECIFIED
       XL         -Xcompiler -qsmp=omp
@@ -70,7 +70,7 @@ IF(KOKKOS_ENABLE_OPENMP)
   ELSE()
     COMPILER_SPECIFIC_FLAGS(
       Clang      ${ClangOpenMPFlag}
-      IntelClang -fiopenmp
+      IntelLLVM  -fiopenmp
       AppleClang -Xpreprocessor -fopenmp
       PGI        -mp
       Cray       NO-VALUE-SPECIFIED
@@ -92,7 +92,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
 
   COMPILER_SPECIFIC_FLAGS(
     Clang      ${ClangOpenMPFlag} -Wno-openmp-mapping
-    IntelClang -fiopenmp -Wno-openmp-mapping
+    IntelLLVM  -fiopenmp -Wno-openmp-mapping
     XL         -qsmp=omp -qoffload -qnoeh
     PGI        -mp=gpu
     DEFAULT    -fopenmp
diff --git a/packages/kokkos/cmake/kokkos_functions.cmake b/packages/kokkos/cmake/kokkos_functions.cmake
index 858322394d7aefcb9fe23f55a60863f3a8f63484..e1a3e5f8bd00802f465390f332138bbadd4f1a33 100644
--- a/packages/kokkos/cmake/kokkos_functions.cmake
+++ b/packages/kokkos/cmake/kokkos_functions.cmake
@@ -773,7 +773,7 @@ FUNCTION(kokkos_link_tpl TARGET)
 ENDFUNCTION()
 
 FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
-  SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIPCC Fujitsu)
+  SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelLLVM GNU HIPCC Fujitsu)
   CMAKE_PARSE_ARGUMENTS(
     PARSE
     "LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"
diff --git a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
index f22e5d1eca928bc968d3cf32900f9fa0335751d7..00d3eafd231eabd8af444c8508acb42668ff883d 100644
--- a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
+++ b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
@@ -114,15 +114,11 @@ namespace Kokkos {
 template <class StaticCrsGraphType, class InputSizeType>
 inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
     const std::string& label, const std::vector<InputSizeType>& input) {
-  using output_type = StaticCrsGraphType;
-  // using input_type = std::vector<InputSizeType>; // unused
-
+  using output_type  = StaticCrsGraphType;
   using entries_type = typename output_type::entries_type;
-
-  using work_type = View<typename output_type::size_type[],
-                         typename output_type::array_layout,
-                         typename output_type::execution_space,
-                         typename output_type::memory_traits>;
+  using work_type    = View<
+      typename output_type::size_type[], typename output_type::array_layout,
+      typename output_type::device_type, typename output_type::memory_traits>;
 
   output_type output;
 
@@ -161,10 +157,9 @@ inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
 
   static_assert(entries_type::rank == 1, "Graph entries view must be rank one");
 
-  using work_type = View<typename output_type::size_type[],
-                         typename output_type::array_layout,
-                         typename output_type::execution_space,
-                         typename output_type::memory_traits>;
+  using work_type = View<
+      typename output_type::size_type[], typename output_type::array_layout,
+      typename output_type::device_type, typename output_type::memory_traits>;
 
   output_type output;
 
diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp
index dc5e0194ab0a8bb85a29727c664a33b6c23e2c6c..58d723ac110a2bfd2266d4055f9c222c4a2c2c78 100644
--- a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp
+++ b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp
@@ -179,8 +179,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>
       const RecordBase::function_type arg_dealloc = &deallocate);
 
  public:
-  std::string get_label() const;
-
   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
       const Kokkos::Experimental::OpenMPTargetSpace& arg_space,
       const std::string& arg_label, const size_t arg_alloc_size) {
@@ -190,10 +188,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>
     return nullptr;
 #endif
   }
-
-  /**\brief  Reallocate tracked memory in the space */
-  static void* reallocate_tracked(void* const arg_alloc_ptr,
-                                  const size_t arg_alloc_size);
 };
 
 }  // namespace Impl
diff --git a/packages/kokkos/core/src/Kokkos_SYCL.hpp b/packages/kokkos/core/src/Kokkos_SYCL.hpp
index aa720371df73cb1ad7bba8191e5c6d83c6c317c5..8ee76b43862fd6c54c42d98e081174f11d5e09e4 100644
--- a/packages/kokkos/core/src/Kokkos_SYCL.hpp
+++ b/packages/kokkos/core/src/Kokkos_SYCL.hpp
@@ -113,7 +113,7 @@ class SYCL {
   void fence() const;
 
   /// \brief Print configuration information to the given output stream.
-  static void print_configuration(std::ostream&, const bool detail = false);
+  void print_configuration(std::ostream&, const bool detail = false);
 
   /// \brief Free any resources being consumed by the device.
   static void impl_finalize();
@@ -131,12 +131,10 @@ class SYCL {
     sycl::device get_device() const;
 
     friend std::ostream& operator<<(std::ostream& os, const SYCLDevice& that) {
-      return that.info(os);
+      return SYCL::impl_sycl_info(os, that.m_device);
     }
 
    private:
-    std::ostream& info(std::ostream& os) const;
-
     sycl::device m_device;
   };
 
@@ -154,6 +152,9 @@ class SYCL {
   }
 
  private:
+  static std::ostream& impl_sycl_info(std::ostream& os,
+                                      const sycl::device& device);
+
   Kokkos::Impl::HostSharedPtr<Impl::SYCLInternal> m_space_instance;
 };
 
diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
index 6fbb4245b8fb8b1e354452727ce9862c85a147c8..b99b0017ca17df6462f0ea2b03b65d47121e47d3 100644
--- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
+++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
@@ -107,12 +107,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace,
                      SharedAllocationRecord<void, void>::m_alloc_size);
 }
 
-// TODO: Implement deep copy back see CudaSpace
-std::string SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace,
-                                   void>::get_label() const {
-  return std::string("OpenMPTargetAllocation");
-}
-
 SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
     SharedAllocationRecord(
         const Kokkos::Experimental::OpenMPTargetSpace &arg_space,
@@ -141,23 +135,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
 
 //----------------------------------------------------------------------------
 
-void *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
-    reallocate_tracked(void *const arg_alloc_ptr, const size_t arg_alloc_size) {
-  SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr);
-  SharedAllocationRecord *const r_new =
-      allocate(r_old->m_space, r_old->get_label(), arg_alloc_size);
-
-  // Kokkos::Impl::DeepCopy<OpenMPTargetSpace,OpenMPTargetSpace>( r_new->data()
-  // , r_old->data()
-  //                                           , std::min( r_old->size() ,
-  //                                           r_new->size() ) );
-
-  RecordBase::increment(r_new);
-  RecordBase::decrement(r_old);
-
-  return r_new->data();
-}
-
 }  // namespace Impl
 }  // namespace Kokkos
 
diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp
index 9c29eb190d17b64c0340751a3459785c070d7c47..3a09ee919540b93c74bcd5f2e7eea57b352575a7 100644
--- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp
+++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp
@@ -105,6 +105,12 @@ bool SYCL::impl_is_initialized() {
 
 void SYCL::impl_finalize() { Impl::SYCLInternal::singleton().finalize(); }
 
+void SYCL::print_configuration(std::ostream& s, const bool detailed) {
+  s << "macro  KOKKOS_ENABLE_SYCL : defined" << '\n';
+  if (detailed)
+    SYCL::impl_sycl_info(s, m_space_instance->m_queue->get_device());
+}
+
 void SYCL::fence() const {
   Impl::SYCLInternal::fence(*m_space_instance->m_queue);
 }
@@ -143,119 +149,118 @@ void SYCL::impl_initialize(SYCL::SYCLDevice d) {
   Impl::SYCLInternal::singleton().initialize(d.get_device());
 }
 
-std::ostream& SYCL::SYCLDevice::info(std::ostream& os) const {
+std::ostream& SYCL::impl_sycl_info(std::ostream& os,
+                                   const sycl::device& device) {
   using namespace sycl::info;
-  return os << "Name: " << m_device.get_info<device::name>()
-            << "\nDriver Version: "
-            << m_device.get_info<device::driver_version>()
-            << "\nIs Host: " << m_device.is_host()
-            << "\nIs CPU: " << m_device.is_cpu()
-            << "\nIs GPU: " << m_device.is_gpu()
-            << "\nIs Accelerator: " << m_device.is_accelerator()
-            << "\nVendor Id: " << m_device.get_info<device::vendor_id>()
+  return os << "Name: " << device.get_info<device::name>()
+            << "\nDriver Version: " << device.get_info<device::driver_version>()
+            << "\nIs Host: " << device.is_host()
+            << "\nIs CPU: " << device.is_cpu()
+            << "\nIs GPU: " << device.is_gpu()
+            << "\nIs Accelerator: " << device.is_accelerator()
+            << "\nVendor Id: " << device.get_info<device::vendor_id>()
             << "\nMax Compute Units: "
-            << m_device.get_info<device::max_compute_units>()
+            << device.get_info<device::max_compute_units>()
             << "\nMax Work Item Dimensions: "
-            << m_device.get_info<device::max_work_item_dimensions>()
+            << device.get_info<device::max_work_item_dimensions>()
             << "\nMax Work Group Size: "
-            << m_device.get_info<device::max_work_group_size>()
+            << device.get_info<device::max_work_group_size>()
             << "\nPreferred Vector Width Char: "
-            << m_device.get_info<device::preferred_vector_width_char>()
+            << device.get_info<device::preferred_vector_width_char>()
             << "\nPreferred Vector Width Short: "
-            << m_device.get_info<device::preferred_vector_width_short>()
+            << device.get_info<device::preferred_vector_width_short>()
             << "\nPreferred Vector Width Int: "
-            << m_device.get_info<device::preferred_vector_width_int>()
+            << device.get_info<device::preferred_vector_width_int>()
             << "\nPreferred Vector Width Long: "
-            << m_device.get_info<device::preferred_vector_width_long>()
+            << device.get_info<device::preferred_vector_width_long>()
             << "\nPreferred Vector Width Float: "
-            << m_device.get_info<device::preferred_vector_width_float>()
+            << device.get_info<device::preferred_vector_width_float>()
             << "\nPreferred Vector Width Double: "
-            << m_device.get_info<device::preferred_vector_width_double>()
+            << device.get_info<device::preferred_vector_width_double>()
             << "\nPreferred Vector Width Half: "
-            << m_device.get_info<device::preferred_vector_width_half>()
+            << device.get_info<device::preferred_vector_width_half>()
             << "\nNative Vector Width Char: "
-            << m_device.get_info<device::native_vector_width_char>()
+            << device.get_info<device::native_vector_width_char>()
             << "\nNative Vector Width Short: "
-            << m_device.get_info<device::native_vector_width_short>()
+            << device.get_info<device::native_vector_width_short>()
             << "\nNative Vector Width Int: "
-            << m_device.get_info<device::native_vector_width_int>()
+            << device.get_info<device::native_vector_width_int>()
             << "\nNative Vector Width Long: "
-            << m_device.get_info<device::native_vector_width_long>()
+            << device.get_info<device::native_vector_width_long>()
             << "\nNative Vector Width Float: "
-            << m_device.get_info<device::native_vector_width_float>()
+            << device.get_info<device::native_vector_width_float>()
             << "\nNative Vector Width Double: "
-            << m_device.get_info<device::native_vector_width_double>()
+            << device.get_info<device::native_vector_width_double>()
             << "\nNative Vector Width Half: "
-            << m_device.get_info<device::native_vector_width_half>()
-            << "\nAddress Bits: " << m_device.get_info<device::address_bits>()
-            << "\nImage Support: " << m_device.get_info<device::image_support>()
+            << device.get_info<device::native_vector_width_half>()
+            << "\nAddress Bits: " << device.get_info<device::address_bits>()
+            << "\nImage Support: " << device.get_info<device::image_support>()
             << "\nMax Mem Alloc Size: "
-            << m_device.get_info<device::max_mem_alloc_size>()
+            << device.get_info<device::max_mem_alloc_size>()
             << "\nMax Read Image Args: "
-            << m_device.get_info<device::max_read_image_args>()
+            << device.get_info<device::max_read_image_args>()
             << "\nImage2d Max Width: "
-            << m_device.get_info<device::image2d_max_width>()
+            << device.get_info<device::image2d_max_width>()
             << "\nImage2d Max Height: "
-            << m_device.get_info<device::image2d_max_height>()
+            << device.get_info<device::image2d_max_height>()
             << "\nImage3d Max Width: "
-            << m_device.get_info<device::image3d_max_width>()
+            << device.get_info<device::image3d_max_width>()
             << "\nImage3d Max Height: "
-            << m_device.get_info<device::image3d_max_height>()
+            << device.get_info<device::image3d_max_height>()
             << "\nImage3d Max Depth: "
-            << m_device.get_info<device::image3d_max_depth>()
+            << device.get_info<device::image3d_max_depth>()
             << "\nImage Max Buffer Size: "
-            << m_device.get_info<device::image_max_buffer_size>()
+            << device.get_info<device::image_max_buffer_size>()
             << "\nImage Max Array Size: "
-            << m_device.get_info<device::image_max_array_size>()
-            << "\nMax Samplers: " << m_device.get_info<device::max_samplers>()
+            << device.get_info<device::image_max_array_size>()
+            << "\nMax Samplers: " << device.get_info<device::max_samplers>()
             << "\nMax Parameter Size: "
-            << m_device.get_info<device::max_parameter_size>()
+            << device.get_info<device::max_parameter_size>()
             << "\nMem Base Addr Align: "
-            << m_device.get_info<device::mem_base_addr_align>()
+            << device.get_info<device::mem_base_addr_align>()
             << "\nGlobal Cache Mem Line Size: "
-            << m_device.get_info<device::global_mem_cache_line_size>()
+            << device.get_info<device::global_mem_cache_line_size>()
             << "\nGlobal Mem Cache Size: "
-            << m_device.get_info<device::global_mem_cache_size>()
+            << device.get_info<device::global_mem_cache_size>()
             << "\nGlobal Mem Size: "
-            << m_device.get_info<device::global_mem_size>()
+            << device.get_info<device::global_mem_size>()
             << "\nMax Constant Buffer Size: "
-            << m_device.get_info<device::max_constant_buffer_size>()
+            << device.get_info<device::max_constant_buffer_size>()
             << "\nMax Constant Args: "
-            << m_device.get_info<device::max_constant_args>()
-            << "\nLocal Mem Size: "
-            << m_device.get_info<device::local_mem_size>()
+            << device.get_info<device::max_constant_args>()
+            << "\nLocal Mem Size: " << device.get_info<device::local_mem_size>()
             << "\nError Correction Support: "
-            << m_device.get_info<device::error_correction_support>()
+            << device.get_info<device::error_correction_support>()
             << "\nHost Unified Memory: "
-            << m_device.get_info<device::host_unified_memory>()
+            << device.get_info<device::host_unified_memory>()
             << "\nProfiling Timer Resolution: "
-            << m_device.get_info<device::profiling_timer_resolution>()
+            << device.get_info<device::profiling_timer_resolution>()
             << "\nIs Endian Little: "
-            << m_device.get_info<device::is_endian_little>()
-            << "\nIs Available: " << m_device.get_info<device::is_available>()
+            << device.get_info<device::is_endian_little>()
+            << "\nIs Available: " << device.get_info<device::is_available>()
             << "\nIs Compiler Available: "
-            << m_device.get_info<device::is_compiler_available>()
+            << device.get_info<device::is_compiler_available>()
             << "\nIs Linker Available: "
-            << m_device.get_info<device::is_linker_available>()
+            << device.get_info<device::is_linker_available>()
             << "\nQueue Profiling: "
-            << m_device.get_info<device::queue_profiling>()
+            << device.get_info<device::queue_profiling>()
             << "\nBuilt In Kernels: "
             << Container<std::vector<std::string>>(
-                   m_device.get_info<device::built_in_kernels>())
-            << "\nVendor: " << m_device.get_info<device::vendor>()
-            << "\nProfile: " << m_device.get_info<device::profile>()
-            << "\nVersion: " << m_device.get_info<device::version>()
+                   device.get_info<device::built_in_kernels>())
+            << "\nVendor: " << device.get_info<device::vendor>()
+            << "\nProfile: " << device.get_info<device::profile>()
+            << "\nVersion: " << device.get_info<device::version>()
             << "\nExtensions: "
             << Container<std::vector<std::string>>(
-                   m_device.get_info<device::extensions>())
+                   device.get_info<device::extensions>())
             << "\nPrintf Buffer Size: "
-            << m_device.get_info<device::printf_buffer_size>()
+            << device.get_info<device::printf_buffer_size>()
             << "\nPreferred Interop User Sync: "
-            << m_device.get_info<device::preferred_interop_user_sync>()
+            << device.get_info<device::preferred_interop_user_sync>()
             << "\nPartition Max Sub Devices: "
-            << m_device.get_info<device::partition_max_sub_devices>()
+            << device.get_info<device::partition_max_sub_devices>()
             << "\nReference Count: "
-            << m_device.get_info<device::reference_count>() << '\n';
+            << device.get_info<device::reference_count>() << '\n';
 }
 
 namespace Impl {
@@ -293,15 +298,13 @@ void SYCLSpaceInitializer::fence() {
 }
 
 void SYCLSpaceInitializer::print_configuration(std::ostream& msg,
-                                               const bool /*detail*/) {
+                                               const bool detail) {
   msg << "Devices:" << std::endl;
   msg << "  KOKKOS_ENABLE_SYCL: ";
   msg << "yes" << std::endl;
 
   msg << "\nRuntime Configuration:" << std::endl;
-  // FIXME_SYCL not implemented
-  std::abort();
-  // Experimental::SYCL::print_configuration(msg, detail);
+  Experimental::SYCL{}.print_configuration(msg, detail);
 }
 
 }  // namespace Impl
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
index 3f2e8914ea9347f74cad8d84dde87bc8f5764f19..2f824566b8043a080e8d1f9010256b755547d42a 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
@@ -152,12 +152,6 @@ inline T atomic_compare_exchange(
                                  ((LONGLONG*)&compare_and_result));
   return compare_and_result;
 }
-
-template <typename T>
-inline T atomic_compare_exchange_strong(volatile T* const dest,
-                                        const T& compare, const T& val) {
-  return atomic_compare_exchange(dest, compare, val);
-}
 #endif
 
 }  // namespace Kokkos
diff --git a/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash b/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash
index 5ff781b96fc0949361329a61baa4f966f6b8a93a..8fe8e2b5ecea429f750eb4ad8eff6588f4ae9691 100755
--- a/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash
+++ b/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash
@@ -4,7 +4,7 @@ HostArch=(SNB HSW SKX KNL)
 DeviceArch=(Kepler35 Kepler37 Pascal60 Pascal61 Volta70)
 if [ ! -z "$KOKKOS_HOST_ARCH_TEST" ]; then
   export KOKKOS_ARCH_TEST=1
-  HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 ARMv8_ThunderX ARMv8_ThunderX2)
+  HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 Zen3 ARMv8_ThunderX ARMv8_ThunderX2)
   DeviceArch=()
 fi
 
diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash
index e9871b436971a551c82751756b2b18de9175839a..c601e0ee161fb11833c9f97014205585839a3717 100755
--- a/packages/kokkos/generate_makefile.bash
+++ b/packages/kokkos/generate_makefile.bash
@@ -157,6 +157,7 @@ display_help_text() {
       echo "                 AMDAVX          = AMD CPU"
       echo "                 ZEN             = AMD Zen-Core CPU"
       echo "                 ZEN2            = AMD Zen2-Core CPU"
+      echo "                 ZEN3            = AMD Zen3-Core CPU"
       echo "               [AMD: GPU]"
       echo "                 VEGA900         = AMD GPU MI25 GFX900"
       echo "                 VEGA906         = AMD GPU MI50/MI60 GFX906"
diff --git a/packages/kokkos/gnu_generate_makefile.bash b/packages/kokkos/gnu_generate_makefile.bash
index ea509669f068d677a0354c83891d7caf298b1e34..8a463270c85548e4d14fb8c8cda9d5c478bdb96f 100755
--- a/packages/kokkos/gnu_generate_makefile.bash
+++ b/packages/kokkos/gnu_generate_makefile.bash
@@ -137,6 +137,7 @@ do
       echo "                 AMDAVX          = AMD CPU"
       echo "                 ZEN             = AMD Zen-Core CPU"
       echo "                 ZEN2            = AMD Zen2-Core CPU"
+      echo "                 ZEN3            = AMD Zen3-Core CPU"
       echo "               [ARM]"
       echo "                 ARMv80          = ARMv8.0 Compatible CPU"
       echo "                 ARMv81          = ARMv8.1 Compatible CPU"
diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt
index 7a58f593d00e424b7d7dcbda226f5c4c6d7ccd3c..be8a5e7da5f4d8cada30fea4f78a21656268c8ef 100644
--- a/packages/kokkos/master_history.txt
+++ b/packages/kokkos/master_history.txt
@@ -24,3 +24,4 @@ tag:  3.2.00     date: 08:19:2020    master: 3b2fdc7e    release: 5dc6d303
 tag:  3.3.00     date: 12:16:2020    master: 734f577a    release: 1535ba5c
 tag:  3.3.01     date: 01:06:2021    master: 6d65b5a3    release: 4d23839c
 tag:  3.4.00     date: 04:26:2021    master: 1fb0c284    release: 5d7738d6
+tag:  3.4.01     date: 05:20:2021    master: 4b97a22f    release: 410b15c8
diff --git a/packages/kokkos/scripts/testing_scripts/generate_makefile.bash b/packages/kokkos/scripts/testing_scripts/generate_makefile.bash
index f21124ed6e716844e876cf209ee2af5cb9a7dbbd..ff9620efa689254af88bf64637e4998ca2d75265 100755
--- a/packages/kokkos/scripts/testing_scripts/generate_makefile.bash
+++ b/packages/kokkos/scripts/testing_scripts/generate_makefile.bash
@@ -129,6 +129,7 @@ do
       echo "                 AMDAVX          = AMD CPU"
       echo "                 ZEN             = AMD Zen-Core CPU"
       echo "                 ZEN2            = AMD Zen2-Core CPU"
+      echo "                 ZEN3            = AMD Zen3-Core CPU"
       echo "               [ARM]"
       echo "                 ARMv80          = ARMv8.0 Compatible CPU"
       echo "                 ARMv81          = ARMv8.1 Compatible CPU"