diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo
index ef0c50a600b3577cd16dabfdfed75efdad75a8e6..a1e1cc09eecc83b6c04feed37c1a4cb889b6cb88 100644
--- a/packages/kokkos/.gitrepo
+++ b/packages/kokkos/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = git@github.com:kokkos/kokkos.git
 	branch = master
-	commit = 9614f72c75aa2131d56900511e5eebae54a7bd8b
-	parent = 7fc65e3330cc86e88570067a4f99f6d794992ac1
+	commit = 5d6e7fb38e96aec88d2c514e1f9be1cf2b549b57
+	parent = 5809f3aeb3dacfc19ff4e1808f64895e05d8c875
 	cmdver = 0.4.0
 	method = merge
diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md
index 5564096ea10469ed4ebf5ce321ddce9f08ebd9ab..9d503663ae7939452863205ffcd287cad58be69a 100644
--- a/packages/kokkos/CHANGELOG.md
+++ b/packages/kokkos/CHANGELOG.md
@@ -1,5 +1,26 @@
 # Change Log
 
+## [2.8.00](https://github.com/kokkos/kokkos/tree/2.8.00) (2019-02-05)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.24...2.8.00)
+
+**Implemented enhancements:**
+
+- Capability, Tests: C++14 support and testing [\#1914](https://github.com/kokkos/kokkos/issues/1914)
+- Capability: Add environment variables for all command line arguments [\#1798](https://github.com/kokkos/kokkos/issues/1798)
+- Capability: --kokkos-ndevices not working for Slurm [\#1920](https://github.com/kokkos/kokkos/issues/1920)
+- View: Undefined behavior when deep copying from and to an empty unmanaged view [\#1967](https://github.com/kokkos/kokkos/issues/1967)
+- BuildSystem: nvcc\_wrapper should stop immediately if nvcc is not in PATH [\#1861](https://github.com/kokkos/kokkos/issues/1861)
+
+**Fixed bugs:**
+
+- Cuda: Fix Volta Issues 1 Non-deterministic behavior on Volta, runs fine on Pascal [\#1949](https://github.com/kokkos/kokkos/issues/1949)
+- Cuda: Fix Volta Issues 2 CUDA Team Scan gives wrong values on Volta with -G compile flag [\#1942](https://github.com/kokkos/kokkos/issues/1942)
+- Cuda: illegal warp sync in parallel\_reduce by functor on Turing 75 [\#1958](https://github.com/kokkos/kokkos/issues/1958)
+- Threads: Pthreads backend does not handle RangePolicy with offset correctly [\#1976](https://github.com/kokkos/kokkos/issues/1976)
+- Atomics: atomic\_fetch\_oper has no case for Kokkos::complex\<double\> or other 16-byte types [\#1951](https://github.com/kokkos/kokkos/issues/1951)
+- MDRangePolicy: Fix zero-length range [\#1948](https://github.com/kokkos/kokkos/issues/1948)
+- TeamThreadRange: TeamThreadRange MaxLoc reduce doesn't compile [\#1909](https://github.com/kokkos/kokkos/issues/1909)
+
 ## [2.7.24](https://github.com/kokkos/kokkos/tree/2.7.24) (2018-11-04)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.00...2.7.24)
 
diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos
index 05f3cf7811996868d618decc375b8383b7f3b1de..387fde19404820c3588ece26880e018a846061fb 100644
--- a/packages/kokkos/Makefile.kokkos
+++ b/packages/kokkos/Makefile.kokkos
@@ -5,11 +5,11 @@
 KOKKOS_DEVICES ?= "Pthread"
 # Options: 
 # Intel:    KNC,KNL,SNB,HSW,BDW,SKX
-# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72
+# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
 # ARM:      ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
 # IBM:      BGQ,Power7,Power8,Power9
 # AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
-# AMD-CPUS: AMDAVX,Ryzen,Epyc
+# AMD-CPUS: AMDAVX,Ryzen,EPYC
 KOKKOS_ARCH ?= ""
 # Options: yes,no
 KOKKOS_DEBUG ?= "no"
@@ -218,7 +218,7 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
 else
   ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
      KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
-     #KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14
+     KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14
      KOKKOS_INTERNAL_CXX1Y_FLAG := -std=c++1y
      #KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17
      #KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1Z
@@ -270,6 +270,7 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
 KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
 KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
 KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
+KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
 KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
                                               + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
                                               + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
@@ -278,6 +279,7 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE
                                               + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
 					      + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
 					      + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+					      + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
                                               + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                               + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                               + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@@ -294,6 +296,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
                                                 + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
 						+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
 						+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+						+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
                                                 + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                                 + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                                 + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@@ -325,7 +328,7 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
 # AMD based.
 KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
 KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
-KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),Epyc)
+KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
 KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri)
 KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo)
 KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
@@ -335,12 +338,12 @@ KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx90
 # Any AVX?
 KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
 KOKKOS_INTERNAL_USE_ARCH_AVX        := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
-KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW))
+KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
 KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
 KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
 
 # Decide what ISA level we are able to support.
-KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
+KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
 KOKKOS_INTERNAL_USE_ISA_KNC       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
 KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@@ -652,6 +655,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
   endif
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC")
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2")
+  # Non-Intel compilers target znver1; the Intel compiler gets plain AVX2.
+  ifneq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+    KOKKOS_CXXFLAGS += -march=znver1 -mtune=znver1
+    KOKKOS_LDFLAGS += -march=znver1 -mtune=znver1
+  else
+    KOKKOS_CXXFLAGS += -mavx2
+    KOKKOS_LDFLAGS += -mavx2
+  endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
   tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
   tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
@@ -944,6 +960,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
     tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
     KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
   endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
+    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING")
+    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75")
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
+  endif
 
   ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
     KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
diff --git a/packages/kokkos/README b/packages/kokkos/README
index 4b6d4170e08ccd3b3bdbbc5eacfc055971246abf..cb6ceb5581d58fd3841bb43c4797708c52f41a1e 100644
--- a/packages/kokkos/README
+++ b/packages/kokkos/README
@@ -73,6 +73,8 @@ For specifics see the LICENSE file contained in the repository or distribution.
   * NVCC 7.5 for CUDA (with gcc 4.8.4)
   * NVCC 8.0.44 for CUDA (with gcc 5.3.0)
   * NVCC 9.1 for CUDA (with gcc 6.1.0)
+  * NVCC 9.2 for CUDA (with gcc 7.2.0)
+  * NVCC 10.0 for CUDA (with gcc 7.4.0)
 
 ### Primary tested compilers on Power 8 are:
   * GCC 6.4.0 (OpenMP,Serial)
@@ -109,7 +111,7 @@ GCC:   -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
        -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
 Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
 Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
-NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
+NVCC:  -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
 
 Other compilers are tested occasionally, in particular when pushing from develop to 
 master branch, without -Werror and only for a select set of backends.
diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper
index f926ae024c7d3d4ee7fd198998644434d81af646..94bc72854e74211cd3ed1c2876100731311dddfe 100755
--- a/packages/kokkos/bin/nvcc_wrapper
+++ b/packages/kokkos/bin/nvcc_wrapper
@@ -308,6 +308,16 @@ do
   shift
 done
 
+# Stop immediately if nvcc is needed (host_only is not set) but missing from PATH.
+if [ $host_only -ne 1 ]; then
+  var=$(which nvcc )
+  if [ $? -gt 0 ]; then
+    echo "Could not find nvcc in PATH"
+    # Exit with an explicit failure code: '$?' here would be echo's status (0).
+    exit 1
+  fi
+fi
+
 # Only print host compiler version
 if [ $get_host_version -eq 1 ]; then
   $host_compiler --version
diff --git a/packages/kokkos/cmake/kokkos_options.cmake b/packages/kokkos/cmake/kokkos_options.cmake
index 580d1d322bd2d374273b76adff75ed332e836c94..be494e5df08748f734d197d5d43bd64d9168e7e4 100644
--- a/packages/kokkos/cmake/kokkos_options.cmake
+++ b/packages/kokkos/cmake/kokkos_options.cmake
@@ -104,6 +104,7 @@ list(APPEND KOKKOS_ARCH_LIST
      Pascal61        # (GPU) NVIDIA Pascal generation CC 6.1
      Volta70         # (GPU) NVIDIA Volta generation CC 7.0
      Volta72         # (GPU) NVIDIA Volta generation CC 7.2
+     Turing75         # (GPU) NVIDIA Turing generation CC 7.5
     )
 
 # List of possible device architectures.
diff --git a/packages/kokkos/containers/src/Kokkos_DualView.hpp b/packages/kokkos/containers/src/Kokkos_DualView.hpp
index adba0c4158f1b06a2df13c31c0ae9592916be243..f6631a4149c916c4760edd5377981a3812a31f2d 100644
--- a/packages/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_DualView.hpp
@@ -832,16 +832,14 @@ void
 deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
            const DualView<ST,SL,SD,SM>& src )
 {
-  if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) {
-    return deep_copy(dst.d_view, src.d_view);
-  }
-  if (src.modified_flags(1) >= src.modified_flags(0)) {
-    deep_copy (dst.d_view, src.d_view);
-    dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
-  } else {
+  if ( src.need_sync_device() ) {
     deep_copy (dst.h_view, src.h_view);
-    dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
+    dst.modify_host();
   }
+  else {
+    deep_copy (dst.d_view, src.d_view);
+    dst.modify_device();
+  } 
 }
 
 template< class ExecutionSpace ,
@@ -852,15 +850,12 @@ deep_copy (const ExecutionSpace& exec ,
            DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
            const DualView<ST,SL,SD,SM>& src )
 {
-  if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) {
-    return deep_copy(exec, dst.d_view, src.d_view);
-  }
-  if (src.modified_flags(1) >= src.modified_flags(0)) {
-    deep_copy (exec, dst.d_view, src.d_view);
-    dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
-  } else {
+  if ( src.need_sync_device() ) {
     deep_copy (exec, dst.h_view, src.h_view);
-    dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
+    dst.modify_host();
+  } else {
+    deep_copy (exec, dst.d_view, src.d_view);
+    dst.modify_device();
   }
 }
 
diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
index 8be2c49a31171a085785abb38ecbcb33495bcd9a..3f284e6a8d79d9275f00420a90d208c4683bdd62 100644
--- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
@@ -368,8 +368,8 @@ public:
   enum { is_assignable = is_assignable_value_type &&
                          is_assignable_layout };
 
-  typedef ViewMapping< DstTraits , void >  DstType ;
-  typedef ViewMapping< SrcTraits , void >  SrcType ;
+  typedef ViewMapping< DstTraits , typename DstTraits::specialize >  DstType ;
+  typedef ViewMapping< SrcTraits , typename SrcTraits::specialize >  SrcType ;
 
   template < typename DT , typename ... DP , typename ST , typename ... SP >
   KOKKOS_INLINE_FUNCTION
@@ -432,7 +432,7 @@ public:
 
 
 private:
-  typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
+  typedef Kokkos::Impl::ViewMapping< traits , typename traits::specialize > map_type ;
   typedef Kokkos::Impl::SharedAllocationTracker      track_type ;
 
   track_type  m_track ;
@@ -567,11 +567,11 @@ public:
   // Allow specializations to query their specialized map
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   KOKKOS_INLINE_FUNCTION
-  const Kokkos::Impl::ViewMapping< traits , void > &
+  const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > &
   implementation_map() const { return m_map ; }
 #endif
   KOKKOS_INLINE_FUNCTION
-  const Kokkos::Impl::ViewMapping< traits , void > &
+  const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > &
   impl_map() const { return m_map ; }
 
   //----------------------------------------
@@ -952,7 +952,7 @@ public:
     , m_rank(rhs.m_rank)
     {
       typedef typename DynRankView<RT,RP...> ::traits SrcTraits ;
-      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
     }
@@ -962,7 +962,7 @@ public:
   DynRankView & operator = (const DynRankView<RT,RP...> & rhs )
     {
       typedef typename DynRankView<RT,RP...> ::traits SrcTraits ;
-      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
       m_track.assign( rhs.m_track , traits::is_managed );
@@ -980,7 +980,7 @@ public:
     {
       typedef typename View<RT,RP...>::traits  SrcTraits ;
       typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag >  Mapping ;
-      static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
+      static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy construction" );
       Mapping::assign( *this , rhs );
     }
 
@@ -1432,7 +1432,7 @@ public:
                     , Args ... args )
     {
 
-       typedef ViewMapping< traits_type, void >  DstType ;
+       typedef ViewMapping< traits_type, typename traits_type::specialize >  DstType ;
 
        typedef typename std::conditional< (rank==0) , ViewDimension<>
                                                     , typename std::conditional< (rank==1) , ViewDimension<0>
diff --git a/packages/kokkos/containers/unit_tests/TestDualView.hpp b/packages/kokkos/containers/unit_tests/TestDualView.hpp
index cbff27cb3938ab52e9d1074bfe01d1190b7367ad..767f93c093c48b27bef9819a78e333aa29dd7495 100644
--- a/packages/kokkos/containers/unit_tests/TestDualView.hpp
+++ b/packages/kokkos/containers/unit_tests/TestDualView.hpp
@@ -101,10 +101,95 @@ namespace Impl {
       result = run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >(size,3);
     }
 
-   };
+  };
 
-} // namespace Impl
+  // Reduction functor: for index i, adds every entry fv(i,*) into the running total.
+  template < typename Scalar, class ViewType >
+  struct SumViewEntriesFunctor {
+    typedef Scalar value_type;  // type of the accumulated total
+
+    ViewType fv;  // view indexed as fv(i,j) whose entries are summed
+
+    SumViewEntriesFunctor ( const ViewType & fv_ ) : fv(fv_) {}
+
+    KOKKOS_INLINE_FUNCTION
+    void operator() ( const int i , value_type & total ) const {
+      const size_t ncols = fv.extent(1);
+      for ( size_t col = 0; col < ncols; ++col ) {
+        total += fv(i,col);
+      }
+    }
+  };
+  
+  // Test: deep_copy one DualView into another, then verify device and host sums of both.
+  template <typename Scalar, class Device>
+  struct test_dual_view_deep_copy
+  {
+    typedef Scalar scalar_type;
+    typedef Device execution_space;
+
+    template <typename ViewType>
+    void run_me() {
+
+      const unsigned int n = 10;
+      const unsigned int m = 5;
+      const unsigned int sum_total = n * m;
+
+      ViewType a("A",n,m);
+      ViewType b("B",n,m);
+      // Set every entry of a's device view to 1, so the sum over all entries is n*m
+      Kokkos::deep_copy( a.d_view , 1 );
+      // Flag a as modified in its execution space, then sync to the host mirror space
+      a.template modify<typename ViewType::execution_space>();
+      a.template sync<typename ViewType::host_mirror_space>();
+
+      // Check device view is initialized as expected: all n*m entries are 1
+      scalar_type a_d_sum = 0;
+      // Execute on the execution_space associated with t_dev's memory space
+      typedef typename ViewType::t_dev::memory_space::execution_space t_dev_exec_space;
+      Kokkos::parallel_reduce( Kokkos::RangePolicy<t_dev_exec_space>(0,n), SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view), a_d_sum );
+      ASSERT_EQ(a_d_sum, sum_total);
+
+      // Check host view is synced as expected
+      scalar_type a_h_sum = 0;
+      for ( size_t i = 0; i < a.h_view.extent(0); ++i )
+        for ( size_t j = 0; j < a.h_view.extent(1); ++j ) {
+          a_h_sum += a.h_view(i,j);
+        }
+
+      ASSERT_EQ(a_h_sum, sum_total);
+
+
+      // Test deep_copy from a into b, then sync b to its host mirror space
+      Kokkos::deep_copy( b, a );
+      b.template sync<typename ViewType::host_mirror_space>();
 
+      // Perform same checks on b as done on a
+      // Check device view holds the data copied from a
+      scalar_type b_d_sum = 0;
+      // Execute on the execution_space associated with t_dev's memory space
+      Kokkos::parallel_reduce( Kokkos::RangePolicy<t_dev_exec_space>(0,n), SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(b.d_view), b_d_sum );
+      ASSERT_EQ(b_d_sum, sum_total);
+
+      // Check host view is synced as expected
+      scalar_type b_h_sum = 0;
+      for ( size_t i = 0; i < b.h_view.extent(0); ++i )
+        for ( size_t j = 0; j < b.h_view.extent(1); ++j ) {
+          b_h_sum += b.h_view(i,j);
+        }
+
+      ASSERT_EQ(b_h_sum, sum_total);
+
+    } // end run_me
+    // Constructing the struct runs the test on a Scalar** LayoutLeft DualView
+    test_dual_view_deep_copy()
+    {
+      run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >();
+    }
+
+  };
+
+} // namespace Impl
 
 
 
@@ -116,10 +201,21 @@ void test_dualview_combinations(unsigned int size)
 
 }
 
+template <typename Scalar, typename Device>
+void test_dualview_deep_copy()
+{
+  Impl::test_dual_view_deep_copy<Scalar,Device> ();
+}
+
 TEST_F( TEST_CATEGORY, dualview_combination) {
     test_dualview_combinations<int,TEST_EXECSPACE>(10);
 }
 
+TEST_F( TEST_CATEGORY, dualview_deep_copy) {
+    test_dualview_deep_copy<int,TEST_EXECSPACE>();
+    test_dualview_deep_copy<double,TEST_EXECSPACE>();
+}
+
 
 } // namespace Test
 
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
index 4fa4609968a9e57c2fad37b78fa1ff3e209164a0..e13744e32790837c704903946eaddb5549c9dfde 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -829,7 +829,8 @@ void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink) {
   }
   if(bytes > current_size) {
     current_size = bytes;
-    ptr = Kokkos::kokkos_realloc<Kokkos::CudaSpace>(ptr,current_size);
+    Kokkos::kokkos_free<Kokkos::CudaSpace>(ptr);
+    ptr = Kokkos::kokkos_malloc<Kokkos::CudaSpace>("CudaSpace::ScratchMemory",current_size);
   }
   if((bytes < current_size) && (force_shrink)) {
     current_size = bytes;
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
index 16952a3ae44b831ab072c18cf9498d62b3a3c5f8..4fd7a9c69e2aa6457606c35e150ba0fa9630ad49 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
@@ -561,7 +561,11 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
     }
   #endif
 
+  #ifdef KOKKOS_ENABLE_PRE_CUDA_10_DEPRECATION_API
   cudaThreadSetCacheConfig(cudaFuncCachePreferShared);
+  #else
+  cudaDeviceSetCacheConfig(cudaFuncCachePreferShared);
+  #endif
 
   // Init the array for used for arbitrarily sized atomics
   Impl::initialize_host_cuda_lock_arrays();
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
index 2ae1cc0dddf52e937212fc3b822e1b7ce6d8c201..665d0732a74863b3d245ee4fee8a740f9a0e4812 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
@@ -525,6 +525,7 @@ public:
   inline
   void execute() const
   {
+    if(m_rp.m_num_tiles==0) return;
     const array_index_type maxblocks = static_cast<array_index_type>(Kokkos::Impl::CudaTraits::UpperBoundGridCount);
     if ( RP::rank == 2 )
     {
@@ -685,7 +686,7 @@ public:
         typename Policy::member_type( kokkos_impl_cuda_shared_memory<void>()
                                     , m_shmem_begin
                                     , m_shmem_size
-                                    , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1])
+                                    , (void*) ( ((char*)m_scratch_ptr[1]) + ptrdiff_t(threadid/(blockDim.x*blockDim.y)) * m_scratch_size[1])
                                     , m_scratch_size[1]
                                     , league_rank
                                     , m_league_size ) );
@@ -1336,7 +1337,7 @@ public:
         ( Member( kokkos_impl_cuda_shared_memory<char>() + m_team_begin
                                         , m_shmem_begin
                                         , m_shmem_size
-                                        , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1])
+                                        , (void*) ( ((char*)m_scratch_ptr[1]) + ptrdiff_t(threadid/(blockDim.x*blockDim.y)) * m_scratch_size[1])
                                         , m_scratch_size[1]
                                         , league_rank
                                         , m_league_size )
@@ -1378,7 +1379,7 @@ public:
         ( Member( kokkos_impl_cuda_shared_memory<char>() + m_team_begin
                                         , m_shmem_begin
                                         , m_shmem_size
-                                        , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1])
+                                        , (void*) ( ((char*)m_scratch_ptr[1]) + ptrdiff_t(threadid/(blockDim.x*blockDim.y)) * m_scratch_size[1])
                                         , m_scratch_size[1]
                                         , league_rank
                                         , m_league_size )
@@ -2064,7 +2065,7 @@ private:
       #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       KOKKOS_IMPL_CUDA_SYNCWARP_MASK(MASK);
       #else
-      KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+      KOKKOS_IMPL_CUDA_SYNCWARP;
       #endif
       if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values.
 
@@ -2291,7 +2292,7 @@ private:
       #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       KOKKOS_IMPL_CUDA_SYNCWARP_MASK(MASK);
       #else
-      KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+      KOKKOS_IMPL_CUDA_SYNCWARP;
       #endif
       if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values.
 
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index 82d691f7d4c0979569f5c92c4c00508f4c3668fd..d09854c3a5e7a4d20336f8c7e6f539a0fc21c6b8 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -321,7 +321,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
       unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
       int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2,32);
@@ -331,7 +331,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4,32);
@@ -341,7 +341,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8,32);
@@ -351,7 +351,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16,32);
@@ -361,7 +361,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
     }
   }
@@ -506,7 +506,7 @@ cuda_inter_block_reduction( const ReducerType& reducer,
       unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
       int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2,32);
@@ -516,7 +516,7 @@ cuda_inter_block_reduction( const ReducerType& reducer,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4,32);
@@ -526,7 +526,7 @@ cuda_inter_block_reduction( const ReducerType& reducer,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8,32);
@@ -536,7 +536,7 @@ cuda_inter_block_reduction( const ReducerType& reducer,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
       if (int(blockDim.x*blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16,32);
@@ -546,7 +546,7 @@ cuda_inter_block_reduction( const ReducerType& reducer,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+      active += KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
     }
   }
@@ -578,7 +578,7 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> {
       const int width,                         // How much of the warp participates
       Scalar& result)
   {
-    unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)%(32/width))*width;
+    unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)/width)*width;
     for(int delta=skip_vector?blockDim.x:1; delta<width; delta*=2) {
       Scalar tmp;
       cuda_shfl_down(tmp,value,delta,width,mask);
@@ -683,7 +683,7 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> {
       const int width)                         // How much of the warp participates
   {
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
-    unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)%(32/width))*width;
+    unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)/width)*width;
 #endif
     const int lane_id = (threadIdx.y*blockDim.x+threadIdx.x)%32;
     for(int delta=skip_vector?blockDim.x:1; delta<width; delta*=2) {
@@ -693,7 +693,7 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> {
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
       KOKKOS_IMPL_CUDA_SYNCWARP_MASK(mask);
 #else
-      KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+      KOKKOS_IMPL_CUDA_SYNCWARP;
 #endif
     }
     *value=*(value-lane_id);
@@ -779,7 +779,7 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> {
 /*
  *  Algorithmic constraints:
  *   (a) blockDim.y is a power of two
- *   (b) blockDim.y <= 512
+ *   (b) blockDim.y <= 1024
  *   (c) blockDim.x == blockDim.z == 1
  */
 
@@ -828,14 +828,26 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
   { // Inter-warp reduce-scan by a single warp to avoid extra synchronizations
     const unsigned rtid_inter = ( threadIdx.y ^ BlockSizeMask ) << CudaTraits::WarpIndexShift ;
 
+    #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    unsigned inner_mask = KOKKOS_IMPL_CUDA_BALLOT_MASK(0xffffffff,(rtid_inter<blockDim.y));
+    #endif
     if ( rtid_inter < blockDim.y ) {
 
       const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask );
 
-      if ( (1<<5) < BlockSizeMask ) {                        BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
-      if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
-      if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
-      if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
+      #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      if ( (1<<5) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
+      if ( (1<<6) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
+      if ( (1<<7) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
+      if ( (1<<8) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
+      if ( (1<<9) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,9) }
+      #else
+      if ( (1<<5) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
+      if ( (1<<6) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
+      if ( (1<<7) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
+      if ( (1<<8) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
+      if ( (1<<9) < BlockSizeMask ) { KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,9) }
+      #endif
 
       if ( DoScan ) {
 
@@ -846,10 +858,17 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
 
         if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ;
 
-        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,8)
-        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,7)
-        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,6)
-        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5)
+        #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+        KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_SCAN_STEP(tdata_inter,n,8)
+        KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_SCAN_STEP(tdata_inter,n,7)
+        KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_SCAN_STEP(tdata_inter,n,6)
+        KOKKOS_IMPL_CUDA_SYNCWARP_MASK(inner_mask); BLOCK_SCAN_STEP(tdata_inter,n,5)
+        #else
+        KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_SCAN_STEP(tdata_inter,n,8)
+        KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_SCAN_STEP(tdata_inter,n,7)
+        KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_SCAN_STEP(tdata_inter,n,6)
+        KOKKOS_IMPL_CUDA_SYNCWARP; BLOCK_SCAN_STEP(tdata_inter,n,5)
+        #endif
       }
     }
   }
@@ -864,19 +883,17 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
             ( rtid_intra & 16 ) ? 16 : 0 ))));
 
     if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
-    #ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
-    BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,1) __syncthreads();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,0) __syncthreads();
-    #else
+    KOKKOS_IMPL_CUDA_SYNCWARP;
     BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
+    KOKKOS_IMPL_CUDA_SYNCWARP;
     BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
+    KOKKOS_IMPL_CUDA_SYNCWARP;
     BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
+    KOKKOS_IMPL_CUDA_SYNCWARP;
     BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
+    KOKKOS_IMPL_CUDA_SYNCWARP;
     BLOCK_SCAN_STEP(tdata_intra,n,0) __threadfence_block();
-    #endif
+    KOKKOS_IMPL_CUDA_SYNCWARP;
   }
 
 #undef BLOCK_SCAN_STEP
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
index 9eb32f07c735c8bda8dce67bd8ce7cb03758679e..18271a51469ace822f0e4f483a3ee8c576a151b9 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
@@ -290,7 +290,7 @@ public:
       // Intra vector lane shuffle reduction:
       typename ReducerType::value_type tmp ( reducer.reference() );
 
-      unsigned mask = blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x;
+      unsigned mask = blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<((threadIdx.y%(32/blockDim.x))*blockDim.x);
 
       for ( int i = blockDim.x ; ( i >>= 1 ) ; ) {
         cuda_shfl_down( reducer.reference() , tmp , i , blockDim.x , mask );
@@ -742,7 +742,7 @@ void parallel_for
   #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
   KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x);
   #else
-  KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+  KOKKOS_IMPL_CUDA_SYNCWARP;
   #endif
 #endif
 }
@@ -915,7 +915,7 @@ void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const Functo
   #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
   KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x);
   #else
-  KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+  KOKKOS_IMPL_CUDA_SYNCWARP;
   #endif
 #endif
 }
@@ -928,7 +928,7 @@ void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& , const Functo
   #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
   KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x);
   #else
-  KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+  KOKKOS_IMPL_CUDA_SYNCWARP;
   #endif
 #endif
 }
@@ -938,7 +938,7 @@ KOKKOS_INLINE_FUNCTION
 void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda, ValueType& val) {
 #ifdef __CUDA_ARCH__
   if(threadIdx.x == 0) lambda(val);
-  unsigned mask = blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x;
+  unsigned mask = blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<((threadIdx.y%(32/blockDim.x))*blockDim.x);
   Impl::cuda_shfl(val,val,0,blockDim.x,mask);
 #endif
 }
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp
index 25951b81b00447975552cf435e768d9193cee5de..8aa8b8f459f4281f296dd202e564346617a61da2 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp
@@ -4,9 +4,9 @@
 #if ( CUDA_VERSION < 9000 )
 #define KOKKOS_IMPL_CUDA_ACTIVEMASK 0
 #define KOKKOS_IMPL_CUDA_SYNCWARP __threadfence_block()
-#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK __threadfence_block()
+#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) do { if(m) __threadfence_block(); } while(0) /* statement-safe: fences only when mask m is non-zero; a trailing else cannot bind to the inner if */
 #define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot(x)
-#define KOKKOS_IMPL_CUDA_BALLOT_MASK(x) __ballot(x)
+#define KOKKOS_IMPL_CUDA_BALLOT_MASK(m,x) __ballot(x)
 #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl(x,y,z)
 #define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) __shfl(x,y,z)
 #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up(x,y,z)
@@ -16,7 +16,7 @@
 #else
 #define KOKKOS_IMPL_CUDA_ACTIVEMASK __activemask()
 #define KOKKOS_IMPL_CUDA_SYNCWARP __syncwarp(0xffffffff)
-#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) __syncwarp(m);
+#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) __syncwarp(m)
 #define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot_sync(__activemask(),x)
 #define KOKKOS_IMPL_CUDA_BALLOT_MASK(m,x) __ballot_sync(m,x)
 #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl_sync(0xffffffff,x,y,z)
@@ -29,9 +29,9 @@
 #else
 #define KOKKOS_IMPL_CUDA_ACTIVEMASK 0
 #define KOKKOS_IMPL_CUDA_SYNCWARP 
-#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK
+#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) ((void)(m)) /* no-op fallback; argument parenthesized so expression arguments (e.g. a==b?x:y) expand correctly */
 #define KOKKOS_IMPL_CUDA_BALLOT(x) 0
-#define KOKKOS_IMPL_CUDA_BALLOT_MASK(x) 0
+#define KOKKOS_IMPL_CUDA_BALLOT_MASK(m,x) 0
 #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) 0
 #define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) 0
 #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) 0
diff --git a/packages/kokkos/core/src/Kokkos_CopyViews.hpp b/packages/kokkos/core/src/Kokkos_CopyViews.hpp
index 86547420ef0bee23030ee1a7c33a16c898b969bf..31605c9d39c4304b0a0ae8287be85f03e1164de7 100644
--- a/packages/kokkos/core/src/Kokkos_CopyViews.hpp
+++ b/packages/kokkos/core/src/Kokkos_CopyViews.hpp
@@ -1401,7 +1401,33 @@ void deep_copy
   typedef typename src_type::memory_space     src_memory_space ;
   typedef typename dst_type::value_type       dst_value_type ;
   typedef typename src_type::value_type       src_value_type ;
-  if(dst.data() == NULL && src.data() == NULL) {
+  if(dst.data() == NULL || src.data() == NULL) {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    // do nothing
+#else
+    // throw if dimension mismatch
+    if ( (src.extent(0) != dst.extent(0)) ||
+         (src.extent(1) != dst.extent(1)) ||
+         (src.extent(2) != dst.extent(2)) ||
+         (src.extent(3) != dst.extent(3)) ||
+         (src.extent(4) != dst.extent(4)) ||
+         (src.extent(5) != dst.extent(5)) ||
+         (src.extent(6) != dst.extent(6)) ||
+         (src.extent(7) != dst.extent(7))
+       ) {
+      std::string message("Deprecation Error: Kokkos::deep_copy extents of views don't match: ");
+      message += dst.label(); message += "(";
+      for(int r = 0; r<dst_type::Rank-1; r++)
+        { message+= std::to_string(dst.extent(r)); message += ","; }
+      message+= std::to_string(dst.extent(dst_type::Rank-1)); message += ") ";
+      message += src.label(); message += "(";
+      for(int r = 0; r<src_type::Rank-1; r++)
+        { message+= std::to_string(src.extent(r)); message += ","; }
+      message+= std::to_string(src.extent(src_type::Rank-1)); message += ") ";
+
+      Kokkos::Impl::throw_runtime_exception(message);
+    }
+#endif
     Kokkos::fence();
     return;
   }
@@ -1646,7 +1672,33 @@ void deep_copy
   typedef typename dst_type::value_type       dst_value_type ;
   typedef typename src_type::value_type       src_value_type ;
 
-  if(dst.data() == NULL && src.data() == NULL) {
+  if(dst.data() == NULL || src.data() == NULL) {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    // do nothing
+#else
+    // throw if dimension mismatch
+    if ( (src.extent(0) != dst.extent(0)) ||
+         (src.extent(1) != dst.extent(1)) ||
+         (src.extent(2) != dst.extent(2)) ||
+         (src.extent(3) != dst.extent(3)) ||
+         (src.extent(4) != dst.extent(4)) ||
+         (src.extent(5) != dst.extent(5)) ||
+         (src.extent(6) != dst.extent(6)) ||
+         (src.extent(7) != dst.extent(7))
+       ) {
+      std::string message("Deprecation Error: Kokkos::deep_copy extents of views don't match: ");
+      message += dst.label(); message += "(";
+      for(int r = 0; r<dst_type::Rank-1; r++)
+        { message+= std::to_string(dst.extent(r)); message += ","; }
+      message+= std::to_string(dst.extent(dst_type::Rank-1)); message += ") ";
+      message += src.label(); message += "(";
+      for(int r = 0; r<src_type::Rank-1; r++)
+        { message+= std::to_string(src.extent(r)); message += ","; }
+      message+= std::to_string(src.extent(src_type::Rank-1)); message += ") ";
+
+      Kokkos::Impl::throw_runtime_exception(message);
+    }
+#endif
     exec_space.fence();
     return;
   }
diff --git a/packages/kokkos/core/src/Kokkos_Crs.hpp b/packages/kokkos/core/src/Kokkos_Crs.hpp
index 09b0d666a1f8c10c0bd3f616fc333515ac688b1d..ccc3944d86fb5f3ab3d59d15aea47682b6269ff0 100644
--- a/packages/kokkos/core/src/Kokkos_Crs.hpp
+++ b/packages/kokkos/core/src/Kokkos_Crs.hpp
@@ -100,31 +100,26 @@ public:
   row_map_type row_map;
   entries_type entries;
 
-  //! Construct an empty view.
-  Crs() : row_map(), entries() {}
-
-  //! Copy constructor (shallow copy).
-  Crs(const Crs& rhs) : row_map(rhs.row_map), entries(rhs.entries)
-  {}
-
-  template<class EntriesType, class RowMapType>
-  Crs(const RowMapType& row_map_, const EntriesType& entries_) : row_map(row_map_), entries(entries_)
-  {}
-
-  /** \brief  Assign to a view of the rhs array.
-   *          If the old view is the last view
-   *          then allocated memory is deallocated.
+  /*
+   * Default Constructors, operators and destructor
    */
-  Crs& operator= (const Crs& rhs) {
-    row_map = rhs.row_map;
-    entries = rhs.entries;
-    return *this;
-  }
-
-  /**  \brief  Destroy this view of the array.
-   *           If the last view then allocated memory is deallocated.
+  KOKKOS_FUNCTION Crs() = default;
+  KOKKOS_FUNCTION Crs(Crs const &) = default;
+  KOKKOS_FUNCTION Crs(Crs &&) = default;
+  KOKKOS_FUNCTION Crs& operator=(Crs const &) = default;
+  KOKKOS_FUNCTION Crs& operator=(Crs &&) = default;
+  KOKKOS_FUNCTION ~Crs() = default;
+
+  /** \brief Construct a graph from an existing row map
+   *         and entries array: row_map and entries are
+   *         initialized from row_map_ and entries_.
   */
-  ~Crs() {}
+  template<class EntriesType, class RowMapType>
+  KOKKOS_INLINE_FUNCTION
+  Crs(const RowMapType& row_map_, const EntriesType& entries_) 
+     : row_map(row_map_), entries(entries_)
+  {
+  }
 
   /**  \brief  Return number of rows in the graph
    */
diff --git a/packages/kokkos/core/src/Kokkos_Macros.hpp b/packages/kokkos/core/src/Kokkos_Macros.hpp
index 96bd23e22009bb16ca87adb84afa961e103459ac..10fc09423e729d0fa1315e7e6c30f726bf6a8981 100644
--- a/packages/kokkos/core/src/Kokkos_Macros.hpp
+++ b/packages/kokkos/core/src/Kokkos_Macros.hpp
@@ -170,6 +170,10 @@
     // see https://github.com/kokkos/kokkos/issues/1470
     #define KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
   #endif
+
+  #if ( 10000 > CUDA_VERSION )
+    #define KOKKOS_ENABLE_PRE_CUDA_10_DEPRECATION_API
+  #endif
 #endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
index 19007945147ac3f8422e87ab5dc278047b45383f..06aaa6546e8bee697894b91fee6decff4302b14b 100644
--- a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
+++ b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
@@ -505,7 +505,7 @@ public:
   }
 
   KOKKOS_INLINE_FUNCTION
-  value_type& reference() {
+  value_type& reference() const {
     return *value;
   }
 
@@ -559,7 +559,7 @@ public:
   }
 
   KOKKOS_INLINE_FUNCTION
-  value_type& reference() {
+  value_type& reference() const {
     return *value;
   }
 
@@ -637,7 +637,7 @@ public:
   }
 
   KOKKOS_INLINE_FUNCTION
-  value_type& reference() {
+  value_type& reference() const {
     return *value;
   }
 
@@ -727,7 +727,7 @@ public:
   }
 
   KOKKOS_INLINE_FUNCTION
-  value_type& reference() {
+  value_type& reference() const {
     return *value;
   }
 
diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp
index da49aff222102fcc39e7314d4a8154c13b6b3ebf..754a0ab8c0b3e6985cf8617d5b287e6c16711bfa 100644
--- a/packages/kokkos/core/src/Kokkos_View.hpp
+++ b/packages/kokkos/core/src/Kokkos_View.hpp
@@ -198,6 +198,7 @@ struct ViewTraits< void >
   typedef void  HostMirrorSpace ;
   typedef void  array_layout ;
   typedef void  memory_traits ;
+  typedef void  specialize ;
 };
 
 template< class ... Prop >
@@ -209,6 +210,7 @@ struct ViewTraits< void , void , Prop ... >
   typedef typename ViewTraits<void,Prop...>::HostMirrorSpace  HostMirrorSpace ;
   typedef typename ViewTraits<void,Prop...>::array_layout     array_layout ;
   typedef typename ViewTraits<void,Prop...>::memory_traits    memory_traits ;
+  typedef typename ViewTraits<void,Prop...>::specialize       specialize ;
 };
 
 template< class ArrayLayout , class ... Prop >
@@ -221,6 +223,7 @@ struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_array_layout<ArrayL
   typedef typename ViewTraits<void,Prop...>::HostMirrorSpace  HostMirrorSpace ;
   typedef          ArrayLayout                                array_layout ;
   typedef typename ViewTraits<void,Prop...>::memory_traits    memory_traits ;
+  typedef typename ViewTraits<void,Prop...>::specialize       specialize ;
 };
 
 template< class Space , class ... Prop >
@@ -239,6 +242,7 @@ struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_space<Space>::value
   typedef typename Kokkos::Impl::HostMirror< Space >::Space HostMirrorSpace ;
   typedef typename execution_space::array_layout            array_layout ;
   typedef typename ViewTraits<void,Prop...>::memory_traits  memory_traits ;
+  typedef typename ViewTraits<void,Prop...>::specialize       specialize ;
 };
 
 template< class MemoryTraits , class ... Prop >
@@ -257,6 +261,7 @@ struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_memory_traits<Memor
   typedef void          HostMirrorSpace ;
   typedef void          array_layout ;
   typedef MemoryTraits  memory_traits ;
+  typedef void          specialize ;
 };
 
 
@@ -335,7 +340,12 @@ public:
 
   typedef ArrayLayout                         array_layout ;
   typedef typename data_analysis::dimension   dimension ;
-  typedef typename data_analysis::specialize  specialize /* mapping specialization tag */ ;
+
+  typedef typename std::conditional<
+                      std::is_same<typename data_analysis::specialize,void>::value
+                      ,typename prop::specialize
+                      ,typename data_analysis::specialize>::type
+                   specialize ; /* mapping specialization tag */
 
   enum { rank         = dimension::rank };
   enum { rank_dynamic = dimension::rank_dynamic };
@@ -542,7 +552,7 @@ public:
 
 private:
 
-  typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
+  typedef Kokkos::Impl::ViewMapping< traits , typename traits::specialize > map_type ;
   typedef Kokkos::Impl::SharedAllocationTracker      track_type ;
 
   track_type  m_track ;
@@ -608,13 +618,18 @@ public:
   template< typename iType >
   KOKKOS_INLINE_FUNCTION constexpr
   typename std::enable_if< std::is_integral<iType>::value , size_t >::type
-  extent( const iType & r ) const
+  extent( const iType & r ) const noexcept
     { return m_map.extent(r); }
 
+  static KOKKOS_INLINE_FUNCTION constexpr
+  size_t
+  static_extent( const unsigned r ) noexcept
+    { return map_type::static_extent(r); }
+
   template< typename iType >
   KOKKOS_INLINE_FUNCTION constexpr
   typename std::enable_if< std::is_integral<iType>::value , int >::type
-  extent_int( const iType & r ) const
+  extent_int( const iType & r ) const noexcept
     { return static_cast<int>(m_map.extent(r)); }
 
   KOKKOS_INLINE_FUNCTION constexpr
@@ -709,11 +724,11 @@ public:
 
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   KOKKOS_INLINE_FUNCTION
-  const Kokkos::Impl::ViewMapping< traits , void > &
+  const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > &
   implementation_map() const { return m_map ; }
 #endif
   KOKKOS_INLINE_FUNCTION
-  const Kokkos::Impl::ViewMapping< traits , void > &
+  const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > &
   impl_map() const { return m_map ; }
   KOKKOS_INLINE_FUNCTION
   const Kokkos::Impl::SharedAllocationTracker &
@@ -1955,7 +1970,7 @@ public:
     , m_map()
     {
       typedef typename View<RT,RP...>::traits  SrcTraits ;
-      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize >  Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible View copy construction" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
     }
@@ -1965,7 +1980,7 @@ public:
   View & operator = ( const View<RT,RP...> & rhs )
     {
       typedef typename View<RT,RP...>::traits  SrcTraits ;
-      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize >  Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible View copy assignment" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
       m_track.assign( rhs.m_track , traits::is_managed );
@@ -1992,7 +2007,7 @@ public:
 
       typedef typename Mapping::type DstType ;
 
-      static_assert( Kokkos::Impl::ViewMapping< traits , typename DstType::traits , void >::is_assignable
+      static_assert( Kokkos::Impl::ViewMapping< traits , typename DstType::traits , typename traits::specialize >::is_assignable
         , "Subview construction requires compatible view and subview arguments" );
 
       Mapping::assign( m_map, src_view.m_map, arg0 , args... );
@@ -2266,10 +2281,10 @@ public:
     }
   template <class Traits>
   KOKKOS_INLINE_FUNCTION
-  View( const track_type & track,  const Kokkos::Impl::ViewMapping< Traits , void >  &map ) :
+  View( const track_type & track,  const Kokkos::Impl::ViewMapping< Traits , typename Traits::specialize >  &map ) :
   m_track(track), m_map()
   {
-    typedef Kokkos::Impl::ViewMapping< traits , Traits , void >  Mapping ;
+    typedef Kokkos::Impl::ViewMapping< traits , Traits , typename traits::specialize >  Mapping ;
     static_assert( Mapping::is_assignable , "Incompatible View copy construction" );
     Mapping::assign( m_map , map , track );
   }
diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
index 6b3e206f6c34121c3d44f8c1229c7ee2eb8d42e7..42269176ed0d1ef46c0775ae59ab3e8f23a50f4a 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
@@ -142,16 +142,15 @@ private:
 
     WorkRange range( self.m_policy , exec.pool_rank() , exec.pool_size() );
 
-    exec.set_work_range(range.begin(),range.end(),self.m_policy.chunk_size());
+    exec.set_work_range(range.begin()-self.m_policy.begin(),range.end()-self.m_policy.begin(),self.m_policy.chunk_size());
     exec.reset_steal_target();
     exec.barrier();
 
     long work_index = exec.get_work_index();
 
     while(work_index != -1) {
-      const Member begin = static_cast<Member>(work_index) * self.m_policy.chunk_size();
+      const Member begin = static_cast<Member>(work_index) * self.m_policy.chunk_size()+self.m_policy.begin();
       const Member end = begin + self.m_policy.chunk_size() < self.m_policy.end()?begin+self.m_policy.chunk_size():self.m_policy.end();
-
       ParallelFor::template exec_range< WorkTag >
         ( self.m_functor , begin , end );
       work_index = exec.get_work_index();
@@ -470,14 +469,14 @@ private:
     const ParallelReduce & self = * ((const ParallelReduce *) arg );
     const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() );
 
-    exec.set_work_range(range.begin(),range.end(),self.m_policy.chunk_size());
+    exec.set_work_range(range.begin()-self.m_policy.begin(),range.end()-self.m_policy.begin(),self.m_policy.chunk_size());
     exec.reset_steal_target();
     exec.barrier();
 
     long work_index = exec.get_work_index();
     reference_type update = ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() );
     while(work_index != -1) {
-      const Member begin = static_cast<Member>(work_index) * self.m_policy.chunk_size();
+      const Member begin = static_cast<Member>(work_index) * self.m_policy.chunk_size() + self.m_policy.begin();
       const Member end = begin + self.m_policy.chunk_size() < self.m_policy.end()?begin+self.m_policy.chunk_size():self.m_policy.end();
       ParallelReduce::template exec_range< WorkTag >
         ( self.m_functor , begin , end
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
index e2028db8c8ff214e3bbebdefb408cb0b669d07cf..3d99b075689bd2521575a4388b7e1f3aa07a7a4c 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
@@ -111,7 +111,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
   unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
   unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
   unsigned int done_active = 0;
   while (active!=done_active) {
@@ -127,7 +127,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
     done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
 #else
-    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
 #endif
   }
   return return_val;
@@ -308,6 +308,16 @@ T atomic_compare_exchange( volatile T * const dest_v, const T compare, const T v
 #endif
 #endif // !defined ROCM_ATOMICS
 
+// Dummy for non-CUDA Kokkos headers being processed by NVCC: device-side stub that ignores its arguments and returns T().
+#if defined(__CUDA_ARCH__) && !defined(KOKKOS_ENABLE_CUDA)
+template <typename T>
+__inline__ __device__
+T atomic_compare_exchange(volatile T * const, const Kokkos::Impl::identity_t<T>, const Kokkos::Impl::identity_t<T>)
+{
+  return T();
+}
+#endif
+
 template <typename T>
 KOKKOS_INLINE_FUNCTION
 bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
index 4e41cb125856b88121ec2565d53ff07e3b7a87a7..6ccf35816b40a5753f04d536b5a0fe83d3257658 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
@@ -134,7 +134,7 @@ T atomic_exchange( volatile T * const dest ,
   unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
   unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
   unsigned int done_active = 0;
   while (active!=done_active) {
@@ -149,7 +149,7 @@ T atomic_exchange( volatile T * const dest ,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
     done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
 #else
-    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
 #endif
   }
   return return_val;
@@ -418,6 +418,23 @@ void atomic_assign( volatile T * const dest_v , const T val )
 
 #endif
 #endif
+
+// Dummies for non-CUDA Kokkos headers being processed by NVCC: device-side stubs; atomic_exchange returns T(), atomic_assign does nothing.
+#if defined(__CUDA_ARCH__) && !defined(KOKKOS_ENABLE_CUDA)
+template <typename T>
+__inline__ __device__
+T atomic_exchange(volatile T * const, const Kokkos::Impl::identity_t<T>)
+{
+  return T();
+}
+
+template < typename T >
+__inline__ __device__
+void atomic_assign(volatile T * const, const Kokkos::Impl::identity_t<T>)
+{
+}
+#endif
+
 } // namespace Kokkos
 
 #endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
index e2e23bb5fdb0a83aea5ec4d8956422b1d35c9123..d6fab811332bc4aba65118d9da96582c42db1b6c 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
@@ -147,7 +147,7 @@ T atomic_fetch_add( volatile T * const dest ,
   unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
   unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
   unsigned int done_active = 0;
   while (active!=done_active) {
@@ -164,7 +164,7 @@ T atomic_fetch_add( volatile T * const dest ,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
     done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
 #else
-    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
 #endif
   }
   return return_val;
@@ -384,6 +384,15 @@ T atomic_fetch_add( volatile T * const dest_v , typename std::add_const<T>::type
 #endif // !defined ROCM_ATOMICS
 //----------------------------------------------------------------------------
 
+// Dummy for non-CUDA Kokkos headers being processed by NVCC: device-side stub that ignores its arguments and returns T().
+#if defined(__CUDA_ARCH__) && !defined(KOKKOS_ENABLE_CUDA)
+template< typename T >
+__inline__ __device__
+T atomic_fetch_add(volatile T* const, Kokkos::Impl::identity_t<T>) {
+  return T();
+}
+#endif
+
 // Simpler version of atomic_fetch_add without the fetch
 template <typename T>
 KOKKOS_INLINE_FUNCTION
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
index 044cbdf79ae74fe304de8fea159c7a18188289f0..db0d97ca19ebde9b30e5030b43b7338ed6388739 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp
@@ -149,6 +149,15 @@ T atomic_fetch_and( volatile T * const dest_v , const T val )
 #endif
 //----------------------------------------------------------------------------
 
+// Dummy for non-CUDA Kokkos headers being processed by NVCC: device-side stub that ignores its arguments and returns T().
+#if defined(__CUDA_ARCH__) && !defined(KOKKOS_ENABLE_CUDA)
+template< typename T >
+__inline__ __device__
+T atomic_fetch_and(volatile T* const, Kokkos::Impl::identity_t<T>) {
+  return T();
+}
+#endif
+
 // Simpler version of atomic_fetch_and without the fetch
 template <typename T>
 KOKKOS_INLINE_FUNCTION
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
index 0b8cbb1d8ce0777cd029fc70b93675e29e65e63a..d146ef3148050d904306e92f5fd87604dd9e3ae4 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp
@@ -149,6 +149,15 @@ T atomic_fetch_or( volatile T * const dest_v , const T val )
 #endif
 //----------------------------------------------------------------------------
 
+// Dummy for non-CUDA Kokkos headers being processed by NVCC: device-side stub that ignores its arguments and returns T().
+#if defined(__CUDA_ARCH__) && !defined(KOKKOS_ENABLE_CUDA)
+template< typename T >
+__inline__ __device__
+T atomic_fetch_or(volatile T* const, Kokkos::Impl::identity_t<T>) {
+  return T();
+}
+#endif
+
 // Simpler version of atomic_fetch_or without the fetch
 template <typename T>
 KOKKOS_INLINE_FUNCTION
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
index dd69c967c54ab2476d9f5656b8fa3266b3de71c7..48dc8731ef8a6551dbcb0ce19cc0620320d0096b 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
@@ -139,7 +139,7 @@ T atomic_fetch_sub( volatile T * const dest ,
   unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
   unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
   unsigned int done_active = 0;
   while (active!=done_active) {
@@ -154,7 +154,7 @@ T atomic_fetch_sub( volatile T * const dest ,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
     done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
 #else
-    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
 #endif
   }
   return return_val;
@@ -304,6 +304,15 @@ T atomic_fetch_sub( volatile T * const dest_v , const T val )
 #endif
 #endif // !defined ROCM_ATOMICS
 
+// dummy for non-CUDA Kokkos headers being processed by NVCC
+#if defined(__CUDA_ARCH__) && !defined(KOKKOS_ENABLE_CUDA)
+template< typename T >
+__inline__ __device__
+T atomic_fetch_sub(volatile T* const, Kokkos::Impl::identity_t<T>) {
+  return T();
+}
+#endif
+
 // Simpler version of atomic_fetch_sub without the fetch
 template <typename T>
 KOKKOS_INLINE_FUNCTION
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
index 74e9db303d7445cde738a0aa1b7f1a26931c717d..a3a18166af4b2b00f6ea94aad2b6fe8a0b869da7 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
@@ -230,9 +230,6 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
   typename Kokkos::Impl::enable_if<
                 ( sizeof(T) != 4 )
              && ( sizeof(T) != 8 )
-          #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
-             && ( sizeof(T) != 16 )
-          #endif
            , const T >::type val )
 {
 
@@ -250,7 +247,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
   unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
   unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
   unsigned int done_active = 0;
   while (active!=done_active) {
@@ -265,7 +262,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
     done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
 #else
-    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
 #endif
   }
   return return_val;
@@ -298,7 +295,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
   unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
   unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
 #else
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
 #endif
   unsigned int done_active = 0;
   while (active!=done_active) {
@@ -313,7 +310,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
 #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
     done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
 #else
-    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
 #endif
   }
   return return_val;
diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
index 628e070a0d551c440ad9001ca65782a1acf60f60..82fdee4399384c1e55df44753872d90c64d6649c 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -49,6 +49,7 @@
 #include <sstream>
 #include <cstdlib>
 #include <stack>
+#include <cerrno>
 
 //----------------------------------------------------------------------------
 
@@ -70,7 +71,6 @@ bool is_unsigned_int(const char* str)
   }
   return true;
 }
-
 void initialize_internal(const InitArguments& args)
 {
 // This is an experimental setting
@@ -99,6 +99,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
   if (use_gpu < 0 && ndevices >= 0) {
     auto local_rank_str = std::getenv("OMPI_COMM_WORLD_LOCAL_RANK"); //OpenMPI
     if (!local_rank_str) local_rank_str = std::getenv("MV2_COMM_WORLD_LOCAL_RANK"); //MVAPICH2
+    if (!local_rank_str) local_rank_str = std::getenv("SLURM_LOCALID"); //SLURM
     if (local_rank_str) {
       auto local_rank = std::atoi(local_rank_str);
       use_gpu = local_rank % ndevices;
@@ -532,6 +533,85 @@ void initialize(int& narg, char* arg[])
       iarg++;
     }
 
+    //Read environment variables
+    char * endptr;
+    auto env_num_threads_str = std::getenv("KOKKOS_NUM_THREADS");
+    if (env_num_threads_str!=nullptr) {
+        errno = 0;
+        auto env_num_threads = std::strtol(env_num_threads_str,&endptr,10);
+        if (endptr== env_num_threads_str) 
+            Impl::throw_runtime_exception("Error: cannot convert KOKKOS_NUM_THREADS to an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if (errno == ERANGE)
+            Impl::throw_runtime_exception("Error: KOKKOS_NUM_THREADS out of range of representable values by an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if ((num_threads != -1)&&(env_num_threads!=num_threads))
+            Impl::throw_runtime_exception("Error: expecting a match between --kokkos-threads and KOKKOS_NUM_THREADS if both are set. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        else
+            num_threads = env_num_threads;
+    }
+    auto env_numa_str = std::getenv("KOKKOS_NUMA");
+    if (env_numa_str!=nullptr) {
+        errno = 0;
+        auto env_numa = std::strtol(env_numa_str,&endptr,10);
+        if (endptr== env_numa_str) 
+            Impl::throw_runtime_exception("Error: cannot convert KOKKOS_NUMA to an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if (errno == ERANGE)
+            Impl::throw_runtime_exception("Error: KOKKOS_NUMA out of range of representable values by an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if ((numa != -1)&&(env_numa!=numa))
+            Impl::throw_runtime_exception("Error: expecting a match between --kokkos-numa and KOKKOS_NUMA if both are set. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        else
+            numa = env_numa;
+    }
+    auto env_device_str = std::getenv("KOKKOS_DEVICE_ID");
+    if (env_device_str!=nullptr) {
+        errno = 0;
+        auto env_device = std::strtol(env_device_str,&endptr,10);
+        if (endptr== env_device_str) 
+            Impl::throw_runtime_exception("Error: cannot convert KOKKOS_DEVICE_ID to an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if (errno == ERANGE)
+            Impl::throw_runtime_exception("Error: KOKKOS_DEVICE_ID out of range of representable values by an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if ((device != -1)&&(env_device!=device))
+            Impl::throw_runtime_exception("Error: expecting a match between --kokkos-device and KOKKOS_DEVICE_ID if both are set. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        else
+            device = env_device;
+    }
+    auto env_ndevices_str = std::getenv("KOKKOS_NUM_DEVICES");
+    if (env_ndevices_str!=nullptr) {
+        errno = 0;
+        auto env_ndevices = std::strtol(env_ndevices_str,&endptr,10);
+        if (endptr== env_ndevices_str) 
+            Impl::throw_runtime_exception("Error: cannot convert KOKKOS_NUM_DEVICES to an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if (errno == ERANGE)
+            Impl::throw_runtime_exception("Error: KOKKOS_NUM_DEVICES out of range of representable values by an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        if ((ndevices != -1)&&(env_ndevices!=ndevices))
+            Impl::throw_runtime_exception("Error: expecting a match between --kokkos-ndevices and KOKKOS_NUM_DEVICES if both are set. Raised by Kokkos::initialize(int narg, char* argc[]).");
+        else
+            ndevices = env_ndevices;
+        //Skip device
+        auto env_skip_device_str = std::getenv("KOKKOS_SKIP_DEVICE");
+        if (env_skip_device_str!=nullptr) {
+            errno = 0;
+            auto env_skip_device = std::strtol(env_skip_device_str,&endptr,10);
+            if (endptr== env_skip_device_str) 
+                Impl::throw_runtime_exception("Error: cannot convert KOKKOS_SKIP_DEVICE to an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+            if (errno == ERANGE)
+                Impl::throw_runtime_exception("Error: KOKKOS_SKIP_DEVICE out of range of representable values by an integer. Raised by Kokkos::initialize(int narg, char* argc[]).");
+            if ((skip_device != 9999)&&(env_skip_device!=skip_device))
+                Impl::throw_runtime_exception("Error: expecting a match between --kokkos-ndevices and KOKKOS_SKIP_DEVICE if both are set. Raised by Kokkos::initialize(int narg, char* argc[]).");
+            else
+                skip_device = env_skip_device;
+        }
+    }
+    char * env_disablewarnings_str = std::getenv("KOKKOS_DISABLE_WARNINGS"); // nullptr when the variable is unset
+    if (env_disablewarnings_str!=nullptr) {
+        std::string env_str (env_disablewarnings_str); // deep-copies string so it can be upper-cased in place
+        for (char& c : env_str) { c = static_cast<char>(toupper(static_cast<unsigned char>(c))); } // cast avoids UB: toupper on a negative char value is undefined
+        if ((env_str == "TRUE") || (env_str == "ON") || (env_str == "1"))
+            disable_warnings = true;
+        else // any other value does not request disabling; reject it if disable_warnings was already set
+            if (disable_warnings)
+                Impl::throw_runtime_exception("Error: expecting a match between --kokkos-disable-warnings and KOKKOS_DISABLE_WARNINGS if both are set. Raised by Kokkos::initialize(int narg, char* argc[]).");
+    }
+
     InitArguments arguments;
     arguments.num_threads = num_threads;
     arguments.num_numa = numa;
diff --git a/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp b/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp
index 868b31861a7b10575b8deb137969cdb2807d8648..611a32c4fe04d925b1fa9e8b34fe2dc3ccb842b6 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp
@@ -409,6 +409,9 @@ struct inclusive_scan_integer_sequence
   static constexpr value_type value  = helper::value ;
 };
 
+template <typename T>
+using identity_t = T;
+
 }} // namespace Kokkos::Impl
 
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
index d4e3a03d384e3b2fb785eec8fefb80e5517d3135..e1539d10b06335b20bf35a26e54228d380af1585 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
@@ -103,13 +103,7 @@ namespace Impl {
 
 /** \brief  View mapping for non-specialized data type and standard layout */
 template< class Traits >
-class ViewMapping< Traits ,
-  typename std::enable_if<(
-    std::is_same< typename Traits::specialize , Kokkos::Array<> >::value &&
-    ( std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
-      std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
-      std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value )
-  )>::type >
+class ViewMapping< Traits , Kokkos::Array<> >
 {
 private:
 
@@ -345,64 +339,6 @@ public:
   }
 };
 
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-/** \brief  Assign compatible default mappings */
-
-template< class DstTraits , class SrcTraits >
-class ViewMapping< DstTraits , SrcTraits ,
-  typename std::enable_if<(
-    std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
-    &&
-    std::is_same< typename DstTraits::specialize , Kokkos::Array<> >::value
-    &&
-    (
-      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
-      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
-      std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
-    )
-    &&
-    std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
-    &&
-    (
-      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
-      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
-      std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
-    )
-  )>::type >
-{
-public:
-
-  enum { is_assignable = true };
-
-  typedef Kokkos::Impl::SharedAllocationTracker  TrackType ;
-  typedef ViewMapping< DstTraits , void >  DstType ;
-  typedef ViewMapping< SrcTraits , void >  SrcType ;
-
-  KOKKOS_INLINE_FUNCTION
-  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
-    {
-      static_assert( std::is_same< typename DstTraits::value_type , typename SrcTraits::value_type >::value ||
-                     std::is_same< typename DstTraits::value_type , typename SrcTraits::const_value_type >::value
-                   , "View assignment must have same value type or const = non-const" );
-
-      static_assert( ViewDimensionAssignable< typename DstTraits::dimension , typename SrcTraits::dimension >::value
-                   , "View assignment must have compatible dimensions" );
-
-      static_assert( std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value ||
-                     std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value ||
-                     ( DstTraits::dimension::rank == 0 ) ||
-                     ( DstTraits::dimension::rank == 1 && DstTraits::dimension::rank_dynamic == 1 )
-                   , "View assignment must have compatible layout or have rank <= 1" );
-
-      typedef typename DstType::offset_type  dst_offset_type ;
-
-      dst.m_impl_offset = dst_offset_type( src.m_impl_offset );
-      dst.m_impl_handle = src.m_impl_handle ;
-      dst.m_stride = src.m_stride ;
-    }
-};
-
 /** \brief Assign Array to non-Array */
 
 template< class DstTraits , class SrcTraits >
@@ -436,7 +372,7 @@ public:
 
   typedef Kokkos::Impl::SharedAllocationTracker  TrackType ;
   typedef ViewMapping< DstTraits , void >  DstType ;
-  typedef ViewMapping< SrcTraits , void >  SrcType ;
+  typedef ViewMapping< SrcTraits , Kokkos::Array<> >  SrcType ;
 
   KOKKOS_INLINE_FUNCTION
   static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
@@ -480,6 +416,7 @@ public:
     }
 };
 
+
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
index bb3bcfd334e072edb68f8368ccf8e18817a2228a..773f3362815f3e681315d598435291584e80bb98 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
@@ -195,7 +195,7 @@ struct ViewDimension
     {}
 
   KOKKOS_INLINE_FUNCTION
-  constexpr size_t extent( const unsigned r ) const
+  constexpr size_t extent( const unsigned r ) const noexcept
     {
       return r == 0 ? N0 : (
              r == 1 ? N1 : (
@@ -207,6 +207,19 @@ struct ViewDimension
              r == 7 ? N7 : 0 )))))));
     }
 
+  static KOKKOS_INLINE_FUNCTION
+  constexpr size_t static_extent( const unsigned r ) noexcept
+    {
+      return r == 0 ? ArgN0 : (
+             r == 1 ? ArgN1 : (
+             r == 2 ? ArgN2 : (
+             r == 3 ? ArgN3 : (
+             r == 4 ? ArgN4 : (
+             r == 5 ? ArgN5 : (
+             r == 6 ? ArgN6 : (
+             r == 7 ? ArgN7 : 0 )))))));
+    }
+
   template< size_t N >
   struct prepend { typedef ViewDimension< N , Vals... > type ; };
 
@@ -2640,6 +2653,12 @@ public:
   KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
     { return m_impl_offset.m_dim.extent(r); }
 
+  static KOKKOS_INLINE_FUNCTION constexpr size_t static_extent( const unsigned r ) noexcept
+    {
+      using dim_type = typename offset_type::dimension_type;
+      return dim_type::static_extent(r);
+    }
+
   KOKKOS_INLINE_FUNCTION constexpr
   typename Traits::array_layout layout() const
     { return m_impl_offset.layout(); }
diff --git a/packages/kokkos/core/unit_test/TestCrs.hpp b/packages/kokkos/core/unit_test/TestCrs.hpp
index 77ea508b894c250bc3e1991e74cf72e79b83b355..08087ae0625b62aabecbe03a07d840ada9ae046b 100644
--- a/packages/kokkos/core/unit_test/TestCrs.hpp
+++ b/packages/kokkos/core/unit_test/TestCrs.hpp
@@ -63,6 +63,86 @@ struct CountFillFunctor {
   }
 };
 
+/* RunUpdateCrsTest
+ *   4 test cases:
+ *     1. use member object version which is constructed directly using the copy constructor
+ *     2. explicitly copy construct in local variable
+ *     3. construct default and assign to input object
+ *     4. construct object from views
+ */
+template< class CrsType, class ExecSpace, class scalarType >
+struct RunUpdateCrsTest {
+
+  struct TestOne {};
+  struct TestTwo {};
+  struct TestThree {};
+  struct TestFour {};
+
+  CrsType graph;
+  RunUpdateCrsTest( CrsType g_in ) : graph(g_in)
+  {
+  }
+
+  void run_test(int nTest) {
+     switch (nTest)
+     {
+        case 1:
+           parallel_for ("TestCrs1", Kokkos::RangePolicy<ExecSpace, TestOne>(0,graph.numRows()),*this);
+           break;
+        case 2:
+           parallel_for ("TestCrs2", Kokkos::RangePolicy<ExecSpace, TestTwo>(0,graph.numRows()),*this);
+           break;
+        case 3:
+           parallel_for ("TestCrs3", Kokkos::RangePolicy<ExecSpace, TestThree>(0,graph.numRows()),*this);
+           break;
+        case 4:
+           parallel_for ("TestCrs4", Kokkos::RangePolicy<ExecSpace, TestFour>(0,graph.numRows()),*this);
+           break;
+        default:
+           break;
+     }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void updateGraph(const CrsType & g_in, const scalarType row) const {
+     auto row_map = g_in.row_map;
+     auto entries = g_in.entries;
+     auto j_start = row_map(row);
+     auto j_end = row_map(row+1)-j_start;
+     for (scalarType j = 0; j < j_end; ++j) {
+        entries(j_start+j) = (j+1)*(j+1);
+     }
+  }
+
+  // Test Crs class from class member
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const TestOne &, const scalarType row) const {
+      updateGraph(graph, row);
+  }
+
+  // Test Crs class from copy constructor: local_graph(graph)
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const TestTwo &, const scalarType row) const {
+      CrsType local_graph(graph);
+      updateGraph(local_graph, row);
+  }
+
+  // Test Crs class from default constructor assigned to function parameter
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const TestThree &, const scalarType row) const {
+      CrsType local_graph;
+      local_graph = graph;
+      updateGraph(local_graph, row);
+  }
+
+  // Test Crs class from a local graph constructed from the row_map and entries views of the member graph
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const TestFour &, const scalarType row) const {
+      CrsType local_graph(graph.row_map, graph.entries);
+      updateGraph(local_graph, row);
+  }
+};
+
 template< class ExecSpace >
 void test_count_fill(std::int32_t nrows) {
   Kokkos::Crs<std::int32_t, ExecSpace, void, std::int32_t> graph;
@@ -81,6 +161,38 @@ void test_count_fill(std::int32_t nrows) {
   }
 }
 
+// Test Crs Constructor / assignment operation by 
+// using count and fill to create/populate initial graph,
+// then use parallel_for with Crs directly to update content
+// then verify results
+template< class ExecSpace >
+void test_constructor(std::int32_t nrows) {
+
+  for (int nTest = 1; nTest < 5; nTest++)
+  {
+     typedef Kokkos::Crs<std::int32_t, ExecSpace, void, std::int32_t> crs_int32;
+     crs_int32 graph;
+     Kokkos::count_and_fill_crs(graph, nrows, CountFillFunctor<ExecSpace>());
+     ASSERT_EQ(graph.numRows(), nrows);
+
+     RunUpdateCrsTest<crs_int32, ExecSpace, std::int32_t> crstest(graph);
+     crstest.run_test(nTest);
+
+     auto row_map = Kokkos::create_mirror_view(graph.row_map);
+     Kokkos::deep_copy(row_map, graph.row_map);
+     auto entries = Kokkos::create_mirror_view(graph.entries);
+     Kokkos::deep_copy(entries, graph.entries);
+
+     for (std::int32_t row = 0; row < nrows; ++row) {
+       auto n = (row % 4) + 1;
+       ASSERT_EQ(row_map(row + 1) - row_map(row), n);    
+       for (std::int32_t j = 0; j < n; ++j) {
+         ASSERT_EQ(entries(row_map(row) + j), (j + 1)*(j+1));
+       }
+     }
+  }
+}
+
 } // anonymous namespace
 
 TEST_F( TEST_CATEGORY, crs_count_fill )
@@ -95,4 +207,17 @@ TEST_F( TEST_CATEGORY, crs_count_fill )
   test_count_fill<TEST_EXECSPACE>(10000);
 }
 
+TEST_F( TEST_CATEGORY, crs_copy_constructor )
+{
+  test_constructor<TEST_EXECSPACE>(0);
+  test_constructor<TEST_EXECSPACE>(1);
+  test_constructor<TEST_EXECSPACE>(2);
+  test_constructor<TEST_EXECSPACE>(3);
+  test_constructor<TEST_EXECSPACE>(13);
+  test_constructor<TEST_EXECSPACE>(100);
+  test_constructor<TEST_EXECSPACE>(1000);
+  test_constructor<TEST_EXECSPACE>(10000);
+}
+
+
 } // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestMDRange.hpp b/packages/kokkos/core/unit_test/TestMDRange.hpp
index 88b3a9b0c6ebad8927f281439fd9ef48fabbc39e..a382a207002bf4d252091931d4139fddeac5c726 100644
--- a/packages/kokkos/core/unit_test/TestMDRange.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange.hpp
@@ -956,7 +956,12 @@ struct TestMDRange_3D {
           }
         , Kokkos::Min<double>(min) );
 
-      ASSERT_EQ( min, 8.0 );
+      if((N0-1)*(N1-1)*(N2-1)>0)
+        ASSERT_EQ( min, 8.0 );
+      else {
+        double min_identity = Kokkos::reduction_identity<double>::min();
+        ASSERT_EQ( min, min_identity );
+      }
     }
 #endif
 #endif
diff --git a/packages/kokkos/core/unit_test/TestMDRange_d.hpp b/packages/kokkos/core/unit_test/TestMDRange_d.hpp
index 1a477a228fbbe31771e0fd495a7c65d79bab6296..e25213a28936149eef6f207510824f965e982a4d 100644
--- a/packages/kokkos/core/unit_test/TestMDRange_d.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange_d.hpp
@@ -46,8 +46,10 @@
 namespace Test {
 
 TEST_F( TEST_CATEGORY , mdrange_3d) {
+  TestMDRange_3D< TEST_EXECSPACE >::test_for3( 1, 10, 100 );
   TestMDRange_3D< TEST_EXECSPACE >::test_for3( 100, 10, 100 );
 #if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduced explicitly handled in its own cpp file
+  TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 1, 10, 100 );
   TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 100, 10, 100 );
 #endif
 }
diff --git a/packages/kokkos/core/unit_test/TestRange.hpp b/packages/kokkos/core/unit_test/TestRange.hpp
index bc0acfb21d39a169d7e709161c3f4291c822e188..be878046cb43e06b03ade5492900c89ee53286a9 100644
--- a/packages/kokkos/core/unit_test/TestRange.hpp
+++ b/packages/kokkos/core/unit_test/TestRange.hpp
@@ -60,8 +60,11 @@ struct TestRange {
   struct VerifyInitTag {};
   struct ResetTag {};
   struct VerifyResetTag {};
+  struct OffsetTag {};
+  struct VerifyOffsetTag {};
 
-  int N; 
+  int N;
+  static const int offset = 13;
   TestRange( const size_t N_ )
     : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N_ ), N(N_)
     {}
@@ -117,6 +120,18 @@ struct TestRange {
       if ( int( 2 * i ) != host_flags( i ) ) ++error_count;
     }
     ASSERT_EQ( error_count, int( 0 ) );
+
+    Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag >( offset, N + offset ), *this );
+    Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::RangePolicy<ExecSpace, ScheduleType, VerifyOffsetTag>( 0, N ), *this);
+
+    Kokkos::deep_copy(host_flags, m_flags);
+
+    error_count = 0;
+    for (int i = 0; i < N; ++i) {
+      if (i + offset != host_flags(i))
+        ++error_count;
+    }
+    ASSERT_EQ(error_count, int(0));
   }
 
   KOKKOS_INLINE_FUNCTION
@@ -144,9 +159,19 @@ struct TestRange {
     }
   }
 
-  //----------------------------------------
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const OffsetTag &, const int i) const {
+    m_flags(i - offset) = i;
+  }
 
-  struct OffsetTag {};
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const VerifyOffsetTag &, const int i) const {
+    if (i + offset != m_flags(i)) {
+      printf("TestRange::test_for error at %d != %d\n", i + offset, m_flags(i));
+    }
+  }
+
+  //----------------------------------------
 
   void test_reduce( )
   {
@@ -158,7 +183,7 @@ struct TestRange {
     // sum( 0 .. N-1 )
     ASSERT_EQ( size_t( ( N - 1 ) * ( N ) / 2 ), size_t( total ) );
 
-    Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), *this, total );
+    Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( offset, N+offset ), *this, total );
     // sum( 1 .. N )
     ASSERT_EQ( size_t( ( N ) * ( N + 1 ) / 2 ), size_t( total ) );
   }
@@ -169,7 +194,7 @@ struct TestRange {
 
   KOKKOS_INLINE_FUNCTION
   void operator()( const OffsetTag &, const int i, value_type & update ) const
-  { update += 1 + m_flags( i ); }
+  { update += 1 + m_flags( i-offset ); }
 
   //----------------------------------------
 
diff --git a/packages/kokkos/core/unit_test/TestTeamVector.hpp b/packages/kokkos/core/unit_test/TestTeamVector.hpp
index 294247a78df654b4f042d1e7aae48e7598797141..498d156db3935a8a135d81e0e9c4912b3ac22ef1 100644
--- a/packages/kokkos/core/unit_test/TestTeamVector.hpp
+++ b/packages/kokkos/core/unit_test/TestTeamVector.hpp
@@ -532,7 +532,11 @@ struct functor_vec_single {
   typedef ExecutionSpace execution_space;
 
   Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag;
-  functor_vec_single( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
+  int nStart;
+  int nEnd;
+
+  functor_vec_single( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_, const int start_, const int end_ ) : 
+                           flag( flag_ ), nStart(start_), nEnd(end_) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
@@ -541,7 +545,7 @@ struct functor_vec_single {
     // inside a parallel_for and write to it.
     Scalar value = 0;
 
-    Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 0, 13 ), [&] ( int i )
+    Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, nStart, nEnd ), [&] ( int i )
     {
       value = i; // This write is violating Kokkos semantics for nested parallelism.
     });
@@ -552,12 +556,12 @@ struct functor_vec_single {
     }, value );
 
     Scalar value2 = 0;
-    Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 0, 13 ), [&] ( int i, Scalar & val )
+    Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, nStart, nEnd ), [&] ( int i, Scalar & val )
     {
       val += value;
     }, value2 );
 
-    if ( value2 != ( value * 13 ) ) {
+    if ( value2 != ( value * (nEnd-nStart) ) ) {
       printf( "FAILED vector_single broadcast %i %i %f %f\n",
               team.league_rank(), team.team_rank(), (double) value2, (double) value );
 
@@ -746,12 +750,6 @@ bool test_scalar( int nteams, int team_size, int test ) {
                           functor_vec_red< Scalar, ExecutionSpace >( d_flag ) );
   }
   else if ( test == 1 ) {
-    // WORKAROUND CUDA
-    #if defined(KOKKOS_ENABLE_CUDA)
-    #if defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) || defined(KOKKOS_ARCH_PASCAL)
-    if(!std::is_same<ExecutionSpace,Kokkos::Cuda>::value)
-    #endif
-    #endif
     Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
                           functor_vec_red_reducer< Scalar, ExecutionSpace >( d_flag ) );
   }
@@ -765,7 +763,7 @@ bool test_scalar( int nteams, int team_size, int test ) {
   }
   else if ( test == 4 ) {
     Kokkos::parallel_for( "B", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
-                          functor_vec_single< Scalar, ExecutionSpace >( d_flag ) );
+                          functor_vec_single< Scalar, ExecutionSpace >( d_flag, 0, 13 ) );
   }
   else if ( test == 5 ) {
     Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ),
@@ -791,6 +789,10 @@ bool test_scalar( int nteams, int team_size, int test ) {
     Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
                           functor_team_vector_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) );
   }
+  else if ( test == 11 ) {
+    Kokkos::parallel_for( "B", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
+                          functor_vec_single< Scalar, ExecutionSpace >( d_flag, 4, 13 ) );
+  }
 
   Kokkos::deep_copy( h_flag, d_flag );
 
@@ -938,6 +940,7 @@ TEST_F( TEST_CATEGORY, team_vector )
   ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 8 ) ) );
   ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 9 ) ) );
   ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 10 ) ) );
+  ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 11 ) ) );
 }
 #endif
 
diff --git a/packages/kokkos/core/unit_test/TestViewCopy.hpp b/packages/kokkos/core/unit_test/TestViewCopy.hpp
index 7eab9daa11e469ba70207aeb0983664f26360e25..ddcd0ae5ba1ce64d60458711871912f4661f2655 100644
--- a/packages/kokkos/core/unit_test/TestViewCopy.hpp
+++ b/packages/kokkos/core/unit_test/TestViewCopy.hpp
@@ -56,17 +56,13 @@ struct TestViewCopy {
 
   using InExecSpace = ExecSpace;
 
-  static void test_view_copy()
+  static void test_view_copy(const int dim0, const int dim1, const int dim2)
   {
 #if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_ROCM )
    // ExecSpace = CudaUVM, CudaHostPinned
    // This test will fail at runtime with an illegal memory access if something goes wrong
    // Test 1: deep_copy from host_mirror_space to ExecSpace and ExecSpace back to host_mirror_space
    {
-    const int dim0 = 4;
-    const int dim1 = 2;
-    const int dim2 = 3;
-
     typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
     Rank4ViewType view_4;
     view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
@@ -88,19 +84,21 @@ struct TestViewCopy {
 
    // Test 2: deep_copy from Cuda to ExecSpace and ExecSpace back to Cuda
    {
-    const int dim0 = 4;
-    const int dim1 = 2;
-    const int dim2 = 3;
-
     typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
     Rank4ViewType view_4;
     view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
 
 #if defined( KOKKOS_ENABLE_CUDA )
-    typedef Kokkos::Cuda space_type;
+    typedef typename std::conditional<
+        Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,typename InExecSpace::memory_space>::accessible,
+        Kokkos::CudaSpace,
+        InExecSpace>::type space_type;
 #endif
 #if defined( KOKKOS_ENABLE_ROCM )
-    typedef Kokkos::Experimental::ROCm space_type;
+    typedef typename std::conditional<
+        Kokkos::Impl::MemorySpaceAccess<Kokkos::ROCmSpace,typename InExecSpace::memory_space>::accessible,
+        Kokkos::ROCmSpace,
+        InExecSpace>::type space_type;
 #endif
     Kokkos::View<double**,Kokkos::LayoutLeft,space_type> srcView("srcView", dim2, dim2);
 
@@ -118,10 +116,6 @@ struct TestViewCopy {
 
    // Test 3: deep_copy from host_space to ExecSpace and ExecSpace back to host_space
    {
-    const int dim0 = 4;
-    const int dim1 = 2;
-    const int dim2 = 3;
-
     typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
     Rank4ViewType view_4;
     view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
@@ -149,7 +143,41 @@ struct TestViewCopy {
 
 TEST_F( TEST_CATEGORY , view_copy_tests ) {
   //Only include this file to be compiled with CudaUVM and CudaHostPinned
-  TestViewCopy< TEST_EXECSPACE >::test_view_copy();
+  TestViewCopy< TEST_EXECSPACE >::test_view_copy(4,2,3);
+  TestViewCopy< TEST_EXECSPACE >::test_view_copy(4,2,0);
+}
+
+TEST_F( TEST_CATEGORY , view_copy_degenerated ) {
+  //Exercises deep_copy for every pairing of default-constructed and zero-extent views (managed and unmanaged)
+  Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v_um_def_1;
+  Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v_um_1( reinterpret_cast<int*>(-1), 0 );
+  Kokkos::View<int*> v_m_def_1;
+  Kokkos::View<int*> v_m_1("v_m_1", 0);
+
+  Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v_um_def_2;
+  Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v_um_2( reinterpret_cast<int*>(-1), 0 );
+  Kokkos::View<int*> v_m_def_2;
+  Kokkos::View<int*> v_m_2("v_m_2", 0);
+
+  Kokkos::deep_copy(v_um_def_1, v_um_def_2);
+  Kokkos::deep_copy(v_um_def_1, v_um_2);
+  Kokkos::deep_copy(v_um_def_1, v_m_def_2);
+  Kokkos::deep_copy(v_um_def_1, v_m_2);
+
+  Kokkos::deep_copy(v_um_1, v_um_def_2);
+  Kokkos::deep_copy(v_um_1, v_um_2);
+  Kokkos::deep_copy(v_um_1, v_m_def_2);
+  Kokkos::deep_copy(v_um_1, v_m_2);
+
+  Kokkos::deep_copy(v_m_def_1, v_um_def_2);
+  Kokkos::deep_copy(v_m_def_1, v_um_2);
+  Kokkos::deep_copy(v_m_def_1, v_m_def_2);
+  Kokkos::deep_copy(v_m_def_1, v_m_2);
+
+  Kokkos::deep_copy(v_m_1, v_um_def_2);
+  Kokkos::deep_copy(v_m_1, v_um_2);
+  Kokkos::deep_copy(v_m_1, v_m_def_2);
+  Kokkos::deep_copy(v_m_1, v_m_2);
 }
 
 } // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp
index 365531cb6f188dc8786279784a47464a0e6e23ee..03d5e501b9dad43e4882539fc1b8b2d6ab6a4fd4 100644
--- a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp
+++ b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp
@@ -1245,5 +1245,12 @@ TEST_F( TEST_CATEGORY , view_mapping_operator )
   test_view_mapping_operator< TEST_EXECSPACE >();
 }
 
+TEST_F( TEST_CATEGORY , static_extent )
+{
+  using T = Kokkos::View<double*[2][3]>;
+  ASSERT_EQ( T::static_extent(1), 2 );
+  ASSERT_EQ( T::static_extent(2), 3 );
+}
+
 }
 
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp
index f7bfdc67873e2bec8d15dae017d30ed5f7e4c9b8..e871b3c0c063f472d0cb87b0f2eb313e72600e27 100644
--- a/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp
@@ -228,6 +228,10 @@ TEST_F( cuda, uvm )
   }
 }
 
+/* Removing the UVM allocs test because of the time it adds to the overall unit test run.
+ * The issue verified with this unit test appears to no longer be a
+ * problem.  Refer to GitHub issue 1880 for more details.
+ *
 TEST_F( cuda, uvm_num_allocs )
 {
   // The max number of UVM allocations allowed is 65536.
@@ -288,6 +292,7 @@ TEST_F( cuda, uvm_num_allocs )
 
   #undef MAX_NUM_ALLOCS
 }
+*/
 
 template< class MemSpace, class ExecSpace >
 struct TestViewCudaAccessible {
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_e.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_e.cpp
index 4249b58e82e5a2dc6c78a64b2e757c647711a749..2e8134aac448571f9a8b42dc39f1371bd3958eb5 100644
--- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_e.cpp
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_e.cpp
@@ -43,3 +43,4 @@
 
 #include <openmp/TestOpenMP_Category.hpp>
 #include <TestViewAPI_e.hpp>
+#include <TestViewCopy.hpp>
diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_ViewAPI_e.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_ViewAPI_e.cpp
index 9f0e765aba20840a1234193b627b3faaf9b4136a..5082729789c769b6845815c852d21ec5178efaf3 100644
--- a/packages/kokkos/core/unit_test/serial/TestSerial_ViewAPI_e.cpp
+++ b/packages/kokkos/core/unit_test/serial/TestSerial_ViewAPI_e.cpp
@@ -43,3 +43,5 @@
 
 #include <serial/TestSerial_Category.hpp>
 #include <TestViewAPI_e.hpp>
+#include <TestViewCopy.hpp>
+
diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_ViewAPI_e.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_ViewAPI_e.cpp
index 2d9b17bc3e2e2ff768728e1582b95c4179b5da5c..616a92349656e4357a4a694be2af99d3588bc4f6 100644
--- a/packages/kokkos/core/unit_test/threads/TestThreads_ViewAPI_e.cpp
+++ b/packages/kokkos/core/unit_test/threads/TestThreads_ViewAPI_e.cpp
@@ -43,3 +43,4 @@
 
 #include <threads/TestThreads_Category.hpp>
 #include <TestViewAPI_e.hpp>
+#include <TestViewCopy.hpp>
diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash
index 34be03f9802dc5dcc4f49ec968bfb5c513094a20..f3c4f16238c9c9efbbead7cc8f1ae8525922ca11 100755
--- a/packages/kokkos/generate_makefile.bash
+++ b/packages/kokkos/generate_makefile.bash
@@ -68,6 +68,9 @@ do
     --cxxflags*)
       CXXFLAGS="${key#*=}"
       ;;
+    --cxxstandard*)
+      KOKKOS_CXX_STANDARD="${key#*=}"
+      ;;
     --ldflags*)
       LDFLAGS="${key#*=}"
       ;;
@@ -127,6 +130,7 @@ do
       echo "--arch=[OPT]:  Set target architectures. Options are:"
       echo "               [AMD]"
       echo "                 AMDAVX          = AMD CPU"
+      echo "                 EPYC            = AMD EPYC Zen-Core CPU"
       echo "               [ARM]"
       echo "                 ARMv80          = ARMv8.0 Compatible CPU"
       echo "                 ARMv81          = ARMv8.1 Compatible CPU"
@@ -165,6 +169,8 @@ do
       echo "                                build.  This will still set certain required"
       echo "                                flags via KOKKOS_CXXFLAGS (such as -fopenmp,"
       echo "                                --std=c++11, etc.)."
+      echo "--cxxstandard=[FLAGS]         Overwrite KOKKOS_CXX_STANDARD for library build and test"
+      echo "                                c++11 (default), c++14, c++17, c++1y, c++1z, c++2a"
       echo "--ldflags=[FLAGS]             Overwrite LDFLAGS for library build and test"
       echo "                                build. This will still set certain required"
       echo "                                flags via KOKKOS_LDFLAGS (such as -fopenmp,"
@@ -243,6 +249,10 @@ if [ ${#CXXFLAGS} -gt 0 ]; then
   KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\""
 fi
 
+if [ ${#KOKKOS_CXX_STANDARD} -gt 0 ]; then
+  KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\""
+fi
+
 if [ ${#LDFLAGS} -gt 0 ]; then
   KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\""
 fi
diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt
index 08453309dae19de682bf9c260d29152f4eed8aaa..777f080c498fce8e8fbb9144eeb3c9e73952e905 100644
--- a/packages/kokkos/master_history.txt
+++ b/packages/kokkos/master_history.txt
@@ -15,3 +15,4 @@ tag:  2.5.00     date: 12:15:2017    master: dfe685f4    develop: ec7ad6d8
 tag:  2.6.00     date: 03:07:2018    master: 62e760fa    develop: d1ba7d71
 tag:  2.7.00     date: 05:24:2018    master: e01945d0    develop: 2d13f608
 tag:  2.7.24     date: 11:04:2018    master: d3a94192    develop: 7a06fc81
+tag:  2.8.00     date: 02:05:2019    master: 34931a36    develop: d1659d1d
diff --git a/packages/kokkos/scripts/testing_scripts/test_all_sandia b/packages/kokkos/scripts/testing_scripts/test_all_sandia
index d1424ade81838fdd4da6d122f32cc4dc27332566..d34d04b7ce951eb9c093a2b389e997066f8ae4b2 100755
--- a/packages/kokkos/scripts/testing_scripts/test_all_sandia
+++ b/packages/kokkos/scripts/testing_scripts/test_all_sandia
@@ -88,6 +88,8 @@ CXX_FLAGS_EXTRA=""
 LD_FLAGS_EXTRA=""
 KOKKOS_OPTIONS=""
 
+CXX_STANDARD="c++11"
+
 #
 # Handle arguments.
 #
@@ -142,6 +144,9 @@ do
     --cxxflags-extra*)
       CXX_FLAGS_EXTRA="${key#*=}"
       ;;
+    --cxxstandard*)
+      CXX_STANDARD="${key#*=}"
+      ;;
     --ldflags-extra*)
       LD_FLAGS_EXTRA="${key#*=}"
       ;;
@@ -227,18 +232,30 @@ elif [ "$MACHINE" = "white" ]; then
   export SLURM_TASKS_PER_NODE=32
 
   BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
-  IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
+  IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>,gcc/7.2.0"
   CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.2.0,ibm/xl/16.1.0"
+  CUDA10_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.4.0,ibm/xl/16.1.0"
 
   # Don't do pthread on white.
   GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
 
-  # Format: (compiler module-list build-list exe-name warning-flag)
-  COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-             "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-             "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
-             "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
-  )
+  if [ "$SPOT_CHECK" = "True" ]; then
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST "OpenMP_Serial" g++ $GCC_WARNING_FLAGS"
+               "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
+               "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  else
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
+               "ibm/16.1.1 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
+               "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/10.0.130 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  fi
 
   if [ -z "$ARCH_FLAG" ]; then
     ARCH_FLAG="--arch=Power8,Kepler37"
@@ -323,6 +340,7 @@ elif [ "$MACHINE" = "apollo" ]; then
   BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
   CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
   CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
+  CUDA10_MODULE_LIST="sems-env,kokkos-env,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
 
   CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.0.69"
   CLANG7_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.1"
@@ -344,6 +362,7 @@ elif [ "$MACHINE" = "apollo" ]; then
   else
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/10.0 $CUDA10_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
                "clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
                "clang/7.0 $CLANG7_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
                "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
@@ -629,6 +648,8 @@ single_build_and_test() {
   local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}"
   local ldflags="${ldflags} ${LD_FLAGS_EXTRA}"
 
+  local cxx_standard="${CXX_STANDARD}"
+
   if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
     local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
   fi
@@ -650,7 +671,7 @@ single_build_and_test() {
       run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
     fi
   else
-    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
+    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
     local -i build_start_time=$(date +%s)
     run_cmd make -j 48 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
     local -i build_end_time=$(date +%s)