diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo
index 25d2de3f21647f67c2e7b2c28678488d25372975..d9131b0c8b44b22749b5732ae9147f20d821d656 100644
--- a/packages/kokkos/.gitrepo
+++ b/packages/kokkos/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = git@github.com:kokkos/kokkos.git
 	branch = master
-	commit = 785d19f23ed4ba43354f9ecd7ef67cd5976ca4c8
-	parent = 3c152cb1a42b2192c6ff4ad8066c29541f4dd721
+	commit = 953d7968e8fc5908af954f883e2e38d02c279cf2
+	parent = 55da1f845ac4f9ea049f2d6a97c7edef95a887ab
 	cmdver = 0.4.1
 	method = merge
diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md
index 149e63ca91c8b9b23afb3e78079c23c0dea5eb38..9595b03ff93066631fc1c8d32bb7ccaab04f4980 100644
--- a/packages/kokkos/CHANGELOG.md
+++ b/packages/kokkos/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Change Log
 
+## [3.1.1](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-05-04)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1)
+
+**Fixed bugs:**
+
+- Fix complex_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989)
+- Fix compilation fails when profiling disabled and CUDA enabled [\#3001](https://github.com/kokkos/kokkos/issues/3001)
+- Fix cuda reduction of non-trivial scalars of size 4 [\#2990](https://github.com/kokkos/kokkos/issues/2990)
+- Configure and install version file when building in Trilinos [\#2957](https://github.com/kokkos/kokkos/pull/2957)
+- Fix OpenMPTarget build missing include and namespace [\#3000](https://github.com/kokkos/kokkos/issues/3000)
+- Fix typo in KOKKOS_SET_EXE_PROPERTY() [\#2959](https://github.com/kokkos/kokkos/issues/2959)
+- Fix non-zero span subviews of zero sized subviews [\#2979](https://github.com/kokkos/kokkos/issues/2979)
+
 ## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00)
 
diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt
index db88879039603880fc76bce8e0d195fc60f675c8..0e2aaa1897074826ba672d0c6a5a4a0084a0f504 100644
--- a/packages/kokkos/CMakeLists.txt
+++ b/packages/kokkos/CMakeLists.txt
@@ -103,7 +103,7 @@ ENDIF()
 
 set(Kokkos_VERSION_MAJOR 3)
 set(Kokkos_VERSION_MINOR 1)
-set(Kokkos_VERSION_PATCH 0)
+set(Kokkos_VERSION_PATCH 1)
 set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
 math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
 
diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos
index afb3a371e9cc471b696e00d363b8b66995bb166f..320d398d9494ff7c1f7148c3c47cfdbe6b1ca461 100644
--- a/packages/kokkos/Makefile.kokkos
+++ b/packages/kokkos/Makefile.kokkos
@@ -2,7 +2,7 @@
 
 KOKKOS_VERSION_MAJOR = 3
 KOKKOS_VERSION_MINOR = 1
-KOKKOS_VERSION_PATCH = 0
+KOKKOS_VERSION_PATCH = 1
 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
 
 # Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
diff --git a/packages/kokkos/cmake/kokkos_install.cmake b/packages/kokkos/cmake/kokkos_install.cmake
index 6a39590f036da00f133c0654cf99f0788d7e6075..97bb2bd0b052a31052d7ac3b947501a722ab0e5b 100644
--- a/packages/kokkos/cmake/kokkos_install.cmake
+++ b/packages/kokkos/cmake/kokkos_install.cmake
@@ -1,3 +1,4 @@
+INCLUDE(CMakePackageConfigHelpers)
 IF (NOT KOKKOS_HAS_TRILINOS)
   INCLUDE(GNUInstallDirs)
 
@@ -11,7 +12,6 @@ IF (NOT KOKKOS_HAS_TRILINOS)
     "${Kokkos_BINARY_DIR}/KokkosConfig.cmake"
     INSTALL_DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/cmake)
 
-  INCLUDE(CMakePackageConfigHelpers)
   CONFIGURE_PACKAGE_CONFIG_FILE(
 	  cmake/KokkosConfigCommon.cmake.in
 	  "${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake"
@@ -35,6 +35,13 @@ ELSE()
   CONFIGURE_FILE(cmake/KokkosTrilinosConfig.cmake.in ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake @ONLY)
   file(READ ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake KOKKOS_TRILINOS_CONFIG)
   file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_TRILINOS_CONFIG}")
+
+  WRITE_BASIC_PACKAGE_VERSION_FILE("${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake"
+      VERSION "${Kokkos_VERSION}"
+      COMPATIBILITY SameMajorVersion)
+
+  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake
+      DESTINATION "${${PROJECT_NAME}_INSTALL_LIB_DIR}/cmake/${PACKAGE_NAME}")
 ENDIF()
 
 INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR})
diff --git a/packages/kokkos/cmake/kokkos_tribits.cmake b/packages/kokkos/cmake/kokkos_tribits.cmake
index 1c3b704ada8eb8541694272ba88ed77c64302ba0..6ee1409aa72127d4ae4c127d097b4420a9149cb1 100644
--- a/packages/kokkos/cmake/kokkos_tribits.cmake
+++ b/packages/kokkos/cmake/kokkos_tribits.cmake
@@ -170,7 +170,7 @@ FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
   IF (NOT TARGET ${TARGET_NAME})
     MESSAGE(SEND_ERROR "No target ${TARGET_NAME} exists - cannot set target properties")
   ENDIF()
-  SET_PROPERTY(TARGET ${TARGET_PROPERTY} PROPERTY ${ARGN})
+  SET_PROPERTY(TARGET ${TARGET_NAME} PROPERTY ${ARGN})
 ENDFUNCTION()
 
 MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT)
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index 7d996fba0438e05c33ba0dbaf4831b21240429df..8795eb5a38b289c68768de4ddfa553307bb75152 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -97,7 +97,9 @@ __device__ inline
   // Depending on the ValueType _shared__ memory must be aligned up to 8byte
   // boundaries The reason not to use ValueType directly is that for types with
   // constructors it could lead to race conditions
-  __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
+  alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType)
+                                               : alignof(double))
+      __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
   ValueType* result = (ValueType*)&sh_result;
   const int step    = 32 / blockDim.x;
   int shift         = STEP_WIDTH;
@@ -282,7 +284,9 @@ __device__ inline
   // Depending on the ValueType _shared__ memory must be aligned up to 8byte
   // boundaries The reason not to use ValueType directly is that for types with
   // constructors it could lead to race conditions
-  __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
+  alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType)
+                                               : alignof(double))
+      __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
   ValueType* result = (ValueType*)&sh_result;
   const int step    = 32 / blockDim.x;
   int shift         = STEP_WIDTH;
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
index f75d2e56f7ae687fc4b6345bc6c1762d1eb801e5..62966f859d1e88acfd96bf0949a6acfeac5ef89c 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
@@ -81,18 +81,19 @@ struct in_place_shfl_op {
     union conv_type {
       Scalar orig;
       shfl_type conv;
+      // This should be fine, members get explicitly reset, which changes the
+      // active member
+      KOKKOS_FUNCTION conv_type() { conv = 0; }
     };
     conv_type tmp_in;
     tmp_in.orig = in;
-    conv_type tmp_out;
-    tmp_out.conv = tmp_in.conv;
+    shfl_type tmp_out;
+    tmp_out = reinterpret_cast<shfl_type&>(tmp_in.orig);
     conv_type res;
     //------------------------------------------------
-    res.conv = self().do_shfl_op(
-        mask, reinterpret_cast<shfl_type const&>(tmp_out.conv), lane_or_delta,
-        width);
+    res.conv = self().do_shfl_op(mask, tmp_out, lane_or_delta, width);
     //------------------------------------------------
-    out = res.orig;
+    out = reinterpret_cast<Scalar&>(res.conv);
   }
 
 // TODO: figure out why 64-bit shfl fails in Clang
diff --git a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp
index 53e3b7778641d45af2d43b6fd08db499d012e370..7db5dd9561ece7679b7cf5b24bd381b143e7a6f9 100644
--- a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp
+++ b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp
@@ -56,6 +56,8 @@
 
 #include <Kokkos_HostSpace.hpp>
 
+#include <impl/Kokkos_Profiling_Interface.hpp>
+
 #include <Cuda/Kokkos_Cuda_abort.hpp>
 
 #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp
index d113f244229ff81e95585a5043a65340846ea1d9..e853b8228d07364c45b28aa669feaef2c502dbdb 100644
--- a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp
+++ b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp
@@ -59,7 +59,7 @@
 #include <Kokkos_TaskPolicy.hpp>
 #include <Kokkos_Layout.hpp>
 #include <impl/Kokkos_Tags.hpp>
-
+#include <impl/Kokkos_Profiling_Interface.hpp>
 #include <KokkosExp_MDRangePolicy.hpp>
 /*--------------------------------------------------------------------------*/
 
@@ -124,8 +124,9 @@ class OpenMPTarget {
 namespace Profiling {
 namespace Experimental {
 template <>
-struct DeviceTypeTraits<Experimental::OpenMPTarget> {
-  static constexpr DeviceType id = DeviceType::OpenMPTarget;
+struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> {
+  static constexpr DeviceType id =
+      ::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget;
 };
 }  // namespace Experimental
 }  // namespace Profiling
diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
index 9640e0fccb7595c884ace5a9097cbb09026400ce..6a6559e415dc15b44f881a0f60f37b739ddd7bb8 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -168,34 +168,7 @@ int get_ctest_gpu(const char* local_rank_str) {
 
 namespace {
 
-bool is_unsigned_int(const char* str) {
-  const size_t len = strlen(str);
-  for (size_t i = 0; i < len; ++i) {
-    if (!isdigit(str[i])) {
-      return false;
-    }
-  }
-  return true;
-}
-
-void initialize_backends(const InitArguments& args) {
-// This is an experimental setting
-// For KNL in Flat mode this variable should be set, so that
-// memkind allocates high bandwidth memory correctly.
-#ifdef KOKKOS_ENABLE_HBWSPACE
-  setenv("MEMKIND_HBW_NODES", "1", 0);
-#endif
-
-  // Protect declarations, to prevent "unused variable" warnings.
-#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_THREADS) || \
-    defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_HPX)
-  const int num_threads = args.num_threads;
-#endif
-#if defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_OPENMPTARGET)
-  const int use_numa = args.num_numa;
-#endif
-#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \
-    defined(KOKKOS_ENABLE_HIP)
+int get_gpu(const InitArguments& args) {
   int use_gpu           = args.device_id;
   const int ndevices    = args.ndevices;
   const int skip_device = args.skip_device;
@@ -231,6 +204,38 @@ void initialize_backends(const InitArguments& args) {
     // shift assignments over by one so no one is assigned to "skip_device"
     if (use_gpu >= skip_device) ++use_gpu;
   }
+  return use_gpu;
+}
+
+// Returns true iff every character of str is a decimal digit ("" -> true).
+bool is_unsigned_int(const char* str) {
+  const size_t len = strlen(str);
+  for (size_t i = 0; i < len; ++i) {
+    // Cast first: isdigit() is undefined for negative (non-EOF) char values.
+    if (!isdigit(static_cast<unsigned char>(str[i]))) return false;
+  }
+  return true;
+}
+
+void initialize_backends(const InitArguments& args) {
+// This is an experimental setting
+// For KNL in Flat mode this variable should be set, so that
+// memkind allocates high bandwidth memory correctly.
+#ifdef KOKKOS_ENABLE_HBWSPACE
+  setenv("MEMKIND_HBW_NODES", "1", 0);
+#endif
+
+  // Protect declarations, to prevent "unused variable" warnings.
+#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_THREADS) || \
+    defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_HPX)
+  const int num_threads = args.num_threads;
+#endif
+#if defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_OPENMPTARGET)
+  const int use_numa = args.num_numa;
+#endif
+#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \
+    defined(KOKKOS_ENABLE_HIP)
+  int use_gpu = get_gpu(args);
 #endif  // defined( KOKKOS_ENABLE_CUDA )
 
 #if defined(KOKKOS_ENABLE_OPENMP)
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
index a8dc1fb84a6fda5934b0bde58a4ac3f4d828fb90..c8230169e7ce873036d797251827e8e4d75db24b 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
@@ -1286,8 +1286,8 @@ struct ViewOffset<
   /* Span of the range space */
   KOKKOS_INLINE_FUNCTION
   constexpr size_type span() const {
-    return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
-           m_dim.N6 * m_dim.N7;
+    return (m_dim.N0 > size_type(0) ? m_stride : size_type(0)) * m_dim.N1 *
+           m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7;
   }
 
   KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
@@ -1882,7 +1882,9 @@ struct ViewOffset<
 
   /* Span of the range space */
   KOKKOS_INLINE_FUNCTION
-  constexpr size_type span() const { return m_dim.N0 * m_stride; }
+  constexpr size_type span() const {
+    return size() > 0 ? m_dim.N0 * m_stride : 0;
+  }
 
   KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
     return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 *
@@ -2398,14 +2400,16 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> {
   /* Span of the range space, largest stride * dimension */
   KOKKOS_INLINE_FUNCTION
   constexpr size_type span() const {
-    return Max(m_dim.N0 * m_stride.S0,
-               Max(m_dim.N1 * m_stride.S1,
-                   Max(m_dim.N2 * m_stride.S2,
-                       Max(m_dim.N3 * m_stride.S3,
-                           Max(m_dim.N4 * m_stride.S4,
-                               Max(m_dim.N5 * m_stride.S5,
-                                   Max(m_dim.N6 * m_stride.S6,
-                                       m_dim.N7 * m_stride.S7)))))));
+    return size() == size_type(0)
+               ? size_type(0)
+               : Max(m_dim.N0 * m_stride.S0,
+                     Max(m_dim.N1 * m_stride.S1,
+                         Max(m_dim.N2 * m_stride.S2,
+                             Max(m_dim.N3 * m_stride.S3,
+                                 Max(m_dim.N4 * m_stride.S4,
+                                     Max(m_dim.N5 * m_stride.S5,
+                                         Max(m_dim.N6 * m_stride.S6,
+                                             m_dim.N7 * m_stride.S7)))))));
   }
 
   KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
diff --git a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
index 33c736c5e01aa2b0e1b33a4b813749ee68ebd220..c27f13e956cef315d12c12607b0766eaa79f2738 100644
--- a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
+++ b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp
@@ -273,7 +273,7 @@ void check_correct_initialization(const Kokkos::InitArguments& argstruct) {
 
     int expected_device = argstruct.device_id;
     if (argstruct.device_id < 0) {
-      expected_device = 0;
+      expected_device = Kokkos::Cuda().cuda_device();
     }
 
     ASSERT_EQ(expected_device, device);
diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt
index f6eb95292c2ebce443d7350155e059f6686d433a..11e803e76026ef9be546255ef1c5406972ca07c4 100644
--- a/packages/kokkos/master_history.txt
+++ b/packages/kokkos/master_history.txt
@@ -19,3 +19,4 @@ tag:  2.8.00     date: 02:05:2019    master: 34931a36    develop: d1659d1d
 tag:  2.9.00     date: 06:24:2019    master: 5d6e7fb3    develop: 4c6cb80a
 tag:  3.0.00     date: 01:31:2020    master: 2983b80d    release-candidate-3.0: fdc904a6
 tag:  3.1.00     date: 04:14:2020    master: cd1b1d0a    develop: fd90af43
+tag:  3.1.1      date: 05:04:2020    master: 785d19f2    release: 2be028bc