diff --git a/koboldcpp.py b/koboldcpp.py
index 6620c7934..43746486d 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -2737,7 +2737,7 @@ def delete_old_pyinstaller():
     for dirname in os.listdir(temp_parentdir_path):
         absdirpath = os.path.abspath(os.path.join(temp_parentdir_path, dirname))
         if os.path.isdir(absdirpath) and os.path.basename(absdirpath).startswith('_MEI'): #only delete kobold pyinstallers
-            if absdirpath!=selfdirpath and (time.time() - os.path.getctime(absdirpath)) > 3600: # remove if older than 1 hour
+            if absdirpath!=selfdirpath and (time.time() - os.path.getctime(absdirpath)) > 14400: # remove if older than 4 hours
                 kobold_itemcheck1 = os.path.join(absdirpath, 'koboldcpp_default.dll')
                 kobold_itemcheck2 = os.path.join(absdirpath, 'koboldcpp_default.so')
                 if os.path.exists(kobold_itemcheck1) or os.path.exists(kobold_itemcheck2):
@@ -2760,7 +2760,11 @@ def main(launch_args,start_server=True):
     embedded_kcpp_docs = None
 
     #perform some basic cleanup of old temporary directories
-    delete_old_pyinstaller()
+    try:
+        delete_old_pyinstaller()
+    except Exception as e:
+        print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
+
 
     if args.config and len(args.config)==1:
         if isinstance(args.config[0], str) and os.path.exists(args.config[0]):
diff --git a/otherarch/ggml_v3-cuda.cu b/otherarch/ggml_v3-cuda.cu
index 1b57b901b..a5201f001 100644
--- a/otherarch/ggml_v3-cuda.cu
+++ b/otherarch/ggml_v3-cuda.cu
@@ -627,18 +627,18 @@ static __device__ __forceinline__ float warp_reduce_max(float x) {
     return x;
 }
 
-// static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
-// #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
-// #pragma unroll
-//     for (int mask = 16; mask > 0; mask >>= 1) {
-//         x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
-//     }
-//     return x;
-// #else
-//     (void) x;
-//     bad_arch();
-// #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
-// }
+static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { // half2 overload: reduces x to its maximum across the 32 lanes of a warp
+#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
+#pragma unroll
+    for (int mask = 16; mask > 0; mask >>= 1) { // XOR-shuffle offsets 16, 8, 4, 2, 1
+        x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32)); // take the max with the value held by lane (lane ^ mask); all 32 lanes participate
+    }
+    return x;
+#else
+    (void) x; // parameter is otherwise unused on this path
+    bad_arch(); // NOTE(review): defined outside this hunk; presumably flags an unsupported architecture and does not return -- confirm
+#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
+}
 
 static __device__ __forceinline__ float op_repeat(const float a, const float b) {
     return b;