diff --git a/koboldcpp.py b/koboldcpp.py index 6620c7934..43746486d 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -2737,7 +2737,7 @@ def delete_old_pyinstaller(): for dirname in os.listdir(temp_parentdir_path): absdirpath = os.path.abspath(os.path.join(temp_parentdir_path, dirname)) if os.path.isdir(absdirpath) and os.path.basename(absdirpath).startswith('_MEI'): #only delete kobold pyinstallers - if absdirpath!=selfdirpath and (time.time() - os.path.getctime(absdirpath)) > 3600: # remove if older than 1 hour + if absdirpath!=selfdirpath and (time.time() - os.path.getctime(absdirpath)) > 14400: # remove if older than 4 hours kobold_itemcheck1 = os.path.join(absdirpath, 'koboldcpp_default.dll') kobold_itemcheck2 = os.path.join(absdirpath, 'koboldcpp_default.so') if os.path.exists(kobold_itemcheck1) or os.path.exists(kobold_itemcheck2): @@ -2760,7 +2760,11 @@ def main(launch_args,start_server=True): embedded_kcpp_docs = None #perform some basic cleanup of old temporary directories - delete_old_pyinstaller() + try: + delete_old_pyinstaller() + except Exception as e: + print(f"Error cleaning up orphaned pyinstaller dirs: {e}") + if args.config and len(args.config)==1: if isinstance(args.config[0], str) and os.path.exists(args.config[0]): diff --git a/otherarch/ggml_v3-cuda.cu b/otherarch/ggml_v3-cuda.cu index 1b57b901b..a5201f001 100644 --- a/otherarch/ggml_v3-cuda.cu +++ b/otherarch/ggml_v3-cuda.cu @@ -627,18 +627,18 @@ static __device__ __forceinline__ float warp_reduce_max(float x) { return x; } -// static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { -// #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX -// #pragma unroll -// for (int mask = 16; mask > 0; mask >>= 1) { -// x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32)); -// } -// return x; -// #else -// (void) x; -// bad_arch(); -// #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX -// } +static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { +#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32)); + } + return x; +#else + (void) x; + bad_arch(); +#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX +} static __device__ __forceinline__ float op_repeat(const float a, const float b) { return b;