Studio: also scan torch/lib in Windows pip nvidia DLL resolver

PyTorch's Windows CUDA wheels frequently bundle `cudart64_X.dll` and `cublas64_X.dll` directly under `Lib/site-packages/torch/lib/` instead of shipping separate `nvidia-cuda-runtime-cuXX` / `nvidia-cublas-cuXX` wheels. On those installs, `_windows_pip_nvidia_dll_dirs` previously returned nothing useful, and `llama-server.exe` fell back to needing a system CUDA toolkit on PATH -- the original #5106 failure mode. The install-side equivalent, `python_runtime_dirs` in `install_llama_prebuilt.py`, already treats `torch/lib` as a Python runtime DLL source for the same reason. This change brings the runtime resolver into parity so that torch-bundled-CUDA installs find their cudart when llama-server starts. It updates the existing test that codified the bug (it asserted that `torch/lib` was excluded) and adds three new cases: pickup, combined-with-nvidia, and the must-be-a-directory guard.
2026-05-20 00:51:36 +00:00 · 2026-05-11 10:52:02 +00:00 · 2026-05-11 10:52:02 +00:00 · c1c8a074de
commit c1c8a074de
parent afb1f7cf59
2 changed files with 66 additions and 13 deletions
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@ -958,23 +958,35 @@ class LlamaCppBackend:

    @staticmethod
    def _windows_pip_nvidia_dll_dirs(prefix: str) -> list[str]:
-        """Return DLL dirs from pip-installed nvidia wheels under
-        ``<prefix>/Lib/site-packages/nvidia/`` so llama-server.exe can
-        load cudart64_X.dll / cublas64_X.dll without a system CUDA
-        toolkit. Mirrors the Linux nvidia/cu*/lib LD_LIBRARY_PATH
-        block. Wheel layouts vary, so we cover the two seen patterns:
-        ``nvidia/<pkg>/bin`` and ``nvidia/<pkg>/Library/bin``."""
+        """Return DLL dirs from pip-installed CUDA wheels under
+        ``<prefix>/Lib/site-packages/`` so llama-server.exe can load
+        ``cudart64_X.dll`` / ``cublas64_X.dll`` without a system CUDA
+        toolkit. Mirrors the Linux ``nvidia/cu*/lib`` LD_LIBRARY_PATH
+        block, with parity for two additional Windows-specific layouts.
+        Covered patterns:
+          * ``nvidia/<pkg>/bin`` -- modular ``nvidia-cuda-runtime-cuXX``,
+            ``nvidia-cublas-cuXX`` wheels.
+          * ``nvidia/<pkg>/Library/bin`` -- conda-style wheel repacks.
+          * ``torch/lib`` -- PyTorch's own CUDA-bundled Windows wheel,
+            which ships ``cudart64_*.dll`` directly under ``torch/lib``
+            instead of as separate ``nvidia-*`` wheels (#5106). The
+            install-side equivalent ``python_runtime_dirs`` in
+            ``install_llama_prebuilt.py`` covers this path for the same
+            reason."""
        import glob as _glob

-        nvidia_root = os.path.join(prefix, "Lib", "site-packages", "nvidia")
+        site_packages = os.path.join(prefix, "Lib", "site-packages")
        out: list[str] = []
        for pattern in (
-            os.path.join(nvidia_root, "*", "bin"),
-            os.path.join(nvidia_root, "*", "Library", "bin"),
+            os.path.join(site_packages, "nvidia", "*", "bin"),
+            os.path.join(site_packages, "nvidia", "*", "Library", "bin"),
        ):
            for nv_dir in _glob.glob(pattern):
                if os.path.isdir(nv_dir):
                    out.append(nv_dir)
+        torch_lib = os.path.join(site_packages, "torch", "lib")
+        if os.path.isdir(torch_lib):
+            out.append(torch_lib)
        return out

    @staticmethod
--- a/studio/backend/tests/test_llama_cpp_windows_nvidia_path.py
+++ b/studio/backend/tests/test_llama_cpp_windows_nvidia_path.py
@ -123,12 +123,53 @@ class TestWindowsPipNvidiaDllDirs:
        result = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
        assert len(result) == 4

-    def test_does_not_walk_outside_nvidia(self, tmp_path):
-        # Ensure unrelated site-packages contents are not picked up.
+    def test_does_not_walk_outside_known_paths(self, tmp_path):
+        # Only nvidia/<pkg>/{bin,Library/bin} and torch/lib are picked
+        # up. Unrelated site-packages contents (numpy, scipy, ...) must
+        # be ignored.
        site = tmp_path / "Lib" / "site-packages"
-        (site / "torch" / "lib").mkdir(parents = True)
-        (site / "torch" / "lib" / "stub.dll").write_bytes(b"")
        (site / "numpy").mkdir(parents = True)
+        (site / "scipy" / "linalg").mkdir(parents = True)
+        result = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
+        assert result == []
+
+    def test_picks_up_torch_lib(self, tmp_path):
+        # PyTorch's Windows CUDA wheel bundles cudart64_X.dll /
+        # cublas64_X.dll directly under Lib/site-packages/torch/lib/
+        # instead of as separate nvidia-* wheels. Without this, users
+        # on torch-bundled-CUDA installs still hit #5106.
+        torch_lib = tmp_path / "Lib" / "site-packages" / "torch" / "lib"
+        torch_lib.mkdir(parents = True)
+        (torch_lib / "cudart64_12.dll").write_bytes(b"")
+        result = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
+        assert len(result) == 1
+        assert Path(result[0]) == torch_lib
+
+    def test_torch_lib_combined_with_nvidia_wheels(self, tmp_path):
+        # Both modular nvidia-* wheels and torch/lib are returned when
+        # present together.
+        _make_nvidia_layout(
+            tmp_path,
+            {
+                "cuda_runtime": "bin",
+                "cublas": "bin",
+            },
+        )
+        torch_lib = tmp_path / "Lib" / "site-packages" / "torch" / "lib"
+        torch_lib.mkdir(parents = True)
+        (torch_lib / "cudart64_13.dll").write_bytes(b"")
+        result = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
+        assert len(result) == 3
+        names = {Path(p).name for p in result}
+        assert names == {"bin", "lib"}
+        assert any(Path(p) == torch_lib for p in result)
+
+    def test_torch_lib_must_be_a_directory(self, tmp_path):
+        # If torch/lib exists as a file (broken install), it is
+        # ignored, not returned.
+        site = tmp_path / "Lib" / "site-packages" / "torch"
+        site.mkdir(parents = True)
+        (site / "lib").write_bytes(b"not a dir")
        result = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
        assert result == []