Studio: also scan torch/lib in Windows pip nvidia DLL resolver

PyTorch's Windows CUDA wheels frequently bundle cudart64_X.dll and
cublas64_X.dll directly under Lib/site-packages/torch/lib/ instead of
shipping separate nvidia-cuda-runtime-cuXX / nvidia-cublas-cuXX wheels.
On those installs _windows_pip_nvidia_dll_dirs previously returned
nothing useful, and llama-server.exe fell back to needing a system CUDA
toolkit on PATH -- the original #5106 failure mode.

The install-side equivalent python_runtime_dirs in
install_llama_prebuilt.py already treats torch/lib as a Python runtime
DLL source for the same reason. Bring the runtime resolver into parity
so torch-bundled-CUDA installs find their cudart at llama-server start.

Updates the existing test that codified the bug (asserted torch/lib was
excluded), and adds three new cases: pickup, combined-with-nvidia, and
the must-be-a-directory guard.
This commit is contained in:
Daniel Han 2026-05-11 10:52:02 +00:00
parent afb1f7cf59
commit c1c8a074de
2 changed files with 66 additions and 13 deletions

View file

@ -958,23 +958,35 @@ class LlamaCppBackend:
@staticmethod
def _windows_pip_nvidia_dll_dirs(prefix: str) -> list[str]:
    """Return DLL dirs from pip-installed CUDA wheels under
    ``<prefix>/Lib/site-packages/`` so llama-server.exe can load
    ``cudart64_X.dll`` / ``cublas64_X.dll`` without a system CUDA
    toolkit. Mirrors the Linux ``nvidia/cu*/lib`` LD_LIBRARY_PATH
    block, with parity for two additional Windows-specific layouts.

    Covered patterns:

    * ``nvidia/<pkg>/bin`` -- modular ``nvidia-cuda-runtime-cuXX``,
      ``nvidia-cublas-cuXX`` wheels.
    * ``nvidia/<pkg>/Library/bin`` -- conda-style wheel repacks.
    * ``torch/lib`` -- PyTorch's own CUDA-bundled Windows wheel,
      which ships ``cudart64_*.dll`` directly under ``torch/lib``
      instead of as separate ``nvidia-*`` wheels (#5106). The
      install-side equivalent ``python_runtime_dirs`` in
      ``install_llama_prebuilt.py`` covers this path for the same
      reason.

    Args:
        prefix: Root of the Python installation / environment; the
            ``Lib/site-packages`` tree is looked up beneath it.

    Returns:
        Existing directories only, nvidia wheel dirs first (``bin``
        before ``Library/bin``, each group sorted by path) followed by
        ``torch/lib``. Empty when nothing matches.
    """
    import glob as _glob

    site_packages = os.path.join(prefix, "Lib", "site-packages")
    nvidia_root = os.path.join(site_packages, "nvidia")
    out: list[str] = []
    # Each layout is listed exactly once: listing the same glob twice
    # would append every matching directory twice.
    for pattern in (
        os.path.join(nvidia_root, "*", "bin"),
        os.path.join(nvidia_root, "*", "Library", "bin"),
    ):
        # glob order is filesystem-dependent; sort so the resulting DLL
        # search order is reproducible across runs and machines.
        out.extend(
            nv_dir for nv_dir in sorted(_glob.glob(pattern)) if os.path.isdir(nv_dir)
        )
    torch_lib = os.path.join(site_packages, "torch", "lib")
    # isdir (not exists): a broken install may leave ``lib`` as a file.
    if os.path.isdir(torch_lib):
        out.append(torch_lib)
    return out
@staticmethod

View file

@ -123,12 +123,53 @@ class TestWindowsPipNvidiaDllDirs:
result = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
assert len(result) == 4
def test_does_not_walk_outside_known_paths(self, tmp_path):
    """Unrelated site-packages contents never show up in the result."""
    # Only nvidia/<pkg>/{bin,Library/bin} and torch/lib are picked
    # up. Unrelated site-packages contents (numpy, scipy, ...) must
    # be ignored. torch/lib is deliberately NOT created here: it is a
    # recognised DLL source, so its presence would make the result
    # non-empty.
    site = tmp_path / "Lib" / "site-packages"
    (site / "numpy").mkdir(parents = True)
    (site / "scipy" / "linalg").mkdir(parents = True)
    result = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
    assert result == []
def test_picks_up_torch_lib(self, tmp_path):
    """torch/lib on its own is returned as the single DLL directory."""
    # PyTorch's Windows CUDA wheel can ship cudart64_X.dll /
    # cublas64_X.dll inside Lib/site-packages/torch/lib/ rather than
    # in separate nvidia-* wheels; skipping that directory leaves
    # torch-bundled-CUDA installs hitting #5106.
    bundled = tmp_path.joinpath("Lib", "site-packages", "torch", "lib")
    bundled.mkdir(parents = True)
    bundled.joinpath("cudart64_12.dll").write_bytes(b"")
    found = LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
    # Exactly one entry, and it is the torch/lib directory.
    assert [Path(entry) for entry in found] == [bundled]
def test_torch_lib_combined_with_nvidia_wheels(self, tmp_path):
    """Modular nvidia-* wheel dirs and torch/lib are reported together."""
    wheel_layout = {
        "cuda_runtime": "bin",
        "cublas": "bin",
    }
    _make_nvidia_layout(tmp_path, wheel_layout)
    bundled = tmp_path.joinpath("Lib", "site-packages", "torch", "lib")
    bundled.mkdir(parents = True)
    bundled.joinpath("cudart64_13.dll").write_bytes(b"")
    found = [
        Path(entry)
        for entry in LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path))
    ]
    # Two nvidia/<pkg>/bin dirs plus torch/lib.
    assert len(found) == 3
    assert {entry.name for entry in found} == {"bin", "lib"}
    assert bundled in found
def test_torch_lib_must_be_a_directory(self, tmp_path):
    """A plain file named torch/lib (broken install) is not returned."""
    torch_pkg = tmp_path.joinpath("Lib", "site-packages", "torch")
    torch_pkg.mkdir(parents = True)
    # Occupy the ``lib`` name with a regular file instead of a folder.
    torch_pkg.joinpath("lib").write_bytes(b"not a dir")
    assert LlamaCppBackend._windows_pip_nvidia_dll_dirs(str(tmp_path)) == []