Studio: refresh installs that pre-date the paired cudart bundle

expected_install_fingerprint did not hash the new runtime_name /
runtime_sha256 fields, and runtime_payload_health_groups for windows-
cuda only checked llama.dll / ggml-cuda.dll. The combination meant that
an install made before this PR -- the exact installs reporting #5106 --
would still match the post-PR choice: the main asset name + sha,
llama.dll and ggml-cuda.dll were all unchanged, so even though
cudart64_*.dll was missing, existing_install_matches_choice returned
True and the cudart download path in install_from_archives never ran.
Fresh installs got the fix; existing affected installs did not.

This commit:
 * Adds runtime_asset and runtime_sha256 to the fingerprint payload so
   any change to (or first introduction of) the cudart pair invalidates
   pre-existing installs.
 * Refactors write_prebuilt_metadata to call expected_install_fingerprint
   so the recorded fingerprint cannot drift from the expected one when
   new keys are added.
 * Extends runtime_payload_health_groups for windows-cuda to require
   cudart64_*.dll and cublas64_*.dll *only when the choice carries a
   paired runtime archive*. Gating on choice.runtime_name keeps the
   no-pair fallback path (manifest missing cudart hash, upstream
   without paired bundle) from looping on reinstall.

New tests:
 * test_existing_install_matches_plan_windows_cuda_paired_requires_cudart
   -- paired choice rejects installs missing cudart / cublas.
 * test_existing_install_matches_plan_windows_cuda_unpaired_skips_cudart_check
   -- unpaired choice still accepts legacy cudart-less installs.
 * test_existing_install_fingerprint_changes_when_cudart_pair_added
   -- direct fingerprint mismatch between the legacy and paired choice.

Refs #5106
This commit is contained in:
Daniel Han 2026-05-11 10:55:47 +00:00
parent cf7179ae1b
commit 526894a4a5
2 changed files with 353 additions and 19 deletions

View file

@ -4993,24 +4993,21 @@ def write_prebuilt_metadata(
approved_checksums,
llama_tag,
)
fingerprint_payload = {
"published_repo": approved_checksums.repo,
"release_tag": release_tag,
"upstream_tag": llama_tag,
"asset": choice.name,
"asset_sha256": choice.expected_sha256,
"source": choice.source_label,
"source_asset": source_asset_name,
"source_sha256": source_sha256,
"runtime_line": choice.runtime_line,
"bundle_profile": choice.bundle_profile,
"coverage_class": choice.coverage_class,
}
fingerprint = hashlib.sha256(
json.dumps(fingerprint_payload, sort_keys = True, separators = (",", ":")).encode(
"utf-8"
# expected_install_fingerprint is the source of truth for what the
# fingerprint must contain. Calling it here -- instead of inlining a
# parallel payload -- prevents drift where new keys (e.g. the cudart
# pair fields added for #5106) are added to one side but not the
# other, which would cause every install to look stale.
fingerprint = expected_install_fingerprint(
llama_tag = llama_tag,
release_tag = release_tag,
choice = choice,
approved_checksums = approved_checksums,
)
if fingerprint is None:
raise PrebuiltFallback(
f"cannot compute install fingerprint for {choice.name}"
)
).hexdigest()
metadata = {
"requested_tag": requested_tag,
"tag": llama_tag,
@ -5061,6 +5058,14 @@ def expected_install_fingerprint(
"source_asset": source_asset_name,
"source_sha256": source_sha256,
"runtime_line": choice.runtime_line,
# Including the paired runtime archive (Windows cudart bundle)
# in the fingerprint is what forces existing #5106 installs to
# refresh: pre-PR installs hashed nothing in this slot, post-PR
# paired installs hash the cudart sha. Without these two keys
# an existing cudart-less install would keep matching the new
# choice and never re-overlay the cudart DLLs.
"runtime_asset": choice.runtime_name,
"runtime_sha256": choice.runtime_sha256,
"bundle_profile": choice.bundle_profile,
"coverage_class": choice.coverage_class,
}
@ -5121,7 +5126,16 @@ def runtime_payload_health_groups(choice: AssetChoice) -> list[list[str]]:
if choice.install_kind == "windows-cpu":
return [["llama.dll"]]
if choice.install_kind == "windows-cuda":
return [["llama.dll"], ["ggml-cuda.dll"]]
groups = [["llama.dll"], ["ggml-cuda.dll"]]
# When the cudart bundle was paired in (#5106) require its
# DLLs alongside the main archive's payload. install_kind alone
# is not enough -- legacy installs without the cudart pair must
# still pass the health check on the no-pair fallback path,
# otherwise pair-less builds would loop on reinstall forever.
if choice.runtime_name:
groups.append(["cudart64_*.dll"])
groups.append(["cublas64_*.dll"])
return groups
if choice.install_kind == "windows-hip":
return [["llama.dll"], ["*hip*.dll"]]
return []

View file

@ -769,7 +769,11 @@ def write_linux_install_shape(install_dir: Path) -> None:
def write_windows_install_shape(
install_dir: Path, *, include_llama_dll: bool = True, include_cuda_dll: bool = False
install_dir: Path,
*,
include_llama_dll: bool = True,
include_cuda_dll: bool = False,
include_cudart_dlls: bool = False,
) -> None:
runtime_dir = install_dir / "build" / "bin" / "Release"
runtime_dir.mkdir(parents = True, exist_ok = True)
@ -779,6 +783,11 @@ def write_windows_install_shape(
(runtime_dir / "llama.dll").write_bytes(b"DLL")
if include_cuda_dll:
(runtime_dir / "ggml-cuda.dll").write_bytes(b"DLL")
if include_cudart_dlls:
# cudart bundle DLLs that ship in cudart-llama-bin-win-cuda-*-x64.zip
(runtime_dir / "cudart64_12.dll").write_bytes(b"DLL")
(runtime_dir / "cublas64_12.dll").write_bytes(b"DLL")
(runtime_dir / "cublasLt64_12.dll").write_bytes(b"DLL")
(install_dir / "convert_hf_to_gguf.py").write_text(
"#!/usr/bin/env python3\n", encoding = "utf-8"
)
@ -1153,6 +1162,317 @@ def test_existing_install_matches_plan_windows_cuda_requires_cuda_dll(tmp_path:
assert existing_install_matches_plan(install_dir, host, plan) is False
def test_existing_install_matches_plan_windows_cuda_paired_requires_cudart(
    tmp_path: Path,
):
    """Paired windows-cuda choices only match installs holding the cudart DLLs.

    The choice under test carries a paired runtime archive (#5106). The
    install must match the plan while cudart64_*.dll and cublas64_*.dll
    are both on disk, and must be reported as stale as soon as either of
    them is removed, so that a reinstall can drop the runtime DLLs back in.
    """
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    runtime_dir = install_dir / "build" / "bin" / "Release"

    # Same flags are reused below to restore the full on-disk shape.
    full_shape = {
        "include_llama_dll": True,
        "include_cuda_dll": True,
        "include_cudart_dlls": True,
    }
    write_windows_install_shape(install_dir, **full_shape)

    host = HostInfo(
        system = "Windows",
        machine = "AMD64",
        is_windows = True,
        is_linux = False,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = (12, 4),
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = True,
    )
    choice = AssetChoice(
        repo = "unslothai/llama.cpp",
        tag = "release-1",
        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
        url = "https://example.com/x.zip",
        source_label = "published",
        install_kind = "windows-cuda",
        runtime_line = "cuda12",
        expected_sha256 = "a" * 64,
        runtime_name = "cudart-llama-bin-win-cuda-12.4-x64.zip",
        runtime_url = "https://example.com/cudart.zip",
        runtime_sha256 = "c" * 64,
    )

    # Approved hashes: upstream source archive, main prebuilt archive and
    # the paired runtime archive, in that order.
    source_name = source_archive_logical_name("b9001")
    artifacts = {}
    artifacts[source_name] = ApprovedArtifactHash(
        asset_name = source_name,
        sha256 = "b" * 64,
        repo = "ggml-org/llama.cpp",
        kind = "upstream-source",
    )
    artifacts[choice.name] = ApprovedArtifactHash(
        asset_name = choice.name,
        sha256 = choice.expected_sha256,
        repo = "unslothai/llama.cpp",
        kind = "prebuilt",
    )
    artifacts[choice.runtime_name] = ApprovedArtifactHash(
        asset_name = choice.runtime_name,
        sha256 = choice.runtime_sha256,
        repo = "unslothai/llama.cpp",
        kind = "prebuilt",
    )
    checksums = ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "release-1",
        upstream_tag = "b9001",
        source_commit = "deadbeef",
        artifacts = artifacts,
    )

    release = {
        "requested_tag": "latest",
        "llama_tag": "b9001",
        "release_tag": "release-1",
    }
    plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
        attempts = [choice],
        approved_checksums = checksums,
        **release,
    )
    write_prebuilt_metadata(
        install_dir,
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
        **release,
    )

    # Main archive payload plus cudart DLLs present: the install matches.
    matches = existing_install_matches_plan(install_dir, host, plan)
    assert matches is True

    # Removing cublas alone makes the install stale.
    (runtime_dir / "cublas64_12.dll").unlink()
    matches = existing_install_matches_plan(install_dir, host, plan)
    assert matches is False

    # Restore the full shape, then remove cudart alone: stale again.
    write_windows_install_shape(install_dir, **full_shape)
    (runtime_dir / "cudart64_12.dll").unlink()
    matches = existing_install_matches_plan(install_dir, host, plan)
    assert matches is False
def test_existing_install_matches_plan_windows_cuda_unpaired_skips_cudart_check(
    tmp_path: Path,
):
    """Unpaired windows-cuda choices accept installs without cudart DLLs.

    The choice here has no paired runtime archive, so an install that
    only holds llama.dll and ggml-cuda.dll must still match the plan.
    Requiring cudart in this case would make the installer reinstall
    over and over with no archive to take the DLLs from.
    """
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    # Legacy on-disk shape: main archive payload only, no cudart bundle.
    write_windows_install_shape(
        install_dir,
        include_llama_dll = True,
        include_cuda_dll = True,
        include_cudart_dlls = False,
    )

    host = HostInfo(
        system = "Windows",
        machine = "AMD64",
        is_windows = True,
        is_linux = False,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = (12, 4),
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = True,
    )
    # No runtime_name / runtime_url / runtime_sha256: this is the no-pair path.
    choice = AssetChoice(
        repo = "unslothai/llama.cpp",
        tag = "release-1",
        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
        url = "https://example.com/x.zip",
        source_label = "published",
        install_kind = "windows-cuda",
        runtime_line = "cuda12",
        expected_sha256 = "a" * 64,
    )

    source_name = source_archive_logical_name("b9001")
    artifacts = {}
    artifacts[source_name] = ApprovedArtifactHash(
        asset_name = source_name,
        sha256 = "b" * 64,
        repo = "ggml-org/llama.cpp",
        kind = "upstream-source",
    )
    artifacts[choice.name] = ApprovedArtifactHash(
        asset_name = choice.name,
        sha256 = choice.expected_sha256,
        repo = "unslothai/llama.cpp",
        kind = "prebuilt",
    )
    checksums = ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "release-1",
        upstream_tag = "b9001",
        source_commit = "deadbeef",
        artifacts = artifacts,
    )

    release = {
        "requested_tag": "latest",
        "llama_tag": "b9001",
        "release_tag": "release-1",
    }
    plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
        attempts = [choice],
        approved_checksums = checksums,
        **release,
    )
    write_prebuilt_metadata(
        install_dir,
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
        **release,
    )

    matches = existing_install_matches_plan(install_dir, host, plan)
    assert matches is True
def test_existing_install_fingerprint_changes_when_cudart_pair_added(
    tmp_path: Path,
):
    """Adding a paired runtime archive to the choice changes the fingerprint.

    Metadata is written for a legacy choice without a runtime archive;
    the plan then offers the same main asset with a cudart pair attached.
    The two fingerprints must differ and the existing install must not
    match the paired plan. This covers the install-cache side of the
    #5106 fix; the on-disk DLL check is exercised by
    test_existing_install_matches_plan_windows_cuda_paired_requires_cudart.
    """
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    write_windows_install_shape(
        install_dir,
        include_llama_dll = True,
        include_cuda_dll = True,
        include_cudart_dlls = False,
    )

    host = HostInfo(
        system = "Windows",
        machine = "AMD64",
        is_windows = True,
        is_linux = False,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = (12, 4),
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = True,
    )

    # Both choices describe the same main archive; only the paired
    # runtime fields differ between them.
    main_fields = {
        "repo": "unslothai/llama.cpp",
        "tag": "release-1",
        "name": "llama-b9001-bin-win-cuda-12.4-x64.zip",
        "url": "https://example.com/x.zip",
        "source_label": "published",
        "install_kind": "windows-cuda",
        "runtime_line": "cuda12",
        "expected_sha256": "a" * 64,
    }
    legacy_choice = AssetChoice(**main_fields)
    paired_choice = AssetChoice(
        **main_fields,
        runtime_name = "cudart-llama-bin-win-cuda-12.4-x64.zip",
        runtime_url = "https://example.com/cudart.zip",
        runtime_sha256 = "c" * 64,
    )

    source_name = source_archive_logical_name("b9001")
    artifacts = {}
    artifacts[source_name] = ApprovedArtifactHash(
        asset_name = source_name,
        sha256 = "b" * 64,
        repo = "ggml-org/llama.cpp",
        kind = "upstream-source",
    )
    artifacts[legacy_choice.name] = ApprovedArtifactHash(
        asset_name = legacy_choice.name,
        sha256 = legacy_choice.expected_sha256,
        repo = "unslothai/llama.cpp",
        kind = "prebuilt",
    )
    artifacts[paired_choice.runtime_name] = ApprovedArtifactHash(
        asset_name = paired_choice.runtime_name,
        sha256 = paired_choice.runtime_sha256,
        repo = "unslothai/llama.cpp",
        kind = "prebuilt",
    )
    checksums = ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "release-1",
        upstream_tag = "b9001",
        source_commit = "deadbeef",
        artifacts = artifacts,
    )

    # The on-disk metadata records the legacy (no-pair) choice.
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = "b9001",
        release_tag = "release-1",
        choice = legacy_choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )

    # Compare the two fingerprints directly so the hashing side is
    # covered independently of the missing-DLL health check.
    fingerprint_args = {
        "llama_tag": "b9001",
        "release_tag": "release-1",
        "approved_checksums": checksums,
    }
    legacy_fingerprint = INSTALL_LLAMA_PREBUILT.expected_install_fingerprint(
        choice = legacy_choice,
        **fingerprint_args,
    )
    paired_fingerprint = INSTALL_LLAMA_PREBUILT.expected_install_fingerprint(
        choice = paired_choice,
        **fingerprint_args,
    )
    assert legacy_fingerprint != paired_fingerprint, (
        "expected_install_fingerprint must hash runtime_name/runtime_sha256 "
        "so pre-#5322 installs are not falsely considered up-to-date"
    )

    paired_plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
        requested_tag = "latest",
        llama_tag = "b9001",
        release_tag = "release-1",
        attempts = [paired_choice],
        approved_checksums = checksums,
    )
    matches = existing_install_matches_plan(install_dir, host, paired_plan)
    assert matches is False
def test_existing_install_matches_plan_macos_requires_dylibs(tmp_path: Path):
install_dir = tmp_path / "llama.cpp"
install_dir.mkdir()