Studio: refresh installs that pre-date the paired cudart bundle

expected_install_fingerprint did not hash the new runtime_name / runtime_sha256 fields, and runtime_payload_health_groups for windows-cuda only checked llama.dll / ggml-cuda.dll. Together, these meant that an install made before this PR -- exactly the installs reporting #5106 -- still matched the post-PR choice: same main asset name + sha, same llama.dll, same ggml-cuda.dll, with cudart64_*.dll missing. existing_install_matches_choice therefore returned True, and the cudart download path in install_from_archives never ran. Fresh installs got the fix; existing affected installs did not.

This commit:

* Adds runtime_asset (taken from choice.runtime_name) and runtime_sha256 to the fingerprint payload so any change to (or first introduction of) the cudart pair invalidates pre-existing installs.
* Refactors write_prebuilt_metadata to call expected_install_fingerprint so the recorded fingerprint cannot drift from the expected one when new keys are added.
* Extends runtime_payload_health_groups for windows-cuda to require cudart64_*.dll and cublas64_*.dll *only when the choice carries a paired runtime archive*. Gating on choice.runtime_name keeps the no-pair fallback path (manifest missing cudart hash, upstream without paired bundle) from looping on reinstall.

New tests:

* test_existing_install_matches_plan_windows_cuda_paired_requires_cudart -- paired choice rejects installs missing cudart / cublas.
* test_existing_install_matches_plan_windows_cuda_unpaired_skips_cudart_check -- unpaired choice still accepts legacy cudart-less installs.
* test_existing_install_fingerprint_changes_when_cudart_pair_added -- direct fingerprint mismatch between the legacy and paired choice.

Refs #5106
2026-05-20 00:51:36 +00:00 · 2026-05-11 10:55:47 +00:00 · 2026-05-11 10:55:47 +00:00 · 526894a4a5
commit 526894a4a5
parent cf7179ae1b
2 changed files with 353 additions and 19 deletions
--- a/studio/install_llama_prebuilt.py
+++ b/studio/install_llama_prebuilt.py
@ -4993,24 +4993,21 @@ def write_prebuilt_metadata(
        approved_checksums,
        llama_tag,
    )
-    fingerprint_payload = {
-        "published_repo": approved_checksums.repo,
-        "release_tag": release_tag,
-        "upstream_tag": llama_tag,
-        "asset": choice.name,
-        "asset_sha256": choice.expected_sha256,
-        "source": choice.source_label,
-        "source_asset": source_asset_name,
-        "source_sha256": source_sha256,
-        "runtime_line": choice.runtime_line,
-        "bundle_profile": choice.bundle_profile,
-        "coverage_class": choice.coverage_class,
-    }
-    fingerprint = hashlib.sha256(
-        json.dumps(fingerprint_payload, sort_keys = True, separators = (",", ":")).encode(
-            "utf-8"
+    # expected_install_fingerprint is the source of truth for what the
+    # fingerprint must contain. Calling it here -- instead of inlining a
+    # parallel payload -- prevents drift where new keys (e.g. the cudart
+    # pair fields added for #5106) are added to one side but not the
+    # other, which would cause every install to look stale.
+    fingerprint = expected_install_fingerprint(
+        llama_tag = llama_tag,
+        release_tag = release_tag,
+        choice = choice,
+        approved_checksums = approved_checksums,
+    )
+    if fingerprint is None:
+        raise PrebuiltFallback(
+            f"cannot compute install fingerprint for {choice.name}"
        )
-    ).hexdigest()
    metadata = {
        "requested_tag": requested_tag,
        "tag": llama_tag,
@ -5061,6 +5058,14 @@ def expected_install_fingerprint(
        "source_asset": source_asset_name,
        "source_sha256": source_sha256,
        "runtime_line": choice.runtime_line,
+        # Including the paired runtime archive (Windows cudart bundle)
+        # in the fingerprint is what forces existing #5106 installs to
+        # refresh: pre-PR installs hashed nothing in this slot, post-PR
+        # paired installs hash the cudart sha. Without these two keys
+        # an existing cudart-less install would keep matching the new
+        # choice and never re-overlay the cudart DLLs.
+        "runtime_asset": choice.runtime_name,
+        "runtime_sha256": choice.runtime_sha256,
        "bundle_profile": choice.bundle_profile,
        "coverage_class": choice.coverage_class,
    }
@ -5121,7 +5126,16 @@ def runtime_payload_health_groups(choice: AssetChoice) -> list[list[str]]:
    if choice.install_kind == "windows-cpu":
        return [["llama.dll"]]
    if choice.install_kind == "windows-cuda":
-        return [["llama.dll"], ["ggml-cuda.dll"]]
+        groups = [["llama.dll"], ["ggml-cuda.dll"]]
+        # When the cudart bundle was paired in (#5106) require its
+        # DLLs alongside the main archive's payload. install_kind alone
+        # is not enough -- legacy installs without the cudart pair must
+        # still pass the health check on the no-pair fallback path,
+        # otherwise pair-less builds would loop on reinstall forever.
+        if choice.runtime_name:
+            groups.append(["cudart64_*.dll"])
+            groups.append(["cublas64_*.dll"])
+        return groups
    if choice.install_kind == "windows-hip":
        return [["llama.dll"], ["*hip*.dll"]]
    return []
--- a/tests/studio/install/test_install_llama_prebuilt_logic.py
+++ b/tests/studio/install/test_install_llama_prebuilt_logic.py
@ -769,7 +769,11 @@ def write_linux_install_shape(install_dir: Path) -> None:


 def write_windows_install_shape(
-    install_dir: Path, *, include_llama_dll: bool = True, include_cuda_dll: bool = False
+    install_dir: Path,
+    *,
+    include_llama_dll: bool = True,
+    include_cuda_dll: bool = False,
+    include_cudart_dlls: bool = False,
 ) -> None:
    runtime_dir = install_dir / "build" / "bin" / "Release"
    runtime_dir.mkdir(parents = True, exist_ok = True)
@ -779,6 +783,11 @@ def write_windows_install_shape(
        (runtime_dir / "llama.dll").write_bytes(b"DLL")
    if include_cuda_dll:
        (runtime_dir / "ggml-cuda.dll").write_bytes(b"DLL")
+    if include_cudart_dlls:
+        # cudart bundle DLLs that ship in cudart-llama-bin-win-cuda-*-x64.zip
+        (runtime_dir / "cudart64_12.dll").write_bytes(b"DLL")
+        (runtime_dir / "cublas64_12.dll").write_bytes(b"DLL")
+        (runtime_dir / "cublasLt64_12.dll").write_bytes(b"DLL")
    (install_dir / "convert_hf_to_gguf.py").write_text(
        "#!/usr/bin/env python3\n", encoding = "utf-8"
    )
@ -1153,6 +1162,317 @@ def test_existing_install_matches_plan_windows_cuda_requires_cuda_dll(tmp_path:
    assert existing_install_matches_plan(install_dir, host, plan) is False


+def test_existing_install_matches_plan_windows_cuda_paired_requires_cudart(
+    tmp_path: Path,
+):
+    """When the choice ships a paired cudart bundle (#5106), the install
+    is considered stale unless cudart64_*.dll and cublas64_*.dll are
+    actually on disk. Otherwise existing broken installs would keep
+    matching and skip the reinstall that drops cudart in."""
+    install_dir = tmp_path / "llama.cpp"
+    install_dir.mkdir()
+    write_windows_install_shape(
+        install_dir,
+        include_llama_dll = True,
+        include_cuda_dll = True,
+        include_cudart_dlls = True,
+    )
+
+    host = HostInfo(
+        system = "Windows",
+        machine = "AMD64",
+        is_windows = True,
+        is_linux = False,
+        is_macos = False,
+        is_x86_64 = True,
+        is_arm64 = False,
+        nvidia_smi = None,
+        driver_cuda_version = (12, 4),
+        compute_caps = [],
+        visible_cuda_devices = None,
+        has_physical_nvidia = False,
+        has_usable_nvidia = True,
+    )
+    choice = AssetChoice(
+        repo = "unslothai/llama.cpp",
+        tag = "release-1",
+        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
+        url = "https://example.com/x.zip",
+        source_label = "published",
+        install_kind = "windows-cuda",
+        runtime_line = "cuda12",
+        expected_sha256 = "a" * 64,
+        runtime_name = "cudart-llama-bin-win-cuda-12.4-x64.zip",
+        runtime_url = "https://example.com/cudart.zip",
+        runtime_sha256 = "c" * 64,
+    )
+    checksums = ApprovedReleaseChecksums(
+        repo = "unslothai/llama.cpp",
+        release_tag = "release-1",
+        upstream_tag = "b9001",
+        source_commit = "deadbeef",
+        artifacts = {
+            source_archive_logical_name("b9001"): ApprovedArtifactHash(
+                asset_name = source_archive_logical_name("b9001"),
+                sha256 = "b" * 64,
+                repo = "ggml-org/llama.cpp",
+                kind = "upstream-source",
+            ),
+            choice.name: ApprovedArtifactHash(
+                asset_name = choice.name,
+                sha256 = choice.expected_sha256,
+                repo = "unslothai/llama.cpp",
+                kind = "prebuilt",
+            ),
+            choice.runtime_name: ApprovedArtifactHash(
+                asset_name = choice.runtime_name,
+                sha256 = choice.runtime_sha256,
+                repo = "unslothai/llama.cpp",
+                kind = "prebuilt",
+            ),
+        },
+    )
+    plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
+        requested_tag = "latest",
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        attempts = [choice],
+        approved_checksums = checksums,
+    )
+    write_prebuilt_metadata(
+        install_dir,
+        requested_tag = "latest",
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        choice = choice,
+        approved_checksums = checksums,
+        prebuilt_fallback_used = False,
+    )
+
+    # Fully populated install (main archive + cudart DLLs) matches.
+    assert existing_install_matches_plan(install_dir, host, plan) is True
+
+    # cublas missing -- stale, must reinstall.
+    (install_dir / "build" / "bin" / "Release" / "cublas64_12.dll").unlink()
+    assert existing_install_matches_plan(install_dir, host, plan) is False
+
+    # cudart missing -- stale, must reinstall.
+    write_windows_install_shape(
+        install_dir,
+        include_llama_dll = True,
+        include_cuda_dll = True,
+        include_cudart_dlls = True,
+    )
+    (install_dir / "build" / "bin" / "Release" / "cudart64_12.dll").unlink()
+    assert existing_install_matches_plan(install_dir, host, plan) is False
+
+
+def test_existing_install_matches_plan_windows_cuda_unpaired_skips_cudart_check(
+    tmp_path: Path,
+):
+    """If the choice has no paired runtime archive (manifest dropped it,
+    or upstream did not ship cudart), legacy installs without cudart on
+    disk must still pass the health check -- otherwise the installer
+    would loop on reinstall forever because install_from_archives has no
+    cudart source to drop in."""
+    install_dir = tmp_path / "llama.cpp"
+    install_dir.mkdir()
+    write_windows_install_shape(
+        install_dir,
+        include_llama_dll = True,
+        include_cuda_dll = True,
+        include_cudart_dlls = False,
+    )
+
+    host = HostInfo(
+        system = "Windows",
+        machine = "AMD64",
+        is_windows = True,
+        is_linux = False,
+        is_macos = False,
+        is_x86_64 = True,
+        is_arm64 = False,
+        nvidia_smi = None,
+        driver_cuda_version = (12, 4),
+        compute_caps = [],
+        visible_cuda_devices = None,
+        has_physical_nvidia = False,
+        has_usable_nvidia = True,
+    )
+    choice = AssetChoice(
+        repo = "unslothai/llama.cpp",
+        tag = "release-1",
+        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
+        url = "https://example.com/x.zip",
+        source_label = "published",
+        install_kind = "windows-cuda",
+        runtime_line = "cuda12",
+        expected_sha256 = "a" * 64,
+    )
+    checksums = ApprovedReleaseChecksums(
+        repo = "unslothai/llama.cpp",
+        release_tag = "release-1",
+        upstream_tag = "b9001",
+        source_commit = "deadbeef",
+        artifacts = {
+            source_archive_logical_name("b9001"): ApprovedArtifactHash(
+                asset_name = source_archive_logical_name("b9001"),
+                sha256 = "b" * 64,
+                repo = "ggml-org/llama.cpp",
+                kind = "upstream-source",
+            ),
+            choice.name: ApprovedArtifactHash(
+                asset_name = choice.name,
+                sha256 = choice.expected_sha256,
+                repo = "unslothai/llama.cpp",
+                kind = "prebuilt",
+            ),
+        },
+    )
+    plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
+        requested_tag = "latest",
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        attempts = [choice],
+        approved_checksums = checksums,
+    )
+    write_prebuilt_metadata(
+        install_dir,
+        requested_tag = "latest",
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        choice = choice,
+        approved_checksums = checksums,
+        prebuilt_fallback_used = False,
+    )
+
+    assert existing_install_matches_plan(install_dir, host, plan) is True
+
+
+def test_existing_install_fingerprint_changes_when_cudart_pair_added(
+    tmp_path: Path,
+):
+    """Existing pre-#5322 Windows CUDA installs (no paired cudart) must
+    be treated as stale once the choice gains a runtime archive,
+    otherwise the fingerprint match would keep skipping the reinstall
+    that drops the cudart DLLs in. This is the install-cache half of the
+    #5106 fix -- the health-check half lives in the test above."""
+    install_dir = tmp_path / "llama.cpp"
+    install_dir.mkdir()
+    write_windows_install_shape(
+        install_dir,
+        include_llama_dll = True,
+        include_cuda_dll = True,
+        include_cudart_dlls = False,
+    )
+
+    host = HostInfo(
+        system = "Windows",
+        machine = "AMD64",
+        is_windows = True,
+        is_linux = False,
+        is_macos = False,
+        is_x86_64 = True,
+        is_arm64 = False,
+        nvidia_smi = None,
+        driver_cuda_version = (12, 4),
+        compute_caps = [],
+        visible_cuda_devices = None,
+        has_physical_nvidia = False,
+        has_usable_nvidia = True,
+    )
+    legacy_choice = AssetChoice(
+        repo = "unslothai/llama.cpp",
+        tag = "release-1",
+        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
+        url = "https://example.com/x.zip",
+        source_label = "published",
+        install_kind = "windows-cuda",
+        runtime_line = "cuda12",
+        expected_sha256 = "a" * 64,
+    )
+    paired_choice = AssetChoice(
+        repo = "unslothai/llama.cpp",
+        tag = "release-1",
+        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
+        url = "https://example.com/x.zip",
+        source_label = "published",
+        install_kind = "windows-cuda",
+        runtime_line = "cuda12",
+        expected_sha256 = "a" * 64,
+        runtime_name = "cudart-llama-bin-win-cuda-12.4-x64.zip",
+        runtime_url = "https://example.com/cudart.zip",
+        runtime_sha256 = "c" * 64,
+    )
+    checksums = ApprovedReleaseChecksums(
+        repo = "unslothai/llama.cpp",
+        release_tag = "release-1",
+        upstream_tag = "b9001",
+        source_commit = "deadbeef",
+        artifacts = {
+            source_archive_logical_name("b9001"): ApprovedArtifactHash(
+                asset_name = source_archive_logical_name("b9001"),
+                sha256 = "b" * 64,
+                repo = "ggml-org/llama.cpp",
+                kind = "upstream-source",
+            ),
+            legacy_choice.name: ApprovedArtifactHash(
+                asset_name = legacy_choice.name,
+                sha256 = legacy_choice.expected_sha256,
+                repo = "unslothai/llama.cpp",
+                kind = "prebuilt",
+            ),
+            paired_choice.runtime_name: ApprovedArtifactHash(
+                asset_name = paired_choice.runtime_name,
+                sha256 = paired_choice.runtime_sha256,
+                repo = "unslothai/llama.cpp",
+                kind = "prebuilt",
+            ),
+        },
+    )
+
+    # Install metadata was written for the legacy (no-pair) choice.
+    write_prebuilt_metadata(
+        install_dir,
+        requested_tag = "latest",
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        choice = legacy_choice,
+        approved_checksums = checksums,
+        prebuilt_fallback_used = False,
+    )
+
+    # New plan offers the paired choice -- fingerprint must differ so
+    # the install is refreshed. The health check would also catch this
+    # because cudart64_*.dll is missing on disk; we test the fingerprint
+    # half explicitly by comparing the two fingerprints directly.
+    legacy_fingerprint = INSTALL_LLAMA_PREBUILT.expected_install_fingerprint(
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        choice = legacy_choice,
+        approved_checksums = checksums,
+    )
+    paired_fingerprint = INSTALL_LLAMA_PREBUILT.expected_install_fingerprint(
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        choice = paired_choice,
+        approved_checksums = checksums,
+    )
+    assert legacy_fingerprint != paired_fingerprint, (
+        "expected_install_fingerprint must hash runtime_name/runtime_sha256 "
+        "so pre-#5322 installs are not falsely considered up-to-date"
+    )
+
+    paired_plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
+        requested_tag = "latest",
+        llama_tag = "b9001",
+        release_tag = "release-1",
+        attempts = [paired_choice],
+        approved_checksums = checksums,
+    )
+    assert existing_install_matches_plan(install_dir, host, paired_plan) is False
+
+
 def test_existing_install_matches_plan_macos_requires_dylibs(tmp_path: Path):
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()