mirror of
https://github.com/unslothai/unsloth.git
synced 2026-05-20 00:51:36 +00:00
Studio: refresh installs that pre-date the paired cudart bundle
expected_install_fingerprint did not hash the new runtime_name / runtime_sha256 fields, and runtime_payload_health_groups for windows-cuda only checked llama.dll / ggml-cuda.dll. The combination meant that an install made before this PR -- the exact installs reporting #5106 -- would still match the post-PR choice: same main asset name + sha, same llama.dll, same ggml-cuda.dll, missing cudart64_*.dll, but existing_install_matches_choice returned True and the cudart download path in install_from_archives never ran. Fresh installs got the fix; existing affected installs did not.

This commit:
* Adds runtime_asset and runtime_sha256 to the fingerprint payload so any change to (or first introduction of) the cudart pair invalidates pre-existing installs.
* Refactors write_prebuilt_metadata to call expected_install_fingerprint so the recorded fingerprint cannot drift from the expected one when new keys are added.
* Extends runtime_payload_health_groups for windows-cuda to require cudart64_*.dll and cublas64_*.dll *only when the choice carries a paired runtime archive*. Gating on choice.runtime_name keeps the no-pair fallback path (manifest missing cudart hash, upstream without paired bundle) from looping on reinstall.

New tests:
* test_existing_install_matches_plan_windows_cuda_paired_requires_cudart -- paired choice rejects installs missing cudart / cublas.
* test_existing_install_matches_plan_windows_cuda_unpaired_skips_cudart_check -- unpaired choice still accepts legacy cudart-less installs.
* test_existing_install_fingerprint_changes_when_cudart_pair_added -- direct fingerprint mismatch between the legacy and paired choice.

Refs #5106
This commit is contained in:
parent
cf7179ae1b
commit
526894a4a5
2 changed files with 353 additions and 19 deletions
|
|
@ -4993,24 +4993,21 @@ def write_prebuilt_metadata(
|
|||
approved_checksums,
|
||||
llama_tag,
|
||||
)
|
||||
fingerprint_payload = {
|
||||
"published_repo": approved_checksums.repo,
|
||||
"release_tag": release_tag,
|
||||
"upstream_tag": llama_tag,
|
||||
"asset": choice.name,
|
||||
"asset_sha256": choice.expected_sha256,
|
||||
"source": choice.source_label,
|
||||
"source_asset": source_asset_name,
|
||||
"source_sha256": source_sha256,
|
||||
"runtime_line": choice.runtime_line,
|
||||
"bundle_profile": choice.bundle_profile,
|
||||
"coverage_class": choice.coverage_class,
|
||||
}
|
||||
fingerprint = hashlib.sha256(
|
||||
json.dumps(fingerprint_payload, sort_keys = True, separators = (",", ":")).encode(
|
||||
"utf-8"
|
||||
# expected_install_fingerprint is the source of truth for what the
|
||||
# fingerprint must contain. Calling it here -- instead of inlining a
|
||||
# parallel payload -- prevents drift where new keys (e.g. the cudart
|
||||
# pair fields added for #5106) are added to one side but not the
|
||||
# other, which would cause every install to look stale.
|
||||
fingerprint = expected_install_fingerprint(
|
||||
llama_tag = llama_tag,
|
||||
release_tag = release_tag,
|
||||
choice = choice,
|
||||
approved_checksums = approved_checksums,
|
||||
)
|
||||
if fingerprint is None:
|
||||
raise PrebuiltFallback(
|
||||
f"cannot compute install fingerprint for {choice.name}"
|
||||
)
|
||||
).hexdigest()
|
||||
metadata = {
|
||||
"requested_tag": requested_tag,
|
||||
"tag": llama_tag,
|
||||
|
|
@ -5061,6 +5058,14 @@ def expected_install_fingerprint(
|
|||
"source_asset": source_asset_name,
|
||||
"source_sha256": source_sha256,
|
||||
"runtime_line": choice.runtime_line,
|
||||
# Including the paired runtime archive (Windows cudart bundle)
|
||||
# in the fingerprint is what forces existing #5106 installs to
|
||||
# refresh: pre-PR installs hashed nothing in this slot, post-PR
|
||||
# paired installs hash the cudart sha. Without these two keys
|
||||
# an existing cudart-less install would keep matching the new
|
||||
# choice and never re-overlay the cudart DLLs.
|
||||
"runtime_asset": choice.runtime_name,
|
||||
"runtime_sha256": choice.runtime_sha256,
|
||||
"bundle_profile": choice.bundle_profile,
|
||||
"coverage_class": choice.coverage_class,
|
||||
}
|
||||
|
|
@ -5121,7 +5126,16 @@ def runtime_payload_health_groups(choice: AssetChoice) -> list[list[str]]:
|
|||
if choice.install_kind == "windows-cpu":
|
||||
return [["llama.dll"]]
|
||||
if choice.install_kind == "windows-cuda":
|
||||
return [["llama.dll"], ["ggml-cuda.dll"]]
|
||||
groups = [["llama.dll"], ["ggml-cuda.dll"]]
|
||||
# When the cudart bundle was paired in (#5106) require its
|
||||
# DLLs alongside the main archive's payload. install_kind alone
|
||||
# is not enough -- legacy installs without the cudart pair must
|
||||
# still pass the health check on the no-pair fallback path,
|
||||
# otherwise pair-less builds would loop on reinstall forever.
|
||||
if choice.runtime_name:
|
||||
groups.append(["cudart64_*.dll"])
|
||||
groups.append(["cublas64_*.dll"])
|
||||
return groups
|
||||
if choice.install_kind == "windows-hip":
|
||||
return [["llama.dll"], ["*hip*.dll"]]
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -769,7 +769,11 @@ def write_linux_install_shape(install_dir: Path) -> None:
|
|||
|
||||
|
||||
def write_windows_install_shape(
|
||||
install_dir: Path, *, include_llama_dll: bool = True, include_cuda_dll: bool = False
|
||||
install_dir: Path,
|
||||
*,
|
||||
include_llama_dll: bool = True,
|
||||
include_cuda_dll: bool = False,
|
||||
include_cudart_dlls: bool = False,
|
||||
) -> None:
|
||||
runtime_dir = install_dir / "build" / "bin" / "Release"
|
||||
runtime_dir.mkdir(parents = True, exist_ok = True)
|
||||
|
|
@ -779,6 +783,11 @@ def write_windows_install_shape(
|
|||
(runtime_dir / "llama.dll").write_bytes(b"DLL")
|
||||
if include_cuda_dll:
|
||||
(runtime_dir / "ggml-cuda.dll").write_bytes(b"DLL")
|
||||
if include_cudart_dlls:
|
||||
# cudart bundle DLLs that ship in cudart-llama-bin-win-cuda-*-x64.zip
|
||||
(runtime_dir / "cudart64_12.dll").write_bytes(b"DLL")
|
||||
(runtime_dir / "cublas64_12.dll").write_bytes(b"DLL")
|
||||
(runtime_dir / "cublasLt64_12.dll").write_bytes(b"DLL")
|
||||
(install_dir / "convert_hf_to_gguf.py").write_text(
|
||||
"#!/usr/bin/env python3\n", encoding = "utf-8"
|
||||
)
|
||||
|
|
@ -1153,6 +1162,317 @@ def test_existing_install_matches_plan_windows_cuda_requires_cuda_dll(tmp_path:
|
|||
assert existing_install_matches_plan(install_dir, host, plan) is False
|
||||
|
||||
|
||||
def test_existing_install_matches_plan_windows_cuda_paired_requires_cudart(
    tmp_path: Path,
):
    """A paired-cudart choice (#5106) only matches a complete install.

    When the choice carries a paired runtime archive, the install must be
    reported as stale unless cudart64_*.dll and cublas64_*.dll are both
    present on disk. Otherwise an already-broken install would keep
    matching and the reinstall that drops cudart in would be skipped.
    """
    published_repo = "unslothai/llama.cpp"
    published_tag = "release-1"
    upstream_tag = "b9001"

    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    release_bin_dir = install_dir / "build" / "bin" / "Release"

    # Main archive payload plus the cudart bundle DLLs.
    complete_shape = dict(
        include_llama_dll = True,
        include_cuda_dll = True,
        include_cudart_dlls = True,
    )
    write_windows_install_shape(install_dir, **complete_shape)

    host = HostInfo(
        system = "Windows",
        machine = "AMD64",
        is_windows = True,
        is_linux = False,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = (12, 4),
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = True,
    )
    choice = AssetChoice(
        repo = published_repo,
        tag = published_tag,
        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
        url = "https://example.com/x.zip",
        source_label = "published",
        install_kind = "windows-cuda",
        runtime_line = "cuda12",
        expected_sha256 = "a" * 64,
        runtime_name = "cudart-llama-bin-win-cuda-12.4-x64.zip",
        runtime_url = "https://example.com/cudart.zip",
        runtime_sha256 = "c" * 64,
    )

    # Approved hashes: upstream source, main prebuilt, paired runtime.
    approved_artifacts = {}
    approved_artifacts[source_archive_logical_name(upstream_tag)] = (
        ApprovedArtifactHash(
            asset_name = source_archive_logical_name(upstream_tag),
            sha256 = "b" * 64,
            repo = "ggml-org/llama.cpp",
            kind = "upstream-source",
        )
    )
    approved_artifacts[choice.name] = ApprovedArtifactHash(
        asset_name = choice.name,
        sha256 = choice.expected_sha256,
        repo = published_repo,
        kind = "prebuilt",
    )
    approved_artifacts[choice.runtime_name] = ApprovedArtifactHash(
        asset_name = choice.runtime_name,
        sha256 = choice.runtime_sha256,
        repo = published_repo,
        kind = "prebuilt",
    )
    checksums = ApprovedReleaseChecksums(
        repo = published_repo,
        release_tag = published_tag,
        upstream_tag = upstream_tag,
        source_commit = "deadbeef",
        artifacts = approved_artifacts,
    )

    plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
        requested_tag = "latest",
        llama_tag = upstream_tag,
        release_tag = published_tag,
        attempts = [choice],
        approved_checksums = checksums,
    )
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = upstream_tag,
        release_tag = published_tag,
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )

    # Everything on disk: the install is accepted as-is.
    matches_when_complete = existing_install_matches_plan(install_dir, host, plan)
    assert matches_when_complete is True

    # Remove cublas: the install must now be flagged for reinstall.
    (release_bin_dir / "cublas64_12.dll").unlink()
    matches_without_cublas = existing_install_matches_plan(install_dir, host, plan)
    assert matches_without_cublas is False

    # Restore the full shape, then remove cudart: stale again.
    write_windows_install_shape(install_dir, **complete_shape)
    (release_bin_dir / "cudart64_12.dll").unlink()
    matches_without_cudart = existing_install_matches_plan(install_dir, host, plan)
    assert matches_without_cudart is False
|
||||
|
||||
|
||||
def test_existing_install_matches_plan_windows_cuda_unpaired_skips_cudart_check(
    tmp_path: Path,
):
    """An unpaired choice must still accept a cudart-less legacy install.

    When the choice has no paired runtime archive (the manifest dropped it,
    or upstream did not ship cudart), the health check must not demand the
    cudart DLLs. install_from_archives would have no cudart source to drop
    in, so requiring them would make the installer reinstall forever.
    """
    published_repo = "unslothai/llama.cpp"
    published_tag = "release-1"
    upstream_tag = "b9001"

    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    # Legacy shape: llama.dll and ggml-cuda.dll only, no cudart bundle.
    write_windows_install_shape(
        install_dir,
        include_llama_dll = True,
        include_cuda_dll = True,
        include_cudart_dlls = False,
    )

    host = HostInfo(
        system = "Windows",
        machine = "AMD64",
        is_windows = True,
        is_linux = False,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = (12, 4),
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = True,
    )
    # No runtime_name / runtime_url / runtime_sha256: the unpaired path.
    choice = AssetChoice(
        repo = published_repo,
        tag = published_tag,
        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
        url = "https://example.com/x.zip",
        source_label = "published",
        install_kind = "windows-cuda",
        runtime_line = "cuda12",
        expected_sha256 = "a" * 64,
    )

    approved_artifacts = {}
    approved_artifacts[source_archive_logical_name(upstream_tag)] = (
        ApprovedArtifactHash(
            asset_name = source_archive_logical_name(upstream_tag),
            sha256 = "b" * 64,
            repo = "ggml-org/llama.cpp",
            kind = "upstream-source",
        )
    )
    approved_artifacts[choice.name] = ApprovedArtifactHash(
        asset_name = choice.name,
        sha256 = choice.expected_sha256,
        repo = published_repo,
        kind = "prebuilt",
    )
    checksums = ApprovedReleaseChecksums(
        repo = published_repo,
        release_tag = published_tag,
        upstream_tag = upstream_tag,
        source_commit = "deadbeef",
        artifacts = approved_artifacts,
    )

    plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
        requested_tag = "latest",
        llama_tag = upstream_tag,
        release_tag = published_tag,
        attempts = [choice],
        approved_checksums = checksums,
    )
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = upstream_tag,
        release_tag = published_tag,
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )

    still_matches = existing_install_matches_plan(install_dir, host, plan)
    assert still_matches is True
|
||||
|
||||
|
||||
def test_existing_install_fingerprint_changes_when_cudart_pair_added(
    tmp_path: Path,
):
    """Gaining a runtime archive must invalidate a recorded legacy install.

    A Windows CUDA install recorded before the cudart pairing (pre-#5322)
    has to look stale once the choice carries a runtime archive; otherwise
    the fingerprint match would keep skipping the reinstall that drops the
    cudart DLLs in. This covers the install-cache half of the #5106 fix;
    the on-disk health-check half is covered by
    test_existing_install_matches_plan_windows_cuda_paired_requires_cudart.
    """
    published_repo = "unslothai/llama.cpp"
    published_tag = "release-1"
    upstream_tag = "b9001"

    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    write_windows_install_shape(
        install_dir,
        include_llama_dll = True,
        include_cuda_dll = True,
        include_cudart_dlls = False,
    )

    host = HostInfo(
        system = "Windows",
        machine = "AMD64",
        is_windows = True,
        is_linux = False,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = (12, 4),
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = True,
    )

    # Both choices describe the same main archive; only the pairing differs.
    main_archive_fields = dict(
        repo = published_repo,
        tag = published_tag,
        name = "llama-b9001-bin-win-cuda-12.4-x64.zip",
        url = "https://example.com/x.zip",
        source_label = "published",
        install_kind = "windows-cuda",
        runtime_line = "cuda12",
        expected_sha256 = "a" * 64,
    )
    legacy_choice = AssetChoice(**main_archive_fields)
    paired_choice = AssetChoice(
        **main_archive_fields,
        runtime_name = "cudart-llama-bin-win-cuda-12.4-x64.zip",
        runtime_url = "https://example.com/cudart.zip",
        runtime_sha256 = "c" * 64,
    )

    approved_artifacts = {}
    approved_artifacts[source_archive_logical_name(upstream_tag)] = (
        ApprovedArtifactHash(
            asset_name = source_archive_logical_name(upstream_tag),
            sha256 = "b" * 64,
            repo = "ggml-org/llama.cpp",
            kind = "upstream-source",
        )
    )
    approved_artifacts[legacy_choice.name] = ApprovedArtifactHash(
        asset_name = legacy_choice.name,
        sha256 = legacy_choice.expected_sha256,
        repo = published_repo,
        kind = "prebuilt",
    )
    approved_artifacts[paired_choice.runtime_name] = ApprovedArtifactHash(
        asset_name = paired_choice.runtime_name,
        sha256 = paired_choice.runtime_sha256,
        repo = published_repo,
        kind = "prebuilt",
    )
    checksums = ApprovedReleaseChecksums(
        repo = published_repo,
        release_tag = published_tag,
        upstream_tag = upstream_tag,
        source_commit = "deadbeef",
        artifacts = approved_artifacts,
    )

    # The metadata on disk records the legacy (no-pair) choice.
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = upstream_tag,
        release_tag = published_tag,
        choice = legacy_choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )

    # Compare the two fingerprints directly. The health check would also
    # reject this install (cudart64_*.dll is absent), so the plan-level
    # assertion at the end cannot isolate the fingerprint half on its own.
    fingerprint_inputs = dict(
        llama_tag = upstream_tag,
        release_tag = published_tag,
        approved_checksums = checksums,
    )
    legacy_fingerprint = INSTALL_LLAMA_PREBUILT.expected_install_fingerprint(
        choice = legacy_choice,
        **fingerprint_inputs,
    )
    paired_fingerprint = INSTALL_LLAMA_PREBUILT.expected_install_fingerprint(
        choice = paired_choice,
        **fingerprint_inputs,
    )
    assert legacy_fingerprint != paired_fingerprint, (
        "expected_install_fingerprint must hash runtime_name/runtime_sha256 "
        "so pre-#5322 installs are not falsely considered up-to-date"
    )

    # A plan offering the paired choice must reject the legacy install.
    paired_plan = INSTALL_LLAMA_PREBUILT.InstallReleasePlan(
        requested_tag = "latest",
        llama_tag = upstream_tag,
        release_tag = published_tag,
        attempts = [paired_choice],
        approved_checksums = checksums,
    )
    matches_paired_plan = existing_install_matches_plan(
        install_dir, host, paired_plan
    )
    assert matches_paired_plan is False
|
||||
|
||||
|
||||
def test_existing_install_matches_plan_macos_requires_dylibs(tmp_path: Path):
|
||||
install_dir = tmp_path / "llama.cpp"
|
||||
install_dir.mkdir()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue