mirror of
https://github.com/unslothai/unsloth.git
synced 2026-05-17 03:56:07 +00:00
CI(Core): add TRL trainer + Config auto-discovery sweep
New step "TRL trainer + Config auto-discovery sweep" mirrors the
auto-detection in unsloth/models/rl.py:
- rl.py:1934-1949 (`patch_trl_rl_trainers`) walks dir(trl.trainer),
keeps lowercase `<x>_trainer` names except `base_trainer`.
- rl.py:553-569 picks the unique `<prefix>*Trainer` and
`<prefix>*Config` per trainer module.
- rl.py:575-615 falls back to a sibling `<x>_config.py` module
(TRL 0.26+ split) and then to an MRO walk into experimental
parent modules (thin-wrapper trainers).
Three pytest cases per cell:
1. AST-parse every *_trainer and *_config source file on disk via
importlib.util.find_spec(...).origin. Reads files WITHOUT
triggering optional-dep imports (grpo_trainer requires vllm,
nash_md/online_dpo/rloo/xpo do too). Catches TRL source-level
drift on any matrix cell.
2. Drive unsloth's discovery rules over every trainer file.
Records ok / import-skipped / discovery-skipped / fail.
Hard-fails when a trainer imports cleanly + has 1 *Trainer but
no *Config can be resolved via the three rules.
Asserts >=3 trainers fully discover (sft/reward/dpo are the
historical core; below that signals a TRL refactor regression).
3. Orphan check: every *_trainer module must have a sibling
*_config.py OR an inline *Config; raises if neither exists,
because that combination silently breaks `_patch_trl_rl_trainers`.
Local verification on TRL 0.25.1: 31/31 modules AST-parse,
10 trainers fully discover (bco/cpo/dpo/gkd/kto/orpo/ppo/prm/reward/
sft), 5 import-skipped (grpo/nash_md/online_dpo/rloo/xpo, all need
vllm which is intentionally not installed in the CI matrix).
Wall-time ~10-30s per cell, dominated by lazy-module dir()
materialisation.
This commit is contained in:
parent
20e06ac52e
commit
99c42d3e68
1 changed files with 256 additions and 0 deletions
256
.github/workflows/consolidated-tests-ci.yml
vendored
256
.github/workflows/consolidated-tests-ci.yml
vendored
|
|
@ -922,6 +922,262 @@ jobs:
|
|||
python -m pytest -q --tb=short tests/_zoo_compiler_cache_shim.py
|
||||
rm -f tests/_zoo_compiler_cache_shim.py
|
||||
|
||||
- name: TRL trainer + Config auto-discovery sweep (mirrors rl.py:1934-1949)
|
||||
# Mirror unsloth/models/rl.py:patch_trl_rl_trainers — walk
|
||||
# dir(trl.trainer), pick every `<x>_trainer` (lowercase, not
|
||||
# `base_trainer`), and apply the same *Trainer / *Config
|
||||
# discovery rules `_patch_trl_rl_trainers` uses (rl.py:553-620).
|
||||
# Surfaces TRL drift before it crashes Unsloth at training time:
|
||||
# - trainer module that imports cleanly but exposes no
|
||||
# <prefix>*Trainer / <prefix>*Config -> auto-discovery would
|
||||
# log a warning and skip; we count skip-with-reason so a
|
||||
# newly added trainer is visible.
|
||||
# - *_config.py module rename (TRL 0.26+ split many configs
|
||||
# out) -> exercises the same fallback chain rl.py:575-615.
|
||||
# - Trainer that fails to import (e.g. grpo_trainer needs vllm
|
||||
# which we don't install) -> recorded as `import-skipped`,
|
||||
# not `fail`, matching the try/except in rl.py:1944-1948.
|
||||
# Per-cell wall-time ~10-30s, dominated by AST parse + dir().
|
||||
run: |
|
||||
set -euxo pipefail
|
||||
cat > tests/_trl_trainer_discovery_shim.py <<'PY'
|
||||
# Auto-generated by .github/workflows/consolidated-tests-ci.yml.
|
||||
# Walks every *_trainer / *_config module in trl.trainer and
|
||||
# validates that unsloth's auto-discovery rules in
|
||||
# unsloth/models/rl.py:_patch_trl_rl_trainers (lines 542-620,
|
||||
# 1934-1949) still pick out exactly one *Trainer and one
|
||||
# *Config per module on the matrix's TRL version.
|
||||
import sys, pathlib, importlib, importlib.util, ast, inspect
|
||||
|
||||
sys.path.insert(0, str(pathlib.Path(__file__).parent))
|
||||
import _zoo_aggressive_cuda_spoof as _spoof
|
||||
_spoof.apply()
|
||||
|
||||
import pytest
|
||||
pytest.importorskip("trl")
|
||||
import trl # noqa: F401 (forces lazy-module init)
|
||||
import trl.trainer
|
||||
|
||||
|
||||
# Replicate rl.py:1939-1943 verbatim.
|
||||
def _trainer_files():
    """List the trainer submodule names exposed by ``trl.trainer``.

    A name is kept when it is entirely lowercase, ends with
    ``_trainer`` and is not ``base_trainer``. Names are returned in the
    order ``dir(trl.trainer)`` yields them.
    """
    selected = []
    for attr in dir(trl.trainer):
        if attr == "base_trainer":
            continue
        if attr.islower() and attr.endswith("_trainer"):
            selected.append(attr)
    return selected
|
||||
|
||||
|
||||
def _config_files():
    """List the config submodule names exposed by ``trl.trainer``.

    A name is kept when it is entirely lowercase and ends with
    ``_config``. Names are returned in the order ``dir(trl.trainer)``
    yields them.
    """
    selected = []
    for attr in dir(trl.trainer):
        if attr.islower() and attr.endswith("_config"):
            selected.append(attr)
    return selected
|
||||
|
||||
|
||||
def _ast_parse_module_via_spec(qual_name: str):
|
||||
"""AST-parse a module's source on disk WITHOUT importing it.
|
||||
`trl.trainer` uses _LazyModule so `find_spec` resolves the
|
||||
file path without firing the module-level `__init__`. This
|
||||
dodges optional-dep ImportErrors (e.g. grpo_trainer's vllm
|
||||
import) and still surfaces real syntax drift in the file."""
|
||||
spec = importlib.util.find_spec(qual_name)
|
||||
if spec is None or not spec.origin:
|
||||
return None, "no spec"
|
||||
path = pathlib.Path(spec.origin)
|
||||
if not path.is_file():
|
||||
return None, f"spec.origin not a file: {path}"
|
||||
src = path.read_text(encoding="utf-8")
|
||||
ast.parse(src, filename=str(path))
|
||||
return path, None
|
||||
|
||||
|
||||
def test_every_trl_trainer_and_config_module_ast_parses():
    """Stage 1: AST-parse every trainer/config module file on disk.

    Each name from ``_trainer_files()`` and ``_config_files()`` is
    resolved under ``trl.trainer`` and parsed without importing it.
    Any module that cannot be located or parsed is collected, printed,
    and makes the test fail.
    """
    failures = []
    parsed = 0
    module_names = _trainer_files() + _config_files()
    for short_name in module_names:
        qual = f"trl.trainer.{short_name}"
        try:
            _, reason = _ast_parse_module_via_spec(qual)
        except SyntaxError as exc:
            failures.append((qual, f"SyntaxError: {exc}"))
            continue
        except Exception as exc:
            failures.append((qual, f"{type(exc).__name__}: {exc}"))
            continue
        # A non-empty reason means the file was never parsed at all.
        if reason:
            failures.append((qual, reason))
        else:
            parsed += 1
    print(f"AST-parsed {parsed} TRL trainer+config modules; failed={len(failures)}")
    for qual, reason in failures:
        print(f" AST FAIL {qual}: {reason}")
    assert not failures, f"AST parse failed for {len(failures)} TRL modules"
|
||||
|
||||
|
||||
def _apply_unsloth_discovery_rules(mod, trainer_file):
|
||||
"""Replicate the four endswith filters in
|
||||
rl.py:553-569 verbatim."""
|
||||
prefix = trainer_file.split("_")[0]
|
||||
names = [
|
||||
x for x in dir(mod)
|
||||
if x.endswith("Trainer") and x != "Trainer"
|
||||
and not x.startswith("_") and prefix in x.lower()
|
||||
]
|
||||
configs = [
|
||||
x for x in dir(mod)
|
||||
if x.endswith("Config") and x != "Config"
|
||||
and not x.startswith("_") and prefix in x.lower()
|
||||
]
|
||||
return names, configs
|
||||
|
||||
|
||||
def _resolve_config_via_fallbacks(trainer_file, name_list, mod):
|
||||
"""Replicate rl.py:575-615: try the sibling *_config.py
|
||||
module, then the MRO walk fallback. Returns the resolved
|
||||
config-name list (length 0 or 1)."""
|
||||
# Fallback 1: <prefix>_config.py module sibling.
|
||||
cfg_module_name = trainer_file.replace("_trainer", "_config")
|
||||
try:
|
||||
cfg_mod = getattr(trl.trainer, cfg_module_name)
|
||||
except Exception:
|
||||
cfg_mod = None
|
||||
if cfg_mod is not None:
|
||||
prefix = trainer_file.split("_")[0]
|
||||
hits = [
|
||||
x for x in dir(cfg_mod)
|
||||
if x.endswith("Config") and x != "Config"
|
||||
and not x.startswith("_") and prefix in x.lower()
|
||||
]
|
||||
if len(hits) == 1:
|
||||
return hits
|
||||
# Fallback 2: MRO walk into experimental parent module.
|
||||
if len(name_list) != 1:
|
||||
return []
|
||||
try:
|
||||
trainer_cls = getattr(mod, name_list[0])
|
||||
except Exception:
|
||||
return []
|
||||
prefix = trainer_file.split("_")[0]
|
||||
for parent in trainer_cls.__mro__[1:]:
|
||||
if parent is object:
|
||||
continue
|
||||
parent_mod = inspect.getmodule(parent)
|
||||
if parent_mod is None:
|
||||
continue
|
||||
if parent_mod.__name__ == f"trl.trainer.{trainer_file}":
|
||||
continue
|
||||
hits = [
|
||||
x for x in dir(parent_mod)
|
||||
if x.endswith("Config") and x != "Config"
|
||||
and not x.startswith("_") and prefix in x.lower()
|
||||
]
|
||||
if len(hits) == 1:
|
||||
return hits
|
||||
return []
|
||||
|
||||
|
||||
def test_unsloth_auto_discovery_finds_trainer_and_config_per_module():
    """Stage 2: run the discovery rules over every trainer module.

    Each trainer lands in exactly one bucket:

    * ``import_skipped``: attribute access on ``trl.trainer`` raised.
    * ``discovery_skipped``: the module did not yield exactly one
      ``*Trainer`` name.
    * ``fail``: one ``*Trainer`` was found but no unique ``*Config``
      could be resolved, even via the fallbacks.
    * ok: one ``*Trainer`` and one ``*Config`` were resolved.

    The test fails on any ``fail`` entry, or when fewer than three
    trainers end up ok.
    """
    import_skipped, discovery_skipped, fail = [], [], []
    ok = 0
    for trainer_file in _trainer_files():
        qual = f"trl.trainer.{trainer_file}"
        try:
            mod = getattr(trl.trainer, trainer_file)
        except Exception as exc:
            import_skipped.append((qual, f"{type(exc).__name__}: {exc}"))
            continue

        trainers, configs = _apply_unsloth_discovery_rules(mod, trainer_file)
        if len(trainers) != 1:
            discovery_skipped.append((qual, f"trainers={trainers}"))
            continue

        # Only consult the fallbacks when the module itself is ambiguous
        # or has no matching *Config.
        if len(configs) != 1:
            configs = _resolve_config_via_fallbacks(trainer_file, trainers, mod)

        if len(configs) == 1:
            ok += 1
            print(f" OK {qual}: trainer={trainers[0]}, config={configs[0]}")
            continue

        reason = (
            f"trainer={trainers[0]} but config not found "
            "(checked module, *_config sibling, and MRO)"
        )
        fail.append((qual, reason))

    print(
        f"\nDiscovery: ok={ok} import_skipped={len(import_skipped)} "
        f"discovery_skipped={len(discovery_skipped)} fail={len(fail)}"
    )
    report = (
        (" IMPORT-SKIP", import_skipped),
        (" DISC-SKIP", discovery_skipped),
        (" FAIL", fail),
    )
    for label, rows in report:
        for q, r in rows:
            print(f"{label} {q}: {r}")

    # Hard contract: every TRAINER that imports cleanly AND has exactly
    # one *Trainer must also resolve exactly one *Config via one of the
    # three rules. import-skipped + discovery-skipped (no/multiple
    # *Trainer) are tolerated.
    assert not fail, (
        f"unsloth discovery rules failed for {len(fail)} trainers"
    )
    # Sanity: at least 3 trainers should fully discover on any matrix
    # cell (sft + reward + dpo are the historical core).
    assert ok >= 3, (
        f"only {ok} trainers fully discovered; expected >=3 "
        "(sft/reward/dpo). Possible TRL surface regression."
    )
|
||||
|
||||
|
||||
def test_orphan_trainer_modules_do_not_exist():
    """Stage 3: flag trainer modules that have no config anywhere.

    A trainer passes when a sibling ``<x>_config`` module spec can be
    found under ``trl.trainer``. Without a sibling, it passes when the
    trainer module itself exposes at least one matching ``*Config``
    name. Trainers whose module cannot be loaded are skipped. Anything
    left over is an orphan and fails the test.
    """

    def _has_sibling_config(trainer_file):
        sibling = trainer_file.replace("_trainer", "_config")
        return importlib.util.find_spec(f"trl.trainer.{sibling}") is not None

    orphans = []
    for trainer_file in _trainer_files():
        if _has_sibling_config(trainer_file):
            continue
        # No sibling -> require an inline *Config in the trainer module
        # itself (resolved via discovery rules).
        try:
            mod = getattr(trl.trainer, trainer_file)
        except Exception:
            # Optional-dep failure -> skip; the AST-parse stage already
            # covered the file.
            continue
        inline_configs = _apply_unsloth_discovery_rules(mod, trainer_file)[1]
        if not inline_configs:
            orphans.append(trainer_file)

    assert not orphans, (
        "Orphan TRL trainer modules with neither sibling "
        f"<x>_config.py nor an inline *Config: {orphans}. "
        "unsloth auto-discovery would silently skip these."
    )
|
||||
PY
|
||||
python -m pytest -q --tb=short -s tests/_trl_trainer_discovery_shim.py
|
||||
rm -f tests/_trl_trainer_discovery_shim.py
|
||||
|
||||
- name: llama.cpp install via unsloth_zoo.llama_cpp + `llama-cli --help` smoke
|
||||
# Exercise the canonical `unsloth_zoo.llama_cpp.install_llama_cpp`
|
||||
# flow that GGUF export uses at runtime: clone ggml-org/llama.cpp
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue