CI(Core): add TRL trainer + Config auto-discovery sweep

New step "TRL trainer + Config auto-discovery sweep" mirrors the
auto-detection in unsloth/models/rl.py:
  - rl.py:1934-1949 (`patch_trl_rl_trainers`) walks dir(trl.trainer),
    keeps lowercase `<x>_trainer` names except `base_trainer`.
  - rl.py:553-569 picks the unique `<prefix>*Trainer` and
    `<prefix>*Config` per trainer module.
  - rl.py:575-615 falls back to a sibling `<x>_config.py` module
    (TRL 0.26+ split) and then to an MRO walk into experimental
    parent modules (thin-wrapper trainers).

Three pytest cases per cell:
  1. AST-parse every *_trainer and *_config source file on disk via
     importlib.util.find_spec(...).origin. Reads files WITHOUT
     triggering optional-dep imports (grpo_trainer requires vllm,
     nash_md/online_dpo/rloo/xpo do too). Catches TRL source-level
     drift on any matrix cell.
  2. Drive unsloth's discovery rules over every trainer file.
     Records ok / import-skipped / discovery-skipped / fail.
     Hard-fails when a trainer imports cleanly + has 1 *Trainer but
     no *Config can be resolved via the three rules.
     Asserts >=3 trainers fully discover (sft/reward/dpo are the
     historical core; below that signals a TRL refactor regression).
  3. Orphan check: every *_trainer module must have a sibling
     *_config.py OR an inline *Config; raises if neither exists,
     because that combination silently breaks `_patch_trl_rl_trainers`.

Local verification on TRL 0.25.1: 31/31 modules AST-parse,
10 trainers fully discover (bco/cpo/dpo/gkd/kto/orpo/ppo/prm/reward/
sft), 5 import-skipped (grpo/nash_md/online_dpo/rloo/xpo, all need
vllm which is intentionally not installed in the CI matrix).
Wall-time ~10-30s per cell, dominated by lazy-module dir()
materialisation.
This commit is contained in:
Daniel Han 2026-05-07 08:10:13 +00:00
parent 20e06ac52e
commit 99c42d3e68

View file

@ -922,6 +922,262 @@ jobs:
python -m pytest -q --tb=short tests/_zoo_compiler_cache_shim.py
rm -f tests/_zoo_compiler_cache_shim.py
- name: TRL trainer + Config auto-discovery sweep (mirrors rl.py:1934-1949)
# Mirror unsloth/models/rl.py:patch_trl_rl_trainers — walk
# dir(trl.trainer), pick every `<x>_trainer` (lowercase, not
# `base_trainer`), and apply the same *Trainer / *Config
# discovery rules `_patch_trl_rl_trainers` uses (rl.py:553-620).
# Surfaces TRL drift before it crashes Unsloth at training time:
# - trainer module that imports cleanly but does not expose
# exactly one <prefix>*Trainer -> auto-discovery would log a
# warning and skip; we record it as `discovery-skipped` with a
# reason so a newly added trainer is visible. Exactly one
# *Trainer with no resolvable <prefix>*Config is a hard failure.
# - *_config.py module rename (TRL 0.26+ split many configs
# out) -> exercises the same fallback chain rl.py:575-615.
# - Trainer that fails to import (e.g. grpo_trainer needs vllm
# which we don't install) -> recorded as `import-skipped`,
# not `fail`, matching the try/except in rl.py:1944-1948.
# Per-cell wall-time ~10-30s, dominated by AST parse + dir().
run: |
set -euxo pipefail
cat > tests/_trl_trainer_discovery_shim.py <<'PY'
# Auto-generated by .github/workflows/consolidated-tests-ci.yml.
# Walks every *_trainer / *_config module in trl.trainer and
# validates that unsloth's auto-discovery rules in
# unsloth/models/rl.py:_patch_trl_rl_trainers (lines 542-620,
# 1934-1949) still pick out exactly one *Trainer and one
# *Config per module on the matrix's TRL version.
import sys, pathlib, importlib, importlib.util, ast, inspect
sys.path.insert(0, str(pathlib.Path(__file__).parent))
import _zoo_aggressive_cuda_spoof as _spoof
_spoof.apply()
import pytest
pytest.importorskip("trl")
import trl # noqa: F401 (forces lazy-module init)
import trl.trainer
# Replicate rl.py:1939-1943 verbatim.
def _trainer_files():
    """Return the trainer-module names listed by ``dir(trl.trainer)``.

    A name qualifies when it is all lowercase, ends in ``_trainer`` and is
    not ``base_trainer``. The result keeps the order ``dir()`` produced.
    """
    selected = []
    for attr in dir(trl.trainer):
        if not attr.islower():
            continue
        if not attr.endswith("_trainer"):
            continue
        if attr == "base_trainer":
            continue
        selected.append(attr)
    return selected
def _config_files():
    """Return the config-module names listed by ``dir(trl.trainer)``.

    A name qualifies when it is all lowercase and ends in ``_config``. The
    result keeps the order ``dir()`` produced.
    """
    selected = []
    for attr in dir(trl.trainer):
        if attr.islower() and attr.endswith("_config"):
            selected.append(attr)
    return selected
def _ast_parse_module_via_spec(qual_name: str):
    """AST-parse a module's source file on disk WITHOUT importing that module.

    The file is located through ``importlib.util.find_spec``; for a dotted
    name that imports the parent package(s) but does not execute the target
    module's own top-level code. This dodges optional-dep ImportErrors
    (e.g. grpo_trainer's vllm import) and still surfaces real syntax drift
    in the file.

    The source is handed to ``ast.parse`` as raw bytes rather than as text
    decoded with a hard-coded UTF-8 codec, so the compiler applies its own
    encoding detection. A leading UTF-8 BOM or a PEP 263 coding cookie is
    therefore honoured instead of being reported as a spurious failure.

    Args:
        qual_name: Fully qualified module name, e.g.
            ``"trl.trainer.sft_trainer"``.

    Returns:
        ``(path, None)`` when the file was found and parsed, or
        ``(None, reason)`` when no file-backed spec could be resolved.

    Raises:
        SyntaxError: If the file exists but is not valid Python source.
    """
    spec = importlib.util.find_spec(qual_name)
    if spec is None or not spec.origin:
        return None, "no spec"
    path = pathlib.Path(spec.origin)
    # origin may be a marker such as "built-in" or "frozen", not a real path.
    if not path.is_file():
        return None, f"spec.origin not a file: {path}"
    # Bytes in, so BOM / coding-cookie handling is left to the compiler.
    ast.parse(path.read_bytes(), filename=str(path))
    return path, None
def test_every_trl_trainer_and_config_module_ast_parses():
    """Stage 1: AST-parse every trainer/config source file straight off disk.

    Works from the file on disk, so a TRL source-level syntax issue shows
    up on any matrix cell without triggering optional-dep imports. Every
    module that cannot be located or parsed is collected and reported
    before the final assertion.
    """
    failures = []
    parsed = 0
    module_names = [*_trainer_files(), *_config_files()]
    for module_name in module_names:
        qualified = f"trl.trainer.{module_name}"
        try:
            _, problem = _ast_parse_module_via_spec(qualified)
        except SyntaxError as exc:
            # Kept apart from the generic handler so subclasses such as
            # IndentationError are still labelled "SyntaxError".
            failures.append((qualified, f"SyntaxError: {exc}"))
            continue
        except Exception as exc:
            failures.append((qualified, f"{type(exc).__name__}: {exc}"))
            continue
        if problem:
            failures.append((qualified, problem))
        else:
            parsed += 1
    print(f"AST-parsed {parsed} TRL trainer+config modules; failed={len(failures)}")
    for qualified, reason in failures:
        print(f" AST FAIL {qualified}: {reason}")
    assert not failures, f"AST parse failed for {len(failures)} TRL modules"
def _apply_unsloth_discovery_rules(mod, trainer_file):
    """Select candidate ``*Trainer`` and ``*Config`` names from ``dir(mod)``.

    Intended to replicate the filters in rl.py:553-569. A name is kept when
    it ends with the suffix, is not the bare suffix itself, does not start
    with an underscore, and its lowercased form contains the prefix (the
    part of ``trainer_file`` before the first ``_``).

    Returns:
        A ``(trainer_names, config_names)`` pair of lists, each in
        ``dir()`` order.
    """
    prefix = trainer_file.split("_")[0]

    def _public_names_with_suffix(suffix):
        picked = []
        for attr in dir(mod):
            if attr == suffix or attr.startswith("_"):
                continue
            if attr.endswith(suffix) and prefix in attr.lower():
                picked.append(attr)
        return picked

    trainer_names = _public_names_with_suffix("Trainer")
    config_names = _public_names_with_suffix("Config")
    return trainer_names, config_names
def _resolve_config_via_fallbacks(trainer_file, name_list, mod):
    """Resolve a ``*Config`` name via the two fallbacks of rl.py:575-615.

    Fallback 1 looks in the sibling ``<x>_config`` attribute of
    ``trl.trainer``. Fallback 2 walks the MRO of the single discovered
    trainer class and looks in each parent's defining module.

    Args:
        trainer_file: Trainer module name, e.g. ``"sft_trainer"``.
        name_list: Trainer class names already discovered in ``mod``.
        mod: The imported trainer module.

    Returns:
        A one-element list holding the resolved config name, or ``[]``
        when neither fallback yields exactly one candidate.
    """
    prefix = trainer_file.split("_")[0]

    def _matching_configs(namespace):
        # Same filter as the in-module *Config discovery.
        found = []
        for attr in dir(namespace):
            if attr == "Config" or attr.startswith("_"):
                continue
            if attr.endswith("Config") and prefix in attr.lower():
                found.append(attr)
        return found

    # Fallback 1: <prefix>_config.py module sibling.
    sibling_name = trainer_file.replace("_trainer", "_config")
    try:
        sibling = getattr(trl.trainer, sibling_name)
    except Exception:
        sibling = None
    if sibling is not None:
        candidates = _matching_configs(sibling)
        if len(candidates) == 1:
            return candidates

    # Fallback 2: MRO walk into experimental parent module. Only attempted
    # when exactly one trainer class was discovered.
    if len(name_list) != 1:
        return []
    try:
        trainer_cls = getattr(mod, name_list[0])
    except Exception:
        return []
    own_module_name = f"trl.trainer.{trainer_file}"
    for base in trainer_cls.__mro__[1:]:
        if base is object:
            continue
        owner = inspect.getmodule(base)
        # Skip parents with no resolvable module and parents that live in
        # the trainer module itself.
        if owner is None or owner.__name__ == own_module_name:
            continue
        candidates = _matching_configs(owner)
        if len(candidates) == 1:
            return candidates
    return []
def test_unsloth_auto_discovery_finds_trainer_and_config_per_module():
    """Stage 2: run the discovery rules over every trainer module.

    Each trainer module lands in one bucket:
      * ok - one *Trainer and one *Config resolved;
      * import-skipped - attribute access on ``trl.trainer`` raised
        (optional deps), mirroring the rl.py:1944-1948 try/except;
      * discovery-skipped - zero or several *Trainer candidates;
      * fail - one *Trainer but no *Config, even after the fallbacks.
    Only the ``fail`` bucket and an ``ok`` count below 3 fail the test.
    """
    discovered = 0
    import_skips = []
    discovery_skips = []
    failures = []
    for trainer_file in _trainer_files():
        qualified = f"trl.trainer.{trainer_file}"
        try:
            mod = getattr(trl.trainer, trainer_file)
        except Exception as exc:
            import_skips.append((qualified, f"{type(exc).__name__}: {exc}"))
            continue
        trainers, configs = _apply_unsloth_discovery_rules(mod, trainer_file)
        if len(trainers) != 1:
            discovery_skips.append((qualified, f"trainers={trainers}"))
            continue
        if len(configs) != 1:
            # In-module lookup was not unique; try the sibling-module and
            # MRO fallbacks before giving up.
            configs = _resolve_config_via_fallbacks(trainer_file, trainers, mod)
        if len(configs) != 1:
            reason = (
                f"trainer={trainers[0]} but config not found "
                "(checked module, *_config sibling, and MRO)"
            )
            failures.append((qualified, reason))
            continue
        discovered += 1
        print(f" OK {qualified}: trainer={trainers[0]}, config={configs[0]}")
    print(
        f"\nDiscovery: ok={discovered} import_skipped={len(import_skips)} "
        f"discovery_skipped={len(discovery_skips)} fail={len(failures)}"
    )
    report = (
        ("IMPORT-SKIP", import_skips),
        ("DISC-SKIP", discovery_skips),
        ("FAIL", failures),
    )
    for label, bucket in report:
        for qualified, reason in bucket:
            print(f" {label} {qualified}: {reason}")
    # Hard contract: a trainer that imports cleanly AND has exactly one
    # *Trainer must also resolve exactly one *Config through one of the
    # three rules. Import-skipped and discovery-skipped are tolerated.
    assert not failures, (
        f"unsloth discovery rules failed for {len(failures)} trainers"
    )
    # Sanity floor: sft + reward + dpo are the historical core.
    assert discovered >= 3, (
        f"only {discovered} trainers fully discovered; expected >=3 "
        "(sft/reward/dpo). Possible TRL surface regression."
    )
def test_orphan_trainer_modules_do_not_exist():
    """Stage 3: no trainer module may lack both config sources.

    A trainer module passes if a sibling ``<x>_config`` module spec can be
    found, or if the discovery rules find at least one inline *Config in
    the trainer module. Trainer modules that cannot be imported are
    skipped. Anything left over is reported as an orphan.
    """
    orphans = []
    for trainer_file in _trainer_files():
        sibling_qualified = "trl.trainer." + trainer_file.replace(
            "_trainer", "_config"
        )
        if importlib.util.find_spec(sibling_qualified) is not None:
            continue
        # No sibling module: fall back to an inline *Config in the trainer
        # module itself.
        try:
            mod = getattr(trl.trainer, trainer_file)
        except Exception:
            # Optional-dep failure -> skip; the AST-parse stage already
            # covered the file.
            continue
        inline_configs = _apply_unsloth_discovery_rules(mod, trainer_file)[1]
        if not inline_configs:
            orphans.append(trainer_file)
    assert not orphans, (
        "Orphan TRL trainer modules with neither sibling "
        f"<x>_config.py nor an inline *Config: {orphans}. "
        "unsloth auto-discovery would silently skip these."
    )
PY
python -m pytest -q --tb=short -s tests/_trl_trainer_discovery_shim.py
rm -f tests/_trl_trainer_discovery_shim.py
- name: llama.cpp install via unsloth_zoo.llama_cpp + `llama-cli --help` smoke
# Exercise the canonical `unsloth_zoo.llama_cpp.install_llama_cpp`
# flow that GGUF export uses at runtime: clone ggml-org/llama.cpp