mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-05-05 07:11:39 +00:00
add ci (#1642)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
This commit is contained in:
parent
2cffdf7033
commit
51745a9ea1
14 changed files with 845 additions and 48 deletions
171
kt-kernel/test/ci/ci_utils.py
Normal file
171
kt-kernel/test/ci/ci_utils.py
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import psutil, signal, sys
|
||||
def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None):
    """Kill a process together with every process descended from it.

    Args:
        parent_pid: PID at the root of the tree. ``None`` selects the current
            process, in which case only its descendants are killed and the
            current process itself is left alone.
        include_parent: When True, the root process is killed after its
            descendants.
        skip_pid: PID of a descendant that must be left running, if any.
    """
    # Restore the default SIGCHLD handler to avoid spammy logs. Signal
    # handlers can only be installed from the main thread.
    on_main_thread = threading.current_thread() is threading.main_thread()
    if on_main_thread:
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)

    if parent_pid is None:
        parent_pid, include_parent = os.getpid(), False

    try:
        root = psutil.Process(parent_pid)
    except psutil.NoSuchProcess:
        # Nothing to do: the root process is already gone.
        return

    for descendant in root.children(recursive=True):
        if descendant.pid != skip_pid:
            try:
                descendant.kill()
            except psutil.NoSuchProcess:
                # The descendant exited on its own in the meantime.
                pass

    if not include_parent:
        return

    try:
        killing_ourselves = parent_pid == os.getpid()
        root.kill()
        if killing_ourselves:
            sys.exit(0)

        # Sometimes processes cannot be killed with SIGKILL (e.g. PID=1
        # launched by kubernetes), so we send an additional signal to kill
        # them.
        root.send_signal(signal.SIGQUIT)
    except psutil.NoSuchProcess:
        pass
|
||||
|
||||
|
||||
@dataclass
class TestFile:
    """Describe one test script to be executed by the CI runner."""

    # Path of the test script; the runner joins it onto the current working
    # directory before launching it.
    name: str
    # Expected run time, printed next to the measured elapsed time once the
    # script finishes (presumably in seconds -- confirm against callers).
    estimated_time: float = 60
|
||||
|
||||
|
||||
def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: Optional[float] = None,
):
    """Run ``func(*args, **kwargs)`` in a worker thread with a time limit.

    Args:
        func: Callable to execute.
        args: Positional arguments passed to ``func``.
        kwargs: Keyword arguments passed to ``func``; ``None`` means none.
        timeout: Maximum number of seconds to wait for ``func`` to finish.
            ``None`` waits indefinitely.

    Returns:
        Whatever ``func`` returned.

    Raises:
        TimeoutError: ``func`` was still running when ``timeout`` elapsed.
            The worker thread is not interrupted and keeps running; the
            caller is responsible for making it stop.
        RuntimeError: ``func`` finished without producing a return value
            because it raised. The original exception is attached as
            ``__cause__``.
    """
    call_kwargs = kwargs or {}
    func_name = getattr(func, "__name__", repr(func))
    ret_value = []
    errors = []

    def _target_func():
        # Capture the failure instead of letting it die with the thread, so
        # the caller can see why no value was produced.
        try:
            ret_value.append(func(*args, **call_kwargs))
        except BaseException as exc:  # reported to the caller below
            errors.append(exc)

    t = threading.Thread(target=_target_func)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError(f"{func_name} did not finish within {timeout} seconds")

    if not ret_value:
        cause = errors[0] if errors else None
        raise RuntimeError(
            f"{func_name} did not return a value (raised {cause!r})"
        ) from cause

    return ret_value[0]
|
||||
|
||||
|
||||
def run_unittest_files(
    files: List[TestFile], timeout_per_file: float, continue_on_error: bool = False
):
    """
    Run a list of test files.

    Each file is launched as ``python3 <file>`` in its own subprocess; its
    output goes straight to this process's stdout/stderr. A summary of passed
    and failed files is printed at the end.

    Args:
        files: List of TestFile objects to run
        timeout_per_file: Timeout in seconds for each test file
        continue_on_error: If True, continue running remaining tests even if one fails.
                          If False, stop at first failure (default behavior for PR tests).

    Returns:
        0 if every executed file exited with code 0, otherwise -1.
    """
    tic = time.perf_counter()
    success = True
    passed_tests = []
    failed_tests = []
    last_index = len(files) - 1

    for i, file in enumerate(files):
        filename, estimated_time = file.name, file.estimated_time
        # Per-file holder for the launched subprocess. A fresh list per
        # iteration means a worker thread left over from a timed-out file can
        # never observe or clobber the state of a later file.
        started = []

        # The loop variables are bound as defaults so the worker thread keeps
        # the values of *this* iteration even if the loop has moved on.
        def run_one_file(
            filename, i=i, estimated_time=estimated_time, started=started
        ):
            filename = os.path.join(os.getcwd(), filename)
            print(
                f".\n.\nBegin ({i}/{last_index}):\npython3 {filename}\n.\n.\n",
                flush=True,
            )
            start = time.perf_counter()

            process = subprocess.Popen(
                ["python3", filename], stdout=None, stderr=None, env=os.environ
            )
            started.append(process)
            process.wait()
            elapsed = time.perf_counter() - start

            print(
                f".\n.\nEnd ({i}/{last_index}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
                flush=True,
            )
            return process.returncode

        # (banner, reason) describing why this file failed; None means passed.
        failure = None
        try:
            ret_code = run_with_timeout(
                run_one_file, args=(filename,), timeout=timeout_per_file
            )
        except TimeoutError:
            # The subprocess may not exist yet if the timeout fired before
            # Popen returned, so only kill what was actually started.
            if started:
                kill_process_tree(started[0].pid)
            time.sleep(5)
            failure = (
                f"TIMEOUT: {filename} after {timeout_per_file} seconds",
                f"timeout after {timeout_per_file}s",
            )
        except RuntimeError as exc:
            # The worker produced no exit code (e.g. the subprocess could not
            # be launched). Record it as a failure so the summary is still
            # printed instead of aborting the whole run.
            detail = repr(exc.__cause__ if exc.__cause__ is not None else exc)
            failure = (
                f"ERROR: {filename} could not be run ({detail})",
                f"error: {detail}",
            )
        else:
            if ret_code != 0:
                failure = (
                    f"FAILED: {filename} returned exit code {ret_code}",
                    f"exit code {ret_code}",
                )

        if failure is None:
            passed_tests.append(filename)
            continue

        banner, reason = failure
        print(f"\n✗ {banner}\n", flush=True)
        success = False
        failed_tests.append((filename, reason))
        if not continue_on_error:
            # Stop at first failure for PR tests
            break
        # Otherwise continue to next test for nightly tests

    if success:
        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
    else:
        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)

    # Print summary
    print(f"\n{'='*60}", flush=True)
    print(f"Test Summary: {len(passed_tests)}/{len(files)} passed", flush=True)
    print(f"{'='*60}", flush=True)
    if passed_tests:
        print("✓ PASSED:", flush=True)
        for test in passed_tests:
            print(f"  {test}", flush=True)
    if failed_tests:
        print("\n✗ FAILED:", flush=True)
        for test, reason in failed_tests:
            print(f"  {test} ({reason})", flush=True)
    print(f"{'='*60}\n", flush=True)

    return 0 if success else -1
|
||||
Loading…
Add table
Add a link
Reference in a new issue