smoke test allow pass for flaky providers (#6638)

This commit is contained in:
Zane 2026-01-22 16:10:41 -08:00 committed by GitHub
parent 383ae71759
commit e7bfdf8fa2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -13,6 +13,14 @@ for arg in "$@"; do
esac
done
# Flaky models that are allowed to fail without failing the entire test run.
# These are typically preview/experimental models with inconsistent tool-calling behavior.
# Failures are still reported but don't block PRs.
ALLOWED_FAILURES=(
"google:gemini-3-pro-preview"
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
)
if [ -f .env ]; then
export $(grep -v '^#' .env | xargs)
fi
@ -71,7 +79,20 @@ else
fi
echo ""
is_allowed_failure() {
local provider="$1"
local model="$2"
local key="${provider}:${model}"
for allowed in "${ALLOWED_FAILURES[@]}"; do
if [ "$allowed" = "$key" ]; then
return 0
fi
done
return 1
}
RESULTS=()
HARD_FAILURES=()
for provider_config in "${PROVIDERS[@]}"; do
# Split on " -> " to get provider and models
@ -94,8 +115,14 @@ for provider_config in "${PROVIDERS[@]}"; do
echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
RESULTS+=("${PROVIDER}: ${MODEL}")
else
echo "✗ FAILED: Test failed - $FAILURE_MSG"
RESULTS+=("${PROVIDER}: ${MODEL}")
if is_allowed_failure "$PROVIDER" "$MODEL"; then
echo "⚠ FLAKY: Test failed but model is in allowed failures list - $FAILURE_MSG"
RESULTS+=("${PROVIDER}: ${MODEL} (flaky)")
else
echo "✗ FAILED: Test failed - $FAILURE_MSG"
RESULTS+=("${PROVIDER}: ${MODEL}")
HARD_FAILURES+=("${PROVIDER}: ${MODEL}")
fi
fi
rm "$TMPFILE"
rm -rf "$TESTDIR"
@ -107,11 +134,22 @@ echo "=== Test Summary ==="
for result in "${RESULTS[@]}"; do
echo "$result"
done
if echo "${RESULTS[@]}" | grep -q "✗"; then
if [ ${#HARD_FAILURES[@]} -gt 0 ]; then
echo ""
echo "Hard failures (${#HARD_FAILURES[@]}):"
for failure in "${HARD_FAILURES[@]}"; do
echo " - $failure"
done
echo ""
echo "Some tests failed!"
exit 1
else
echo ""
echo "All tests passed!"
if echo "${RESULTS[@]}" | grep -q "⚠"; then
echo ""
echo "All required tests passed! (some flaky tests failed but are allowed)"
else
echo ""
echo "All tests passed!"
fi
fi