mirror of
https://github.com/block/goose.git
synced 2026-04-28 03:29:36 +00:00
smoke test allow pass for flaky providers (#6638)
This commit is contained in:
parent
383ae71759
commit
e7bfdf8fa2
1 changed files with 43 additions and 5 deletions
|
|
@ -13,6 +13,14 @@ for arg in "$@"; do
|
|||
esac
|
||||
done
|
||||
|
||||
# Flaky models that are allowed to fail without failing the entire test run.
|
||||
# These are typically preview/experimental models with inconsistent tool-calling behavior.
|
||||
# Failures are still reported but don't block PRs.
|
||||
ALLOWED_FAILURES=(
|
||||
"google:gemini-3-pro-preview"
|
||||
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
|
||||
)
|
||||
|
||||
if [ -f .env ]; then
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
fi
|
||||
|
|
@ -71,7 +79,20 @@ else
|
|||
fi
|
||||
echo ""
|
||||
|
||||
is_allowed_failure() {
|
||||
local provider="$1"
|
||||
local model="$2"
|
||||
local key="${provider}:${model}"
|
||||
for allowed in "${ALLOWED_FAILURES[@]}"; do
|
||||
if [ "$allowed" = "$key" ]; then
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
RESULTS=()
|
||||
HARD_FAILURES=()
|
||||
|
||||
for provider_config in "${PROVIDERS[@]}"; do
|
||||
# Split on " -> " to get provider and models
|
||||
|
|
@ -94,8 +115,14 @@ for provider_config in "${PROVIDERS[@]}"; do
|
|||
echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
|
||||
RESULTS+=("✓ ${PROVIDER}: ${MODEL}")
|
||||
else
|
||||
echo "✗ FAILED: Test failed - $FAILURE_MSG"
|
||||
RESULTS+=("✗ ${PROVIDER}: ${MODEL}")
|
||||
if is_allowed_failure "$PROVIDER" "$MODEL"; then
|
||||
echo "⚠ FLAKY: Test failed but model is in allowed failures list - $FAILURE_MSG"
|
||||
RESULTS+=("⚠ ${PROVIDER}: ${MODEL} (flaky)")
|
||||
else
|
||||
echo "✗ FAILED: Test failed - $FAILURE_MSG"
|
||||
RESULTS+=("✗ ${PROVIDER}: ${MODEL}")
|
||||
HARD_FAILURES+=("${PROVIDER}: ${MODEL}")
|
||||
fi
|
||||
fi
|
||||
rm "$TMPFILE"
|
||||
rm -rf "$TESTDIR"
|
||||
|
|
@ -107,11 +134,22 @@ echo "=== Test Summary ==="
|
|||
for result in "${RESULTS[@]}"; do
|
||||
echo "$result"
|
||||
done
|
||||
if echo "${RESULTS[@]}" | grep -q "✗"; then
|
||||
|
||||
if [ ${#HARD_FAILURES[@]} -gt 0 ]; then
|
||||
echo ""
|
||||
echo "Hard failures (${#HARD_FAILURES[@]}):"
|
||||
for failure in "${HARD_FAILURES[@]}"; do
|
||||
echo " - $failure"
|
||||
done
|
||||
echo ""
|
||||
echo "Some tests failed!"
|
||||
exit 1
|
||||
else
|
||||
echo ""
|
||||
echo "All tests passed!"
|
||||
if echo "${RESULTS[@]}" | grep -q "⚠"; then
|
||||
echo ""
|
||||
echo "All required tests passed! (some flaky tests failed but are allowed)"
|
||||
else
|
||||
echo ""
|
||||
echo "All tests passed!"
|
||||
fi
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue