eigent/backend/app/component/model_validation.py
emag165 eacda32c08
Some checks are pending
CodeQL Advanced / Analyze (python) (push) Waiting to run
CodeQL Advanced / Analyze (actions) (push) Waiting to run
CodeQL Advanced / Analyze (javascript-typescript) (push) Waiting to run
Pre-commit / pre-commit (push) Waiting to run
Test / Run Python Tests (push) Waiting to run
fix(backend): default max_tokens for Anthropic model validation (#1549)
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: emag165 <emag165@users.noreply.github.com>
Co-authored-by: Sun Tao <2605127667@qq.com>
2026-04-11 22:02:21 +08:00

618 lines
24 KiB
Python

# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
import logging
from enum import Enum
from typing import Any
from camel.agents import ChatAgent
from camel.models import ModelFactory, ModelProcessingError
# Module logger; registered under a fixed name rather than __name__.
logger = logging.getLogger("model_validation")

# Expected result from tool execution for validation.
# get_website_content() returns this exact string, and
# validate_model_with_details() compares the first tool call's result
# against it to decide whether tool calling works end to end.
EXPECTED_TOOL_RESULT = "Tool execution completed successfully for https://www.camel-ai.org, Website Content: Welcome to CAMEL AI!"
class ValidationStage(str, Enum):
    """Named steps of the model validation pipeline.

    Members are declared in the order the pipeline runs them. Because the
    enum also derives from ``str``, each member compares equal to its
    string value and serializes as plain text.
    """

    # Required arguments are checked for presence.
    INITIALIZATION = "initialization"
    # The model backend object is constructed.
    MODEL_CREATION = "model_creation"
    # The chat agent wrapping the model is constructed.
    AGENT_CREATION = "agent_creation"
    # A single prompt is sent to the model through the agent.
    MODEL_CALL = "model_call"
    # The response is inspected for a successful tool call.
    TOOL_CALL_EXECUTION = "tool_call_execution"
    # Declared for completeness; not set by the code visible in this file.
    RESPONSE_PARSING = "response_parsing"
class ValidationErrorType(str, Enum):
    """Coarse categories a validation failure can be reported under.

    The enum derives from ``str`` so members compare equal to, and
    serialize as, their string values.
    """

    # Credential / connectivity / provider-side problems.
    AUTHENTICATION_ERROR = "authentication_error"
    NETWORK_ERROR = "network_error"
    MODEL_NOT_FOUND = "model_not_found"
    RATE_LIMIT_ERROR = "rate_limit_error"
    QUOTA_EXCEEDED = "quota_exceeded"
    TIMEOUT_ERROR = "timeout_error"

    # Tool-calling problems detected after a successful model call.
    TOOL_CALL_NOT_SUPPORTED = "tool_call_not_supported"
    TOOL_CALL_EXECUTION_FAILED = "tool_call_execution_failed"

    # Caller-side misconfiguration and the catch-all fallback.
    INVALID_CONFIGURATION = "invalid_configuration"
    UNKNOWN_ERROR = "unknown_error"
class ValidationResult:
    """Mutable record describing the outcome of one validation run.

    A fresh instance represents "nothing validated yet": both verdict flags
    are ``False``, every error field is ``None`` and all containers are
    empty. The validation pipeline fills the fields in as it progresses.
    """

    def __init__(self):
        # Overall verdicts.
        self.is_valid: bool = False
        self.is_tool_calls: bool = False

        # Error description; all None until a failure is recorded.
        self.error_type: ValidationErrorType | None = None
        self.error_code: str | None = None
        self.error_message: str | None = None
        # Original error message from provider
        self.raw_error_message: str | None = None
        self.error_details: dict[str, Any] = {}

        # Per-stage bookkeeping.
        self.validation_stages: dict[ValidationStage, bool] = {}
        self.diagnostic_info: dict[str, Any] = {}
        self.successful_stages: list[ValidationStage] = []
        self.failed_stage: ValidationStage | None = None

        # Summaries of the model response and of the first tool call.
        self.model_response_info: dict[str, Any] | None = None
        self.tool_call_info: dict[str, Any] | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary.

        Enum-valued fields (error type, stages) are replaced by their
        ``.value`` strings; all other fields are passed through unchanged.

        Returns:
            dict[str, Any]: Mapping with one entry per attribute.
        """
        error_type_value = self.error_type.value if self.error_type else None
        failed_stage_value = (
            self.failed_stage.value if self.failed_stage else None
        )
        stage_flags = {
            stage.value: success
            for stage, success in self.validation_stages.items()
        }
        passed_stages = [stage.value for stage in self.successful_stages]

        return {
            "is_valid": self.is_valid,
            "is_tool_calls": self.is_tool_calls,
            "error_type": error_type_value,
            "error_code": self.error_code,
            "error_message": self.error_message,
            "raw_error_message": self.raw_error_message,
            "error_details": self.error_details,
            "validation_stages": stage_flags,
            "diagnostic_info": self.diagnostic_info,
            "successful_stages": passed_stages,
            "failed_stage": failed_stage_value,
            "model_response_info": self.model_response_info,
            "tool_call_info": self.tool_call_info,
        }
def get_website_content(url: str) -> str:
    r"""Gets the content of a website.

    Args:
        url (str): The URL of the website.

    Returns:
        str: The content of the website.
    """
    # Validation stub: no request is made and `url` is ignored. The fixed
    # EXPECTED_TOOL_RESULT string is returned so the validation pipeline can
    # check that the model's tool call round-trips correctly.
    # NOTE(review): this function is handed to ChatAgent as a tool, so its
    # signature and docstring are presumably used to build the tool schema
    # shown to the model -- confirm before rewording the docstring.
    return EXPECTED_TOOL_RESULT
def format_raw_error(exception: Exception, max_length: int = 300) -> str:
    """Render an exception as text, clipping overly long messages.

    The provider's wording is kept verbatim (no translation or rephrasing).
    When the message is longer than ``max_length`` characters, only the first
    ``max_length`` characters are kept and ``"..."`` is appended, so a clipped
    result is ``max_length + 3`` characters long.

    Args:
        exception: The exception to format
        max_length: Number of message characters kept before clipping

    Returns:
        str: The (possibly clipped) error message
    """
    message = str(exception)
    return message if len(message) <= max_length else message[:max_length] + "..."
def categorize_error(
    exception: Exception, stage: ValidationStage
) -> ValidationErrorType:
    """Categorize exception into specific error type.

    Categorization is deliberately conservative: it relies on the exception
    class name, on HTTP status codes and on a few unambiguous phrases. The
    raw error message is always preserved separately by the caller, so
    returning ``UNKNOWN_ERROR`` loses no information.

    HTTP status codes are matched only as standalone tokens (not directly
    preceded or followed by a letter or digit). A plain substring test would
    also fire on digits embedded in request IDs, ports or timings, e.g.
    ``"req_01a4290c"`` would be reported as a rate-limit error.

    Args:
        exception: The exception to categorize
        stage: The validation stage where error occurred (currently not
            used for the decision; kept for interface stability)

    Returns:
        ValidationErrorType: The categorized error type, or UNKNOWN_ERROR
            if uncertain
    """
    # Function-scope import keeps this block self-contained.
    import re

    error_str = str(exception).lower()
    error_type = exception.__class__.__name__.lower()
    exception_type_str = str(type(exception)).lower()

    def has_status(code: str) -> bool:
        # error_str is lower-cased, so [0-9a-z] covers every alphanumeric
        # neighbour; "_" and punctuation still count as token boundaries.
        pattern = rf"(?<![0-9a-z]){code}(?![0-9a-z])"
        return re.search(pattern, error_str) is not None

    # First, check exception type for common patterns.
    # This is the most reliable way to categorize errors.
    if "timeout" in error_type or "timeouterror" in exception_type_str:
        return ValidationErrorType.TIMEOUT_ERROR
    if "connection" in error_type or "connectionerror" in exception_type_str:
        return ValidationErrorType.NETWORK_ERROR
    if "authentication" in error_type or "autherror" in exception_type_str:
        return ValidationErrorType.AUTHENTICATION_ERROR

    # ModelProcessingError from camel-ai wraps provider-specific errors, so
    # a wider set of message phrases is trusted for it.
    if isinstance(exception, ModelProcessingError):
        # Check for HTTP status codes first (most reliable)
        if has_status("401") or "unauthorized" in error_str:
            return ValidationErrorType.AUTHENTICATION_ERROR
        if has_status("404") or "not found" in error_str:
            return ValidationErrorType.MODEL_NOT_FOUND
        if has_status("429") or "rate limit" in error_str:
            return ValidationErrorType.RATE_LIMIT_ERROR
        if "timeout" in error_str or "timed out" in error_str:
            return ValidationErrorType.TIMEOUT_ERROR
        if "quota" in error_str or "insufficient_quota" in error_str:
            return ValidationErrorType.QUOTA_EXCEEDED
        if "connection" in error_str or "network" in error_str:
            return ValidationErrorType.NETWORK_ERROR

    # For any other exception only very specific, unambiguous patterns are
    # accepted: HTTP status codes first ...
    if has_status("401") or "unauthorized" in error_str:
        return ValidationErrorType.AUTHENTICATION_ERROR
    if has_status("404"):
        return ValidationErrorType.MODEL_NOT_FOUND
    if has_status("429"):
        return ValidationErrorType.RATE_LIMIT_ERROR

    # ... then phrases that are unlikely to be false positives.
    if "insufficient_quota" in error_str or "quota exceeded" in error_str:
        return ValidationErrorType.QUOTA_EXCEEDED

    # Return unknown if we're not confident.
    # The raw error message will still be available to users.
    return ValidationErrorType.UNKNOWN_ERROR
def create_agent(
    model_platform: str,
    model_type: str,
    api_key: str = None,
    url: str = None,
    model_config_dict: dict = None,
    **kwargs,
) -> ChatAgent:
    """Build the chat agent used to validate a model.

    The agent is given a single tool, ``get_website_content``, and a system
    message instructing it to use that tool. Both the model and the agent
    step are limited to 60 seconds.

    For the ``anthropic`` platform (case-insensitive) the configuration is
    copied and ``max_tokens`` is set to 4096 when it is missing or ``None``;
    the caller's dictionary is never mutated.

    Args:
        model_platform: The model platform
        model_type: The model type
        api_key: API key for authentication
        url: Custom model URL
        model_config_dict: Model configuration dictionary
        **kwargs: Additional model parameters, forwarded to
            ``ModelFactory.create``

    Returns:
        ChatAgent: The created agent

    Raises:
        ValueError: If model_type or model_platform is None
        Exception: If model creation fails
    """
    # Guard clauses; model_type is checked first.
    if model_type is None:
        raise ValueError(f"Invalid model_type: {model_type}")
    if model_platform is None:
        raise ValueError(f"Invalid model_platform: {model_platform}")

    if str(model_platform).lower() == "anthropic":
        # Work on a copy so the caller's configuration stays untouched.
        model_config_dict = dict(model_config_dict or {})
        if model_config_dict.get("max_tokens") is None:
            model_config_dict["max_tokens"] = 4096

    validation_model = ModelFactory.create(
        model_platform=model_platform,
        model_type=model_type,
        api_key=api_key,
        url=url,
        timeout=60,  # 1 minute for validation
        model_config_dict=model_config_dict,
        **kwargs,
    )

    return ChatAgent(
        system_message="You are a helpful assistant that must use the tool get_website_content to get the content of a website.",
        model=validation_model,
        tools=[get_website_content],
        step_timeout=60,  # 1 minute for validation
    )
def _record_exception(
    result: ValidationResult,
    stage: ValidationStage,
    exc: Exception,
    error_type: ValidationErrorType,
    log_message: str,
    log_error_type: bool = False,
) -> ValidationResult:
    """Store an exception raised during ``stage`` on ``result`` and log it.

    Args:
        result: Result object to update in place
        stage: Stage that raised; stored as ``failed_stage``
        exc: The exception that was caught
        error_type: Category to report for the failure
        log_message: Message for the error log record
        log_error_type: Whether ``error_type`` is added to the log extras

    Returns:
        ValidationResult: The same ``result`` object, for ``return`` chaining
    """
    result.error_type = error_type
    # Always preserve the raw error message from the provider and use it as
    # the primary message - it's usually clear and informative.
    result.raw_error_message = format_raw_error(exc)
    result.error_message = result.raw_error_message
    result.error_details = {
        "exception_type": type(exc).__name__,
        "exception_message": str(exc),
    }
    result.failed_stage = stage

    log_extra = {"error": str(exc)}
    if log_error_type:
        log_extra["error_type"] = error_type.value
    # stacklevel=2 attributes the record to the caller, not to this helper.
    logger.error(log_message, extra=log_extra, exc_info=exc, stacklevel=2)
    return result


def _check_tool_call_execution(
    result: ValidationResult,
    response: Any,
    log_ctx: dict[str, Any],
) -> None:
    """Inspect ``response`` for a successful tool call and update ``result``.

    ``result`` is marked valid only when the first recorded tool call
    returned exactly ``EXPECTED_TOOL_RESULT``. Every other outcome records a
    tool-call error on ``result``. Exceptions propagate to the caller.

    Args:
        result: Result object to update in place
        response: Object returned by the agent step
        log_ctx: Platform/model fields attached to every log record
    """
    stage = ValidationStage.TOOL_CALL_EXECUTION

    # Case 1: the response carries no usable ``info`` mapping at all.
    if not (response and hasattr(response, "info") and response.info):
        result.error_type = ValidationErrorType.TOOL_CALL_NOT_SUPPORTED
        result.error_message = (
            "Model call succeeded, but response does not contain tool call "
            "information. This model may not support tool calling "
            "functionality."
        )
        result.error_details = {
            "has_info": hasattr(response, "info") if response else False,
            "response_type": type(response).__name__ if response else None,
        }
        result.failed_stage = stage
        result.tool_call_info = {
            "execution_successful": False,
            "has_info": False,
        }
        logger.warning(
            "Response missing tool call info", extra=log_ctx, stacklevel=2
        )
        return

    tool_calls = response.info.get("tool_calls", [])
    result.tool_call_info = {
        "tool_calls_count": len(tool_calls),
        "has_tool_calls": len(tool_calls) > 0,
    }

    # Case 2: the model answered without calling the tool.
    if not tool_calls:
        result.error_type = ValidationErrorType.TOOL_CALL_NOT_SUPPORTED
        result.error_message = (
            "Model call succeeded, but the model did not make any tool "
            "calls. This model may not support tool calling functionality."
        )
        result.error_details = {
            "tool_calls": [],
            "response_info": str(response.info),
        }
        result.failed_stage = stage
        result.tool_call_info["execution_successful"] = False
        logger.warning(
            "No tool calls made by model", extra=log_ctx, stacklevel=2
        )
        return

    # Only the first tool call is examined.
    tool_call = tool_calls[0]
    has_result = hasattr(tool_call, "result")
    actual_result = tool_call.result if has_result else None
    result.tool_call_info["first_tool_call"] = {
        "tool_name": getattr(tool_call, "tool_name", None),
        "has_result": has_result,
        "result": str(actual_result)[:200] if has_result else None,
    }

    # Case 3: the tool ran and produced the expected canned string.
    if actual_result == EXPECTED_TOOL_RESULT:
        result.validation_stages[stage] = True
        result.successful_stages.append(stage)
        result.is_tool_calls = True
        result.is_valid = True
        result.tool_call_info["execution_successful"] = True
        logger.debug(
            "Tool call execution stage passed", extra=log_ctx, stacklevel=2
        )
        return

    # Case 4: a tool call exists but its result does not match.
    actual_preview = str(actual_result)[:50] if actual_result else "None"
    result.error_type = ValidationErrorType.TOOL_CALL_EXECUTION_FAILED
    result.error_message = (
        "Tool call was made but execution failed. "
        f"Expected result: '{EXPECTED_TOOL_RESULT[:50]}...', "
        f"but got: '{actual_preview}...'"
    )
    result.error_details = {
        "expected_result": EXPECTED_TOOL_RESULT,
        "actual_result": str(actual_result) if actual_result else None,
        "tool_call": str(tool_call)[:200],
    }
    result.failed_stage = stage
    result.tool_call_info["execution_successful"] = False
    logger.warning(
        "Tool call execution failed",
        extra={
            **log_ctx,
            "expected": EXPECTED_TOOL_RESULT[:50],
            "actual": str(actual_result)[:50] if actual_result else None,
        },
        stacklevel=2,
    )


def validate_model_with_details(
    model_platform: str,
    model_type: str,
    api_key: str = None,
    url: str = None,
    model_config_dict: dict = None,
    **kwargs,
) -> ValidationResult:
    """Validate model with detailed diagnostic information.

    The validation runs five stages in order and stops at the first one that
    fails: initialization (argument checks), model creation, agent creation,
    model call, and tool call execution. Each stage is first recorded as
    ``False`` in ``validation_stages`` and flipped to ``True`` on success.
    This function does not raise for stage failures; they are reported on
    the returned result.

    Args:
        model_platform: The model platform
        model_type: The model type
        api_key: API key for authentication
        url: Custom model URL
        model_config_dict: Model configuration dictionary
        **kwargs: Additional model parameters, forwarded to
            ``ModelFactory.create``

    Returns:
        ValidationResult: Detailed validation result; ``is_valid`` is True
            only if the model called the tool and the expected result came
            back.
    """
    result = ValidationResult()
    # Fields attached to most log records; logging only reads this mapping.
    log_ctx = {"platform": model_platform, "model_type": model_type}

    # Stage 1: Initialization
    result.validation_stages[ValidationStage.INITIALIZATION] = False
    try:
        required_fields = (
            ("model_type", model_type, "Model type"),
            ("model_platform", model_platform, "Model platform"),
        )
        for field_name, value, label in required_fields:
            if value is None or value.strip() == "":
                result.error_type = ValidationErrorType.INVALID_CONFIGURATION
                result.error_message = (
                    f"{label} is required but was not provided."
                )
                result.error_details = {"missing_field": field_name}
                result.failed_stage = ValidationStage.INITIALIZATION
                return result

        result.validation_stages[ValidationStage.INITIALIZATION] = True
        result.successful_stages.append(ValidationStage.INITIALIZATION)
        result.diagnostic_info["initialization"] = {
            "model_platform": model_platform,
            "model_type": model_type,
            "has_api_key": api_key is not None and api_key.strip() != "",
            "has_custom_url": url is not None,
            "has_config": model_config_dict is not None,
        }
        logger.debug("Initialization stage passed", extra=log_ctx)
    except Exception as e:
        # Anything raised here (e.g. a non-string argument without .strip())
        # is reported as a configuration problem, not categorized.
        return _record_exception(
            result,
            ValidationStage.INITIALIZATION,
            e,
            ValidationErrorType.INVALID_CONFIGURATION,
            "Initialization stage failed",
        )

    # Stage 2: Model Creation
    result.validation_stages[ValidationStage.MODEL_CREATION] = False
    try:
        logger.debug("Creating model", extra=log_ctx)
        if str(model_platform).lower() == "anthropic":
            # Default max_tokens on a copy so the caller's dict is untouched.
            model_config_dict = dict(model_config_dict or {})
            if model_config_dict.get("max_tokens") is None:
                model_config_dict["max_tokens"] = 4096
        model = ModelFactory.create(
            model_platform=model_platform,
            model_type=model_type,
            api_key=api_key,
            url=url,
            timeout=60,  # 1 minute for validation
            model_config_dict=model_config_dict,
            **kwargs,
        )
        result.validation_stages[ValidationStage.MODEL_CREATION] = True
        result.successful_stages.append(ValidationStage.MODEL_CREATION)
        result.diagnostic_info["model_creation"] = {
            "model_platform": model_platform,
            "model_type": model_type,
            "model_created": True,
        }
        logger.debug("Model creation stage passed", extra=log_ctx)
    except Exception as e:
        return _record_exception(
            result,
            ValidationStage.MODEL_CREATION,
            e,
            categorize_error(e, ValidationStage.MODEL_CREATION),
            "Model creation stage failed",
            log_error_type=True,
        )

    # Stage 3: Agent Creation
    result.validation_stages[ValidationStage.AGENT_CREATION] = False
    try:
        logger.debug("Creating agent", extra=log_ctx)
        agent = ChatAgent(
            system_message="You are a helpful assistant that must use the tool get_website_content to get the content of a website.",
            model=model,
            tools=[get_website_content],
            step_timeout=60,  # 1 minute for validation
        )
        result.validation_stages[ValidationStage.AGENT_CREATION] = True
        result.successful_stages.append(ValidationStage.AGENT_CREATION)
        result.diagnostic_info["agent_creation"] = {
            "agent_created": True,
        }
        logger.debug("Agent creation stage passed", extra=log_ctx)
    except Exception as e:
        return _record_exception(
            result,
            ValidationStage.AGENT_CREATION,
            e,
            categorize_error(e, ValidationStage.AGENT_CREATION),
            "Agent creation stage failed",
        )

    # Stage 4: Model Call
    result.validation_stages[ValidationStage.MODEL_CALL] = False
    try:
        logger.debug("Executing model call", extra=log_ctx)
        # Prompt text is spelled out line by line so its exact content does
        # not depend on source indentation.
        prompt = (
            "\n"
            "Get the content of https://www.camel-ai.org,\n"
            "you must use the get_website_content tool to get the content ,\n"
            "i just want to verify the get_website_content tool is working,\n"
            "you must call the get_website_content tool only once.\n"
        )
        response = agent.step(input_message=prompt)

        if not response:
            result.error_type = ValidationErrorType.UNKNOWN_ERROR
            result.error_message = (
                "Model call succeeded but returned no response."
            )
            result.error_details = {"response": None}
            result.failed_stage = ValidationStage.MODEL_CALL
            logger.warning("Model call returned no response", extra=log_ctx)
            return result

        result.validation_stages[ValidationStage.MODEL_CALL] = True
        result.successful_stages.append(ValidationStage.MODEL_CALL)
        # Extract model response information
        result.model_response_info = {
            "has_response": True,
            "has_message": hasattr(response, "msg")
            and response.msg is not None,
            "has_info": hasattr(response, "info")
            and response.info is not None,
        }
        if hasattr(response, "msg") and response.msg:
            result.model_response_info["message_content"] = (
                str(response.msg.content)[:200]
                if hasattr(response.msg, "content")
                else None
            )
        if hasattr(response, "info") and response.info:
            result.model_response_info["usage"] = response.info.get(
                "usage", {}
            )
            result.model_response_info["tool_calls_count"] = len(
                response.info.get("tool_calls", [])
            )
        logger.debug("Model call stage passed", extra=log_ctx)
    except Exception as e:
        return _record_exception(
            result,
            ValidationStage.MODEL_CALL,
            e,
            categorize_error(e, ValidationStage.MODEL_CALL),
            "Model call stage failed",
            log_error_type=True,
        )

    # Stage 5: Tool Call Execution Check
    result.validation_stages[ValidationStage.TOOL_CALL_EXECUTION] = False
    try:
        _check_tool_call_execution(result, response, log_ctx)
    except Exception as e:
        _record_exception(
            result,
            ValidationStage.TOOL_CALL_EXECUTION,
            e,
            categorize_error(e, ValidationStage.TOOL_CALL_EXECUTION),
            "Tool call execution check failed",
        )
    return result