Add langfuse and update logger (#952)

Co-authored-by: bytecraftii <bytecraftii@users.noreply.github.com>
Co-authored-by: Wendong-Fan <w3ndong.fan@gmail.com>
This commit is contained in:
bytecraftii 2026-01-24 16:13:07 -08:00 committed by GitHub
parent cf571b73a4
commit 49e148a2f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
63 changed files with 3095 additions and 2430 deletions

View file

@ -1,30 +1,7 @@
VITE_BASE_URL=/api
VITE_PROXY_URL=https://dev.eigent.ai
VITE_USE_LOCAL_PROXY=false
# VITE_PROXY_URL=http://localhost:3001
# VITE_USE_LOCAL_PROXY=true
TRACEROOT_TOKEN=your_traceroot_token_here
TRACEROOT_SERVICE_NAME=eigent
TRACEROOT_GITHUB_OWNER=eigent
TRACEROOT_GITHUB_REPO_NAME=eigent-ai
TRACEROOT_GITHUB_COMMIT_HASH=main
TRACEROOT_ENABLE_SPAN_CLOUD_EXPORT=false
TRACEROOT_ENABLE_LOG_CLOUD_EXPORT=false
TRACEROOT_ENABLE_SPAN_CONSOLE_EXPORT=false
TRACEROOT_ENABLE_LOG_CONSOLE_EXPORT=false
TRACEROOT_TRACER_VERBOSE=false
TRACEROOT_LOGGER_VERBOSE=false

1
.gitignore vendored
View file

@ -46,7 +46,6 @@ public/
# Testing
coverage/
.traceroot-config.yaml
# Python
__pycache__/

View file

@ -243,9 +243,6 @@ To run the application locally in developer mode:
1. Configure `.env.development`:
- Set `VITE_USE_LOCAL_PROXY=true`
- Set `VITE_PROXY_URL=http://localhost:3001`
- Set `TRACEROOT_ENABLE_SPAN_CLOUD_EXPORT=false`
- Set `TRACEROOT_ENABLE_LOG_CLOUD_EXPORT=false`
- Set `TRACEROOT_ENABLE_LOG_CONSOLE_EXPORT=false`
2. Go to the settings to specify your model key and model type.
## Common Actions 🔄

View file

@ -0,0 +1,87 @@
ci:
# https://pre-commit.ci/#configuration
autofix_prs: true
autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
autoupdate_schedule: monthly
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: no-commit-to-branch
name: No commits to master
- id: end-of-file-fixer
name: End-of-file fixer
- name: mixed-line-ending
id: mixed-line-ending
args: [--fix, lf]
- id: trailing-whitespace
name: Remove trailing whitespaces
- id: check-toml
name: Check toml
- id: check-yaml
name: Check yaml
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.37.1
hooks:
- id: yamllint
name: Lint yaml
args: [-d, '{extends: default, rules: {line-length: disable, document-start: disable, truthy: {level: error}, braces: {max-spaces-inside: 1}}}']
- repo: https://github.com/asottile/pyupgrade
rev: v3.21.0
hooks:
- id: pyupgrade
name: Upgrade Python syntax
args: [--py38-plus]
- repo: https://github.com/PyCQA/autoflake
rev: v2.3.1
hooks:
- id: autoflake
name: Remove unused imports and variables
args: [
--remove-all-unused-imports,
--remove-unused-variables,
--remove-duplicate-keys,
--ignore-init-module-imports,
--in-place,
]
- repo: https://github.com/google/yapf
rev: v0.43.0
hooks:
- id: yapf
name: Format code
additional_dependencies: [toml]
- repo: https://github.com/pycqa/isort
rev: 7.0.0
hooks:
- id: isort
name: Sort imports
- repo: https://github.com/PyCQA/flake8
rev: 7.3.0
hooks:
- id: flake8
name: Check PEP8
additional_dependencies: [Flake8-pyproject]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.3
hooks:
- id: ruff
name: Ruff formatting
args: [--fix, --exit-non-zero-on-fix]
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.22
hooks:
- id: mdformat
name: Format Markdown
additional_dependencies:
- mdformat-gfm
- mdformat_frontmatter
- mdformat_footnote

View file

@ -1,18 +1,4 @@
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
from utils import traceroot_wrapper as traceroot
import logging
import importlib.util
import os
from pathlib import Path
@ -22,7 +8,7 @@ import importlib
from typing import Any, overload
import threading
traceroot_logger = traceroot.get_logger("env")
logger = logging.getLogger("env")
# Thread-local storage for user-specific environment
_thread_local = threading.local()
@ -31,27 +17,100 @@ _thread_local = threading.local()
default_env_path = os.path.join(os.path.expanduser("~"), ".eigent", ".env")
load_dotenv(dotenv_path=default_env_path)
# Safe base directory for user environment files
env_base_dir = os.path.join(os.path.expanduser("~"), ".eigent")
def sanitize_env_path(env_path: str | None) -> str | None:
"""
Validate and sanitize user-provided environment file path.
Security: Ensures the path stays within ~/.eigent directory and ends with .env
to prevent path traversal attacks and unauthorized file access.
Args:
env_path: User-provided environment file path
Returns:
Validated absolute path string if valid, None otherwise
"""
if not env_path:
return None
try:
# Convert to Path object for safe manipulation
user_path = Path(env_path)
# Reject absolute paths outside our control
if user_path.is_absolute():
# Check if it's already within env_base_dir
resolved_path = user_path.resolve()
else:
# Join relative path to base directory
resolved_path = (Path(env_base_dir) / user_path).resolve()
# Verify the resolved path is still within env_base_dir
base_resolved = Path(env_base_dir).resolve()
try:
resolved_path.relative_to(base_resolved)
except ValueError:
logger.warning(
f"Security: Rejected env_path outside safe directory. "
f"Path: {env_path}, Resolved: {resolved_path}, "
f"Base: {base_resolved}"
)
return None
# Enforce .env file extension
if not resolved_path.name.endswith('.env'):
logger.warning(
f"Security: Rejected env_path with invalid extension. "
f"Path: {env_path}, must end with .env"
)
return None
return str(resolved_path)
except (ValueError, OSError) as e:
logger.warning(
f"Security: Invalid env_path rejected. "
f"Path: {env_path}, Error: {e}"
)
return None
def set_user_env_path(env_path: str | None = None):
"""
Set user-specific environment path for current thread.
If env_path is None, uses default global environment.
"""
traceroot_logger.info("Setting user environment path", extra={"env_path": env_path, "exists": env_path and os.path.exists(env_path) if env_path else None})
if env_path and os.path.exists(env_path):
_thread_local.env_path = env_path
Security: All paths are validated through sanitize_env_path to prevent
path traversal and unauthorized file access.
"""
# Sanitize the path before any filesystem operations
safe_env_path = sanitize_env_path(env_path)
logger.info(
f"Setting user environment path: original={env_path}, "
f"sanitized={safe_env_path}, "
f"exists={safe_env_path and os.path.exists(safe_env_path) if safe_env_path else None}"
)
if safe_env_path and os.path.exists(safe_env_path):
_thread_local.env_path = safe_env_path
# Load user-specific environment variables
load_dotenv(dotenv_path=env_path, override=True)
traceroot_logger.info("User-specific environment loaded", extra={"env_path": env_path})
load_dotenv(dotenv_path=safe_env_path, override=True)
logger.info(f"User-specific environment loaded: {safe_env_path}")
else:
# Clear thread-local env_path to fall back to global
if hasattr(_thread_local, 'env_path'):
delattr(_thread_local, 'env_path')
traceroot_logger.info("Reset to default global environment")
logger.info("Reset to default global environment")
if env_path and not os.path.exists(env_path):
traceroot_logger.warning("User environment path does not exist, falling back to global", extra={"env_path": env_path})
if env_path and not safe_env_path:
logger.warning(f"User environment path rejected by security validation: {env_path}")
elif safe_env_path and not os.path.exists(safe_env_path):
logger.warning(f"User environment path does not exist, falling back to global: {safe_env_path}")
def get_current_env_path() -> str:
@ -78,35 +137,38 @@ def env(key: str, default=None):
Get environment variable.
First checks thread-local user-specific environment,
then falls back to global environment.
Security: Uses sanitized path stored in _thread_local.env_path
which has already been validated by set_user_env_path.
"""
# If we have a user-specific environment path, try to reload it to get latest values
# Note: _thread_local.env_path is already sanitized by set_user_env_path
if hasattr(_thread_local, 'env_path') and os.path.exists(_thread_local.env_path):
# Temporarily load user-specific env to get the latest value
from dotenv import dotenv_values
user_env_values = dotenv_values(_thread_local.env_path)
if key in user_env_values:
value = user_env_values[key] or default
traceroot_logger.debug("Environment variable retrieved from user-specific config", extra={"key": key, "env_path": _thread_local.env_path, "has_value": value is not None})
logger.debug(f"Environment variable retrieved from user-specific config: key={key}, env_path={_thread_local.env_path}, has_value={value is not None}")
return value
# Fall back to global environment
value = os.getenv(key, default)
traceroot_logger.debug("Environment variable retrieved from global config", extra={"key": key, "has_value": value is not None, "using_default": value == default})
logger.debug(f"Environment variable retrieved from global config: key={key}, has_value={value is not None}, using_default={value == default}")
return value
def env_or_fail(key: str):
value = env(key)
if value is None:
traceroot_logger.warning(f"[ENVIRONMENT] can't get env config value for key: {key}")
logger.warning(f"[ENVIRONMENT] can't get env config value for key: {key}")
raise Exception(f"can't get env config value for key: {key}")
return value
@traceroot.trace()
def env_not_empty(key: str):
value = env(key)
if not value:
traceroot_logger.warning(f"[ENVIRONMENT] env config value can't be empty for key: {key}")
logger.warning(f"[ENVIRONMENT] env config value can't be empty for key: {key}")
raise Exception(f"env config value can't be empty for key: {key}")
return value

View file

@ -20,7 +20,7 @@ from pathlib import Path
from dotenv import load_dotenv
from fastapi import APIRouter, HTTPException, Request, Response
from fastapi.responses import StreamingResponse
from utils import traceroot_wrapper as traceroot
import logging
from app.component import code
from app.exception.exception import UserException
from app.model.chat import Chat, HumanReply, McpServers, Status, SupplementChat, AddTaskRequest, sse_json
@ -40,15 +40,15 @@ from app.service.task import (
delete_task_lock,
task_locks,
)
from app.component.environment import set_user_env_path
from app.component.environment import set_user_env_path, sanitize_env_path
from app.utils.workforce import Workforce
from camel.tasks.task import Task
router = APIRouter()
# Create traceroot logger for chat controller
chat_logger = traceroot.get_logger("chat_controller")
# Logger for chat controller
chat_logger = logging.getLogger("chat_controller")
# SSE timeout configuration (60 minutes in seconds)
SSE_TIMEOUT_SECONDS = 60 * 60
@ -127,7 +127,6 @@ async def timeout_stream_wrapper(stream_generator, timeout_seconds: int = SSE_TI
@router.post("/chat", name="start chat")
@traceroot.trace()
async def post(data: Chat, request: Request):
chat_logger.info(
"Starting new chat session",
@ -138,7 +137,10 @@ async def post(data: Chat, request: Request):
# Set user-specific environment path for this thread
set_user_env_path(data.env_path)
load_dotenv(dotenv_path=data.env_path)
# Load environment with validated path
safe_env_path = sanitize_env_path(data.env_path)
if safe_env_path:
load_dotenv(dotenv_path=safe_env_path)
os.environ["file_save_path"] = data.file_save_path()
os.environ["browser_port"] = str(data.browser_port)
@ -185,7 +187,6 @@ async def post(data: Chat, request: Request):
@router.post("/chat/{id}", name="improve chat")
@traceroot.trace()
def improve(id: str, data: SupplementChat):
chat_logger.info("Chat improvement requested", extra={"task_id": id, "question_length": len(data.question)})
task_lock = get_task_lock(id)
@ -245,7 +246,6 @@ def improve(id: str, data: SupplementChat):
@router.put("/chat/{id}", name="supplement task")
@traceroot.trace()
def supplement(id: str, data: SupplementChat):
chat_logger.info("Chat supplement requested", extra={"task_id": id})
task_lock = get_task_lock(id)
@ -257,7 +257,6 @@ def supplement(id: str, data: SupplementChat):
@router.delete("/chat/{id}", name="stop chat")
@traceroot.trace()
def stop(id: str):
"""stop the task"""
chat_logger.info("=" * 80)
@ -277,7 +276,6 @@ def stop(id: str):
@router.post("/chat/{id}/human-reply")
@traceroot.trace()
def human_reply(id: str, data: HumanReply):
chat_logger.info("Human reply received", extra={"task_id": id, "reply_length": len(data.reply)})
task_lock = get_task_lock(id)
@ -287,7 +285,6 @@ def human_reply(id: str, data: HumanReply):
@router.post("/chat/{id}/install-mcp")
@traceroot.trace()
def install_mcp(id: str, data: McpServers):
chat_logger.info("Installing MCP servers", extra={"task_id": id, "servers_count": len(data.get("mcpServers", {}))})
task_lock = get_task_lock(id)
@ -297,7 +294,6 @@ def install_mcp(id: str, data: McpServers):
@router.post("/chat/{id}/add-task", name="add task to workforce")
@traceroot.trace()
def add_task(id: str, data: AddTaskRequest):
"""Add a new task to the workforce"""
chat_logger.info(f"Adding task to workforce for task_id: {id}, content: {data.content[:100]}...")
@ -321,7 +317,6 @@ def add_task(id: str, data: AddTaskRequest):
@router.delete("/chat/{project_id}/remove-task/{task_id}", name="remove task from workforce")
@traceroot.trace()
def remove_task(project_id: str, task_id: str):
"""Remove a task from the workforce"""
chat_logger.info(f"Removing task {task_id} from workforce for project_id: {project_id}")
@ -341,7 +336,6 @@ def remove_task(project_id: str, task_id: str):
@router.post("/chat/{project_id}/skip-task", name="skip task in workforce")
@traceroot.trace()
def skip_task(project_id: str):
"""
Skip/Stop current task execution while preserving context.

View file

@ -14,9 +14,9 @@
from fastapi import APIRouter
from pydantic import BaseModel
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("health_controller")
logger = logging.getLogger("health_controller")
router = APIRouter(tags=["Health"])

View file

@ -18,9 +18,9 @@ from app.component.model_validation import create_agent
from app.model.chat import PLATFORM_MAPPING
from camel.types import ModelType
from app.component.error_format import normalize_error_to_openai_format
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("model_controller")
logger = logging.getLogger("model_controller")
router = APIRouter()
@ -49,7 +49,6 @@ class ValidateModelResponse(BaseModel):
@router.post("/model/validate")
@traceroot.trace()
async def validate_model(request: ValidateModelRequest):
"""Validate model configuration and tool call support."""
platform = request.model_platform

View file

@ -28,17 +28,16 @@ from app.service.task import (
task_locks,
)
import asyncio
from app.component.environment import set_user_env_path
from utils import traceroot_wrapper as traceroot
from app.component.environment import set_user_env_path, sanitize_env_path
import logging
logger = traceroot.get_logger("task_controller")
logger = logging.getLogger("task_controller")
router = APIRouter()
@router.post("/task/{id}/start", name="start task")
@traceroot.trace()
def start(id: str):
task_lock = get_task_lock(id)
logger.info("Starting task", extra={"task_id": id})
@ -48,7 +47,6 @@ def start(id: str):
@router.put("/task/{id}", name="update task")
@traceroot.trace()
def put(id: str, data: UpdateData):
logger.info("Updating task", extra={"task_id": id, "task_items_count": len(data.task)})
logger.debug("Update task data", extra={"task_id": id, "data": data.model_dump_json()})
@ -63,7 +61,6 @@ class TakeControl(BaseModel):
@router.put("/task/{id}/take-control", name="take control pause or resume")
@traceroot.trace()
def take_control(id: str, data: TakeControl):
logger.info("Task control action", extra={"task_id": id, "action": data.action})
task_lock = get_task_lock(id)
@ -73,20 +70,21 @@ def take_control(id: str, data: TakeControl):
@router.post("/task/{id}/add-agent", name="add new agent")
@traceroot.trace()
def add_agent(id: str, data: NewAgent):
logger.info("Adding new agent to task", extra={"task_id": id, "agent_name": data.name})
logger.debug("New agent data", extra={"task_id": id, "agent_data": data.model_dump_json()})
# Set user-specific environment path for this thread
set_user_env_path(data.env_path)
load_dotenv(dotenv_path=data.env_path)
# Load environment with validated path
safe_env_path = sanitize_env_path(data.env_path)
if safe_env_path:
load_dotenv(dotenv_path=safe_env_path)
asyncio.run(get_task_lock(id).put_queue(ActionNewAgent(**data.model_dump())))
logger.info("Agent added to task", extra={"task_id": id, "agent_name": data.name})
return Response(status_code=204)
@router.delete("/task/stop-all", name="stop all tasks")
@traceroot.trace()
def stop_all():
logger.warning("Stopping all tasks", extra={"task_count": len(task_locks)})
for task_lock in task_locks.values():

View file

@ -16,7 +16,7 @@ from fastapi import APIRouter, HTTPException
from app.utils.toolkit.notion_mcp_toolkit import NotionMCPToolkit
from app.utils.toolkit.google_calendar_toolkit import GoogleCalendarToolkit
from app.utils.oauth_state_manager import oauth_state_manager
from utils import traceroot_wrapper as traceroot
import logging
from camel.toolkits.hybrid_browser_toolkit.hybrid_browser_toolkit_ts import (
HybridBrowserToolkit as BaseHybridBrowserToolkit,
)
@ -24,7 +24,7 @@ from app.utils.cookie_manager import CookieManager
import os
import uuid
logger = traceroot.get_logger("tool_controller")
logger = logging.getLogger("tool_controller")
router = APIRouter()

View file

@ -22,9 +22,9 @@ from app.component import code
from app.exception.exception import NoPermissionException, ProgramException, TokenException
from app.component.pydantic.i18n import trans, get_language
from app.exception.exception import UserException
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("exception_handler")
logger = logging.getLogger("exception_handler")
@api.exception_handler(RequestValidationError)

View file

@ -19,9 +19,9 @@ import re
from typing import Literal
from pydantic import BaseModel, Field, field_validator
from camel.types import ModelType, RoleType
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("chat_model")
logger = logging.getLogger("chat_model")
class Status(str, Enum):

View file

@ -18,9 +18,9 @@ All routers are explicitly registered here for better visibility and maintainabi
"""
from fastapi import FastAPI
from app.controller import chat_controller, model_controller, task_controller, tool_controller, health_controller
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("router")
logger = logging.getLogger("router")
def register_routers(app: FastAPI, prefix: str = "") -> None:

View file

@ -40,6 +40,7 @@ from app.utils.toolkit.human_toolkit import HumanToolkit
from app.utils.toolkit.note_taking_toolkit import NoteTakingToolkit
from app.utils.toolkit.terminal_toolkit import TerminalToolkit
from app.utils.workforce import Workforce
from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback
from app.model.chat import Chat, NewAgent, Status, sse_json, TaskContent
from camel.tasks import Task
from app.utils.agent import (
@ -61,10 +62,10 @@ from app.service.task import Action, Agents
from app.utils.server.sync_step import sync_step
from camel.types import ModelPlatformType
from camel.models import ModelProcessingError
from utils import traceroot_wrapper as traceroot
import logging
import os
logger = traceroot.get_logger("chat_service")
logger = logging.getLogger("chat_service")
def format_task_context(task_data: dict, seen_files: set | None = None, skip_files: bool = False) -> str:
@ -251,7 +252,6 @@ def build_context_for_workforce(task_lock: TaskLock, options: Chat) -> str:
@sync_step
@traceroot.trace()
async def step_solve(options: Chat, request: Request, task_lock: TaskLock):
start_event_loop = True
@ -1061,7 +1061,6 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock):
# Continue processing other items instead of breaking
@traceroot.trace()
async def install_mcp(
mcp: ListenChatAgent,
install_mcp: ActionInstallMcpData,
@ -1092,7 +1091,8 @@ def to_sub_tasks(task: Task, summary_task_content: str):
def tree_sub_tasks(sub_tasks: list[Task], depth: int = 0):
if depth > 5:
return []
return (
result = (
chain(sub_tasks)
.filter(lambda x: x.content != "")
.map(
@ -1106,6 +1106,8 @@ def tree_sub_tasks(sub_tasks: list[Task], depth: int = 0):
.value()
)
return result
def update_sub_tasks(sub_tasks: list[Task], update_tasks: dict[str, TaskContent], depth: int = 0):
if depth > 5: # limit the depth of the recursion
@ -1183,7 +1185,6 @@ Is this a complex task? (yes/no):"""
return True
@traceroot.trace()
async def summary_task(agent: ListenChatAgent, task: Task) -> str:
prompt = f"""The user's task is:
---
@ -1286,7 +1287,6 @@ async def get_task_result_with_optional_summary(task: Task, options: Chat) -> st
return result
@traceroot.trace()
async def construct_workforce(options: Chat) -> tuple[Workforce, ListenChatAgent]:
"""Construct a workforce with all required agents.
@ -1403,6 +1403,12 @@ The current date is {datetime.date.today()}. For any date-related tasks, you MUS
except (ValueError, AttributeError):
model_platform_enum = None
# Create workforce metrics callback for workforce analytics
workforce_metrics = WorkforceMetricsCallback(
project_id=options.project_id,
task_id=options.task_id
)
workforce = Workforce(
options.project_id,
"A workforce",
@ -1414,6 +1420,8 @@ The current date is {datetime.date.today()}. For any date-related tasks, you MUS
use_structured_output_handler=False if model_platform_enum == ModelPlatformType.OPENAI else True,
)
# Register workforce metrics callback
workforce._callbacks.append(workforce_metrics)
workforce.add_single_agent_worker(
"Developer Agent: A master-level coding assistant with a powerful "
"terminal. It can write and execute code, manage files, automate "
@ -1473,7 +1481,6 @@ def format_agent_description(agent_data: NewAgent | ActionNewAgent) -> str:
return " ".join(description_parts)
@traceroot.trace()
async def new_agent_model(data: NewAgent | ActionNewAgent, options: Chat):
logger.info("Creating new agent", extra={"agent_name": data.name, "project_id": options.project_id, "task_id": options.task_id})
logger.debug("New agent data", extra={"agent_data": data.model_dump_json()})

View file

@ -24,9 +24,9 @@ from contextlib import contextmanager
from contextvars import ContextVar
from datetime import datetime, timedelta
import weakref
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("task_service")
logger = logging.getLogger("task_service")
class Action(str, Enum):

View file

@ -21,7 +21,7 @@ from threading import Event, Lock
import traceback
from typing import Any, Callable, Dict, List, Tuple
import uuid
from utils import traceroot_wrapper as traceroot
import logging
# Thread-safe reference to main event loop using contextvars
# This ensures each request has its own event loop reference, avoiding race conditions
@ -69,7 +69,7 @@ def _schedule_async_task(coro):
asyncio.run_coroutine_threadsafe(coro, main_loop)
else:
# This should not happen in normal operation - log error and skip
traceroot.get_logger("agent").error(
logging.error(
"No event loop available for async task scheduling, task skipped. "
"Ensure set_main_event_loop() is called before parallel agent creation."
)
@ -129,8 +129,8 @@ import datetime
from pydantic import BaseModel
from app.model.chat import Chat, McpServers
# Create traceroot logger for agent tracking
traceroot_logger = traceroot.get_logger("agent")
# Logger for agent tracking
logger = logging.getLogger("agent")
from app.service.task import (
Action,
ActionActivateAgentData,
@ -148,7 +148,6 @@ NOW_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:00:00")
class ListenChatAgent(ChatAgent):
@traceroot.trace()
def __init__(
self,
api_task_id: str,
@ -218,7 +217,6 @@ class ListenChatAgent(ChatAgent):
process_task_id: str = ""
@traceroot.trace()
def step(
self,
input_message: BaseMessage | str,
@ -244,7 +242,7 @@ class ListenChatAgent(ChatAgent):
error_info = None
message = None
res = None
traceroot_logger.info(
logger.info(
f"Agent {self.agent_name} starting step with message: {input_message.content if isinstance(input_message, BaseMessage) else input_message}"
)
try:
@ -254,20 +252,16 @@ class ListenChatAgent(ChatAgent):
error_info = e
if "Budget has been exceeded" in str(e):
message = "Budget has been exceeded"
traceroot_logger.warning(f"Agent {self.agent_name} budget exceeded")
logger.warning(f"Agent {self.agent_name} budget exceeded")
asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough()))
else:
message = str(e)
traceroot_logger.error(
f"Agent {self.agent_name} model processing error: {e}"
)
logger.error(f"Agent {self.agent_name} model processing error: {e}")
total_tokens = 0
except Exception as e:
res = None
error_info = e
traceroot_logger.error(
f"Agent {self.agent_name} unexpected error in step: {e}", exc_info=True
)
logger.error(f"Agent {self.agent_name} unexpected error in step: {e}", exc_info=True)
message = f"Error processing message: {e!s}"
total_tokens = 0
@ -314,7 +308,7 @@ class ListenChatAgent(ChatAgent):
message = res.msg.content if res.msg else ""
usage_info = res.info.get("usage") or res.info.get("token_usage") or {}
total_tokens = usage_info.get("total_tokens", 0) if usage_info else 0
traceroot_logger.info(
logger.info(
f"Agent {self.agent_name} completed step, tokens used: {total_tokens}"
)
@ -339,7 +333,6 @@ class ListenChatAgent(ChatAgent):
assert res is not None
return res
@traceroot.trace()
async def astep(
self,
input_message: BaseMessage | str,
@ -365,7 +358,7 @@ class ListenChatAgent(ChatAgent):
error_info = None
message = None
res = None
traceroot_logger.debug(
logger.debug(
f"Agent {self.agent_name} starting async step with message: {input_message.content if isinstance(input_message, BaseMessage) else input_message}"
)
@ -378,30 +371,23 @@ class ListenChatAgent(ChatAgent):
error_info = e
if "Budget has been exceeded" in str(e):
message = "Budget has been exceeded"
traceroot_logger.warning(f"Agent {self.agent_name} budget exceeded")
logger.warning(f"Agent {self.agent_name} budget exceeded")
asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough()))
else:
message = str(e)
traceroot_logger.error(
f"Agent {self.agent_name} model processing error: {e}"
)
logger.error(f"Agent {self.agent_name} model processing error: {e}")
total_tokens = 0
except Exception as e:
res = None
error_info = e
traceroot_logger.error(
f"Agent {self.agent_name} unexpected error in async step: {e}",
exc_info=True,
)
logger.error(f"Agent {self.agent_name} unexpected error in async step: {e}", exc_info=True)
message = f"Error processing message: {e!s}"
total_tokens = 0
if res is not None:
message = res.msg.content if res.msg else ""
total_tokens = res.info["usage"]["total_tokens"]
traceroot_logger.info(
f"Agent {self.agent_name} completed step, tokens used: {total_tokens}"
)
logger.info(f"Agent {self.agent_name} completed step, tokens used: {total_tokens}")
assert message is not None
@ -424,7 +410,6 @@ class ListenChatAgent(ChatAgent):
assert res is not None
return res
@traceroot.trace()
def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord:
func_name = tool_call_request.tool_name
tool: FunctionTool = self._internal_tools[func_name]
@ -451,7 +436,7 @@ class ListenChatAgent(ChatAgent):
if hasattr(tool, "_toolkit_name")
else "mcp_toolkit"
)
traceroot_logger.debug(
logger.debug(
f"Agent {self.agent_name} executing tool: {func_name} from toolkit: {toolkit_name} with args: {json.dumps(args, ensure_ascii=False)}"
)
@ -473,7 +458,7 @@ class ListenChatAgent(ChatAgent):
# Set process_task context for all tool executions
with set_process_task(self.process_task_id):
raw_result = tool(**args)
traceroot_logger.debug(f"Tool {func_name} executed successfully")
logger.debug(f"Tool {func_name} executed successfully")
if self.mask_tool_output:
self._secure_result_store[tool_call_id] = raw_result
result = (
@ -518,9 +503,7 @@ class ListenChatAgent(ChatAgent):
error_msg = f"Error executing tool '{func_name}': {e!s}"
result = f"Tool execution failed: {error_msg}"
mask_flag = False
traceroot_logger.error(
f"Tool execution failed for {func_name}: {e}", exc_info=True
)
logger.error(f"Tool execution failed for {func_name}: {e}", exc_info=True)
return self._record_tool_calling(
func_name,
@ -531,7 +514,6 @@ class ListenChatAgent(ChatAgent):
extra_content=tool_call_request.extra_content,
)
@traceroot.trace()
async def _aexecute_tool(
self, tool_call_request: ToolCallRequest
) -> ToolCallingRecord:
@ -575,7 +557,7 @@ class ListenChatAgent(ChatAgent):
if not toolkit_name:
toolkit_name = "mcp_toolkit"
traceroot_logger.info(
logger.info(
f"Agent {self.agent_name} executing async tool: {func_name} from toolkit: {toolkit_name} with args: {json.dumps(args, ensure_ascii=False)}"
)
@ -645,9 +627,7 @@ class ListenChatAgent(ChatAgent):
# Capture the error message to prevent framework crash
error_msg = f"Error executing async tool '{func_name}': {e!s}"
result = {"error": error_msg}
traceroot_logger.error(
f"Async tool execution failed for {func_name}: {e}", exc_info=True
)
logger.error(f"Async tool execution failed for {func_name}: {e}", exc_info=True)
# Prepare result message with truncation
if isinstance(result, str):
@ -684,7 +664,6 @@ class ListenChatAgent(ChatAgent):
extra_content=tool_call_request.extra_content,
)
@traceroot.trace()
def clone(self, with_memory: bool = False) -> ChatAgent:
"""Please see super.clone()"""
system_message = None if with_memory else self._original_system_message
@ -730,7 +709,6 @@ class ListenChatAgent(ChatAgent):
return new_agent
@traceroot.trace()
def agent_model(
agent_name: str,
system_message: str | BaseMessage,
@ -743,9 +721,7 @@ def agent_model(
):
task_lock = get_task_lock(options.project_id)
agent_id = str(uuid.uuid4())
traceroot_logger.debug(
f"Creating agent: {agent_name} with id: {agent_id} for project: {options.project_id}"
)
logger.info(f"Creating agent: {agent_name} with id: {agent_id} for project: {options.project_id}")
# Use thread-safe scheduling to support parallel agent creation
_schedule_async_task(
task_lock.put_queue(
@ -807,7 +783,7 @@ def agent_model(
}:
model_config["parallel_tool_calls"] = False
except (ValueError, AttributeError):
traceroot_logger.error(
logging.error(
f"Invalid model platform for browser agent: {options.model_platform}",
exc_info=True,
)
@ -837,7 +813,6 @@ def agent_model(
)
@traceroot.trace()
def question_confirm_agent(options: Chat):
return agent_model(
"question_confirm_agent",
@ -846,7 +821,6 @@ def question_confirm_agent(options: Chat):
)
@traceroot.trace()
def task_summary_agent(options: Chat):
return agent_model(
"task_summary_agent",
@ -855,12 +829,9 @@ def task_summary_agent(options: Chat):
)
@traceroot.trace()
async def developer_agent(options: Chat):
working_directory = get_working_directory(options)
traceroot_logger.info(
f"Creating developer agent for project: {options.project_id} in directory: {working_directory}"
)
logger.info(f"Creating developer agent for project: {options.project_id} in directory: {working_directory}")
message_integration = ToolkitMessageIntegration(
message_handler=HumanToolkit(
options.project_id, Agents.developer_agent
@ -1046,13 +1017,9 @@ these tips to maximize your effectiveness:
)
@traceroot.trace()
def browser_agent(options: Chat):
working_directory = get_working_directory(options)
traceroot_logger.debug(
f"Creating browser agent for project: {options.project_id} in directory: {working_directory}"
)
logger.info(f"Creating browser agent for project: {options.project_id} in directory: {working_directory}")
message_integration = ToolkitMessageIntegration(
message_handler=HumanToolkit(
options.project_id, Agents.browser_agent
@ -1252,12 +1219,9 @@ Your approach depends on available search tools:
)
@traceroot.trace()
async def document_agent(options: Chat):
working_directory = get_working_directory(options)
traceroot_logger.debug(
f"Creating document agent for project: {options.project_id} in directory: {working_directory}"
)
logger.info(f"Creating document agent for project: {options.project_id} in directory: {working_directory}")
message_integration = ToolkitMessageIntegration(
message_handler=HumanToolkit(
@ -1481,12 +1445,9 @@ supported formats including advanced spreadsheet functionality.
)
@traceroot.trace()
def multi_modal_agent(options: Chat):
working_directory = get_working_directory(options)
traceroot_logger.debug(
f"Creating multi-modal agent for project: {options.project_id} in directory: {working_directory}"
)
logger.info(f"Creating multi-modal agent for project: {options.project_id} in directory: {working_directory}")
message_integration = ToolkitMessageIntegration(
message_handler=HumanToolkit(
@ -1677,16 +1638,13 @@ multi-modal content across audio and visual domains.
)
@traceroot.trace()
async def social_medium_agent(options: Chat):
"""
Agent to handling tasks related to social media:
include toolkits: WhatsApp, Twitter, LinkedIn, Reddit, Notion, Slack, Discord and Google Suite.
"""
working_directory = get_working_directory(options)
traceroot_logger.info(
f"Creating social medium agent for project: {options.project_id} in directory: {working_directory}"
)
logger.info(f"Creating social medium agent for project: {options.project_id} in directory: {working_directory}")
tools = [
*WhatsAppToolkit.get_can_use_tools(options.project_id),
*TwitterToolkit.get_can_use_tools(options.project_id),
@ -1807,9 +1765,8 @@ operations.
)
@traceroot.trace()
async def mcp_agent(options: Chat):
traceroot_logger.info(
logger.info(
f"Creating MCP agent for project: {options.project_id} with {len(options.installed_mcp['mcpServers'])} MCP servers"
)
tools = [
@ -1819,7 +1776,7 @@ async def mcp_agent(options: Chat):
if len(options.installed_mcp["mcpServers"]) > 0:
try:
mcp_tools = await get_mcp_tools(options.installed_mcp)
traceroot_logger.info(
logger.info(
f"Retrieved {len(mcp_tools)} MCP tools for task {options.project_id}"
)
if mcp_tools:
@ -1831,16 +1788,14 @@ async def mcp_agent(options: Chat):
)
for tool in mcp_tools
]
traceroot_logger.debug(f"MCP tools: {tool_names}")
logger.debug(f"MCP tools: {tool_names}")
tools = [*tools, *mcp_tools]
except Exception as e:
traceroot_logger.debug(repr(e))
logger.debug(repr(e))
task_lock = get_task_lock(options.project_id)
agent_id = str(uuid.uuid4())
traceroot_logger.info(
f"Creating MCP agent: {Agents.mcp_agent} with id: {agent_id} for task: {options.project_id}"
)
logger.info(f"Creating MCP agent: {Agents.mcp_agent} with id: {agent_id} for task: {options.project_id}")
asyncio.create_task(
task_lock.put_queue(
ActionCreateAgentData(
@ -1883,11 +1838,8 @@ async def mcp_agent(options: Chat):
)
@traceroot.trace()
async def get_toolkits(tools: list[str], agent_name: str, api_task_id: str):
traceroot_logger.info(
f"Getting toolkits for agent: {agent_name}, task: {api_task_id}, tools: {tools}"
)
logger.info(f"Getting toolkits for agent: {agent_name}, task: {api_task_id}, tools: {tools}")
toolkits = {
"audio_analysis_toolkit": AudioAnalysisToolkit,
"openai_image_toolkit": OpenAIImageToolkit,
@ -1925,15 +1877,12 @@ async def get_toolkits(tools: list[str], agent_name: str, api_task_id: str):
)
res.extend(toolkit_tools)
else:
traceroot_logger.warning(f"Toolkit {item} not found for agent {agent_name}")
logger.warning(f"Toolkit {item} not found for agent {agent_name}")
return res
@traceroot.trace()
async def get_mcp_tools(mcp_server: McpServers):
traceroot_logger.info(
f"Getting MCP tools for {len(mcp_server['mcpServers'])} servers"
)
logger.info(f"Getting MCP tools for {len(mcp_server['mcpServers'])} servers")
if len(mcp_server["mcpServers"]) == 0:
return []
@ -1953,9 +1902,7 @@ async def get_mcp_tools(mcp_server: McpServers):
mcp_toolkit = MCPToolkit(config_dict=config_dict, timeout=180)
await mcp_toolkit.connect()
traceroot_logger.info(
f"Successfully connected to MCP toolkit with {len(mcp_server['mcpServers'])} servers"
)
logger.info(f"Successfully connected to MCP toolkit with {len(mcp_server['mcpServers'])} servers")
tools = mcp_toolkit.get_tools()
if tools:
tool_names = [
@ -1966,11 +1913,11 @@ async def get_mcp_tools(mcp_server: McpServers):
)
for tool in tools
]
traceroot_logger.debug(f"MCP tool names: {tool_names}")
logging.debug(f"MCP tool names: {tool_names}")
return tools
except asyncio.CancelledError:
traceroot_logger.info("MCP connection cancelled during get_mcp_tools")
logger.info("MCP connection cancelled during get_mcp_tools")
return []
except Exception as e:
traceroot_logger.error(f"Failed to connect MCP toolkit: {e}", exc_info=True)
logger.error(f"Failed to connect MCP toolkit: {e}", exc_info=True)
return []

View file

@ -15,11 +15,11 @@
import sqlite3
import os
from typing import List, Dict, Optional
from utils import traceroot_wrapper as traceroot
import logging
import shutil
from datetime import datetime
logger = traceroot.get_logger("cookie_manager")
logger = logging.getLogger("cookie_manager")
class CookieManager:

View file

@ -28,9 +28,9 @@ from app.service.task import (
)
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from app.service.task import process_task
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("toolkit_listen")
logger = logging.getLogger("toolkit_listen")
def _safe_put_queue(task_lock, data):

View file

@ -18,8 +18,8 @@ OAuth authorization state manager for background authorization flows
import threading
from typing import Dict, Optional, Literal, Any
from datetime import datetime
from utils import traceroot_wrapper as traceroot
logger = traceroot.get_logger("main")
import logging
logger = logging.getLogger("main")
AuthStatus = Literal["pending", "authorizing", "success", "failed", "cancelled"]

View file

@ -20,9 +20,9 @@ import json
from app.service.chat_service import Chat
from app.component.environment import env
from app.service.task import get_task_lock_if_exists
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("sync_step")
logger = logging.getLogger("sync_step")
def sync_step(func):
@ -65,6 +65,12 @@ def sync_step(func):
task_id = chat.task_id
if task_id:
# TODO: Filter out unnecessary events to avoid database bloat
# - Skip "decompose_text" streaming events (sent 50-200+ times per task)
# - Only sync structural events: decompose_progress, task_state, create_agent, etc.
# - Consider batching or deduplication for high-frequency events
# - Extract and add task dependencies for analytics
asyncio.create_task(
send_to_api(
sync_url,

View file

@ -16,7 +16,7 @@ import datetime
from camel.agents.chat_agent import AsyncStreamingChatAgentResponse
from camel.societies.workforce.single_agent_worker import SingleAgentWorker as BaseSingleAgentWorker
from camel.tasks.task import Task, TaskState, is_task_result_insufficient
from utils import traceroot_wrapper as traceroot
import logging
from app.utils.agent import ListenChatAgent
from camel.societies.workforce.prompts import PROCESS_TASK_PROMPT
@ -24,7 +24,7 @@ from colorama import Fore
from camel.societies.workforce.utils import TaskResult
from camel.utils.context_utils import ContextUtility
logger = traceroot.get_logger("single_agent_worker")
logger = logging.getLogger("single_agent_worker")
class SingleAgentWorker(BaseSingleAgentWorker):

View file

@ -0,0 +1,3 @@
LANGFUSE_PUBLIC_KEY=pk-lf-...
LANGFUSE_SECRET_KEY=sk-lf-...
LANGFUSE_BASE_URL=https://us.cloud.langfuse.com # Optional, defaults to US cloud

View file

@ -0,0 +1,204 @@
# Workforce Telemetry
OpenTelemetry-based telemetry for CAMEL workforce events, sent to Langfuse for observability.
## Configuration
Add the following environment variables to `~/.eigent/.env`:
```bash
LANGFUSE_PUBLIC_KEY=pk-lf-...
LANGFUSE_SECRET_KEY=sk-lf-...
LANGFUSE_BASE_URL=https://us.cloud.langfuse.com # Optional, defaults to US cloud
```
**If these keys are not specified, telemetry will be disabled.**
## Langfuse Setup
- **Cloud**: Sign up at [Langfuse Cloud](https://cloud.langfuse.com)
- **Self-hosted**: Use the [open-source version](https://langfuse.com/self-hosting)
- **Documentation**: [https://langfuse.com/docs](https://langfuse.com/docs)
## Privacy
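Telemetry is limited to workforce **metadata** (task IDs, timings, model names, token counts, quality scores) plus a few short descriptive fields listed below: task descriptions, worker role descriptions, and error/critical log messages. **Full task inputs, outputs, and conversation content are not sent to Langfuse.** Because task descriptions and log messages are free text, avoid putting PII or secrets in them if telemetry is enabled.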
## Architecture
### Singleton TracerProvider
The `TracerProvider` is initialized once during FastAPI startup (`main.py`) to ensure only one `BatchSpanProcessor` is running, regardless of how many `WorkforceMetricsCallback` instances are created. This prevents:
- Resource leaks from multiple background export threads
- OOM issues from unbounded span queuing (max queue: 4096 spans)
- Excessive memory usage across multiple workforce sessions
The initialization happens in the startup event:
```python
@api.on_event("startup")
async def startup_event():
from app.utils.telemetry.workforce_metrics import initialize_tracer_provider
initialize_tracer_provider()
```
### Batch Processing Configuration
- `max_queue_size`: 4096 spans (drops oldest when full)
- `export_timeout_millis`: 30000 (30s timeout for exports)
- `schedule_delay_millis`: 3000 (exports every 3s)
- `max_export_batch_size`: 1024 (max spans per export)
## Span Structure
All spans share common resource attributes and scope information:
```json
{
"resourceAttributes": {
"service.name": "eigent-workforce",
"eigent.project.id": "1768815931733-6575",
"eigent.task.id": "1768815944094-9806"
},
"scope": {
"name": "eigent.workforce",
"version": "0.2.83a9"
}
}
```
### worker.created
Emitted when a worker is created.
```json
{
"attributes": {
"eigent.worker.id": "73d20286-2c17-467a-8153-2a6ea8cbb6c2",
"eigent.worker.type": "SingleAgentWorker",
"eigent.worker.role": "Developer Agent: A master-level coding...",
"eigent.worker.agent": "developer_agent",
"eigent.worker.model.type": "gpt-4.1-mini"
}
}
```
### task.created
Emitted when a task is created.
```json
{
"attributes": {
"eigent.task.id": "1768815944094-9806.1",
"eigent.task.description": "Task description",
"eigent.project.id": "1768815931733-6575",
"eigent.task.parent_id": "1768815944094-9806",
"eigent.task.type": "task_type"
}
}
```
### task.assigned
Emitted when a task is assigned to a worker.
```json
{
"attributes": {
"eigent.task.id": "1768815944094-9806.1",
"eigent.worker.id": "0fae2d3d-7c0a-4b50-b09d-da35ae61786d",
"eigent.project.id": "1768815931733-6575",
"eigent.task.queue_time_seconds": "1.5",
"eigent.task.dependencies": "[\"dep_1\", \"dep_2\"]"
}
}
```
### task.execution:{task_id}
Long-running span tracking task execution from start to completion.
```json
{
"attributes": {
"eigent.task.id": "1768815944094-9806.1",
"eigent.project.id": "1768815931733-6575",
"eigent.task.status": "completed",
"eigent.worker.id": "0fae2d3d-7c0a-4b50-b09d-da35ae61786d",
"eigent.task.timestamp": "2026-01-19T09:46:40.045077+00:00",
"eigent.task.parent_id": "1768815944094-9806",
"eigent.task.processing_time_seconds": "10.926168203353882",
"eigent.task.quality_score": "80",
"eigent.task.token_usage.total_tokens": "37284"
}
}
```
### workforce.all_tasks_completed
Emitted when all tasks in the workforce are completed.
```json
{
"attributes": {
"eigent.project.id": "1768815931733-6575",
"eigent.task.id": "1768815944094-9806",
"eigent.task.timestamp": "2026-01-19T09:46:44.901068+00:00",
"workforce.total_tasks": "5"
}
}
```
### log.message
Emitted for error and critical log messages.
```json
{
"attributes": {
"log.level": "error",
"log.message": "Error message",
"eigent.project.id": "1768815931733-6575"
}
}
```
## Captured Attributes Reference
### Project & Task
- `eigent.project.id` - Workforce/project identifier
- `eigent.task.id` - Task identifier
- `eigent.task.description` - Task description
- `eigent.task.parent_id` - Parent task ID
- `eigent.task.type` - Task type
- `eigent.task.status` - Task status (started, completed, failed)
- `eigent.task.timestamp` - ISO 8601 timestamp
- `eigent.task.dependencies` - JSON array of dependency task IDs
- `eigent.task.queue_time_seconds` - Time in queue before assignment
- `eigent.task.processing_time_seconds` - Task execution duration
- `eigent.task.quality_score` - Quality score (0-100)
### Worker
- `eigent.worker.id` - Worker UUID
- `eigent.worker.type` - Worker class type
- `eigent.worker.role` - Worker role description
- `eigent.worker.agent` - Agent type (developer_agent, browser_agent, etc.)
- `eigent.worker.model.type` - Model name (gpt-4, claude-3, etc.)
### Token Usage
- `eigent.task.token_usage.total_tokens` - Total tokens used
- `eigent.task.token_usage.*` - Additional token usage metrics
### Langfuse
- `langfuse.session.id` - Set to project ID for grouping
- `langfuse.tags` - ["workforce", "camel", "eigent"]
### Workforce
- `workforce.total_tasks` - Total number of tasks completed

View file

View file

@ -0,0 +1,560 @@
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
import base64
import json
import logging
import os
import re
from typing import Any, Dict
import camel
from camel.societies.workforce.events import (LogEvent, TaskAssignedEvent,
TaskCompletedEvent,
TaskCreatedEvent,
TaskFailedEvent,
TaskStartedEvent,
WorkerCreatedEvent)
from camel.societies.workforce.workforce_metrics import WorkforceMetrics
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import \
OTLPSpanExporter
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace import Status, StatusCode
logger = logging.getLogger(__name__)
# Environment variable keys
# The LANGFUSE_* keys are read by this module; the OTEL_* keys are written by
# initialize_tracer_provider() before the OTLP exporter is constructed.
ENV_LANGFUSE_PUBLIC_KEY = "LANGFUSE_PUBLIC_KEY"
ENV_LANGFUSE_SECRET_KEY = "LANGFUSE_SECRET_KEY"
ENV_LANGFUSE_BASE_URL = "LANGFUSE_BASE_URL"
ENV_OTEL_EXPORTER_OTLP_ENDPOINT = "OTEL_EXPORTER_OTLP_ENDPOINT"
ENV_OTEL_EXPORTER_OTLP_HEADERS = "OTEL_EXPORTER_OTLP_HEADERS"
# Default values
DEFAULT_LANGFUSE_BASE_URL = "https://us.cloud.langfuse.com"
DEFAULT_LANGFUSE_TAGS = ["workforce", "camel", "eigent"]
# Path appended to the Langfuse base URL to form the OTLP endpoint
LANGFUSE_OTEL_PATH = "/api/public/otel"
# Attribute keys for eigent.project namespace
ATTR_PROJECT_ID = "eigent.project.id"
# Attribute keys for eigent.task namespace
ATTR_TASK_ID = "eigent.task.id"
ATTR_TASK_DESCRIPTION = "eigent.task.description"
ATTR_TASK_PARENT_ID = "eigent.task.parent_id"
ATTR_TASK_TYPE = "eigent.task.type"
ATTR_TASK_STATUS = "eigent.task.status"
ATTR_TASK_QUEUE_TIME_SECONDS = "eigent.task.queue_time_seconds"
ATTR_TASK_PROCESSING_TIME_SECONDS = "eigent.task.processing_time_seconds"
ATTR_TASK_QUALITY_SCORE = "eigent.task.quality_score"
ATTR_TASK_TIMESTAMP = "eigent.task.timestamp"
ATTR_TASK_DEPENDENCIES = "eigent.task.dependencies"
# Attribute keys for eigent.worker namespace
ATTR_WORKER_ID = "eigent.worker.id"
ATTR_WORKER_TYPE = "eigent.worker.type"
ATTR_WORKER_ROLE = "eigent.worker.role"
ATTR_WORKER_AGENT = "eigent.worker.agent"
ATTR_WORKER_MODEL_TYPE = "eigent.worker.model.type"
# Attribute keys for workforce namespace
ATTR_WORKFORCE_TOTAL_TASKS = "workforce.total_tasks"
# Langfuse-specific attributes
ATTR_LANGFUSE_SESSION_ID = "langfuse.session.id"
ATTR_LANGFUSE_TAGS = "langfuse.tags"
# OpenTelemetry service and tracer names
SERVICE_NAME_WORKFORCE = "eigent-workforce"
TRACER_NAME_WORKFORCE = "eigent.workforce"
# Span names
# SPAN_WORKFORCE_EXECUTION and SPAN_TASK_EXECUTION are used as prefixes and
# get a ":<task_id>" suffix when the span is started.
SPAN_WORKFORCE_EXECUTION = "workforce.execution"
SPAN_WORKER_CREATED = "worker.created"
SPAN_TASK_CREATED = "task.created"
SPAN_TASK_ASSIGNED = "task.assigned"
SPAN_TASK_EXECUTION = "task.execution"
SPAN_LOG_MESSAGE = "log.message"
SPAN_ALL_TASKS_COMPLETED = "workforce.all_tasks_completed"
# Global tracer provider singleton to avoid creating multiple processors
# This is initialized once during FastAPI startup
# Stays None until initialize_tracer_provider() has run; get_tracer_provider()
# raises RuntimeError while it is still None.
_GLOBAL_TRACER_PROVIDER: TracerProvider | None = None
def initialize_tracer_provider() -> None:
    """Build the process-wide TracerProvider once, at application startup.

    A provider is always created and stored in ``_GLOBAL_TRACER_PROVIDER``.
    A ``BatchSpanProcessor`` exporting over OTLP is attached to it only when
    both ``LANGFUSE_PUBLIC_KEY`` and ``LANGFUSE_SECRET_KEY`` are set; without
    them the provider has no processor and nothing is exported.

    Calling this again after a provider exists logs a warning and returns
    without changing anything.
    """
    global _GLOBAL_TRACER_PROVIDER

    if _GLOBAL_TRACER_PROVIDER is not None:
        logger.warning("TracerProvider already initialized, skipping")
        return

    # Read configuration from the environment
    public_key = os.getenv(ENV_LANGFUSE_PUBLIC_KEY)
    secret_key = os.getenv(ENV_LANGFUSE_SECRET_KEY)
    base_url = os.getenv(ENV_LANGFUSE_BASE_URL, DEFAULT_LANGFUSE_BASE_URL)

    # The provider carries the service name as its only resource attribute
    provider = TracerProvider(
        resource=Resource(attributes={SERVICE_NAME: SERVICE_NAME_WORKFORCE}))

    if not (public_key and secret_key):
        logger.info("Langfuse credentials not found, telemetry disabled")
        _GLOBAL_TRACER_PROVIDER = provider
        return

    logger.info("Initializing Langfuse telemetry")

    # The exporter below is constructed without arguments and picks up its
    # endpoint and headers from these environment variables.
    # NOTE(review): os.environ is process-wide and inherited by child
    # processes, so the auth header becomes visible to them - confirm this
    # is acceptable or pass endpoint/headers to the exporter directly.
    os.environ[ENV_OTEL_EXPORTER_OTLP_ENDPOINT] = _create_langfuse_endpoint(
        base_url)
    os.environ[ENV_OTEL_EXPORTER_OTLP_HEADERS] = _create_basic_auth_header(
        public_key, secret_key)

    # Batch processor: bounded queue so a failing exporter cannot grow
    # memory without limit, and a finite export timeout.
    provider.add_span_processor(
        BatchSpanProcessor(
            OTLPSpanExporter(),
            max_queue_size=4096,  # Drop spans if queue is full
            export_timeout_millis=30000,  # 30s timeout
            schedule_delay_millis=3000,  # Export every 3s
            max_export_batch_size=1024,  # Export up to 1024 spans
        ))
    logger.info("Langfuse telemetry initialized successfully")

    _GLOBAL_TRACER_PROVIDER = provider
def get_tracer_provider() -> TracerProvider:
    """Return the shared TracerProvider created at application startup.

    Returns:
        TracerProvider: The module-level provider instance

    Raises:
        RuntimeError: If initialize_tracer_provider() has not run yet
    """
    provider = _GLOBAL_TRACER_PROVIDER
    if provider is not None:
        return provider
    raise RuntimeError("TracerProvider not initialized. "
                       "Call initialize_tracer_provider() during app startup.")
def _create_langfuse_endpoint(base_url: str) -> str:
    """Create the Langfuse OTLP endpoint URL.

    Trailing slashes on ``base_url`` are removed before the OTLP path is
    appended, so ``https://host/`` and ``https://host`` yield the same
    endpoint instead of one containing ``//api/public/otel``.

    Args:
        base_url: Langfuse base URL, with or without a trailing slash

    Returns:
        Base URL followed by ``LANGFUSE_OTEL_PATH``
    """
    return f"{base_url.rstrip('/')}{LANGFUSE_OTEL_PATH}"
def _create_basic_auth_header(public_key: str, secret_key: str) -> str:
"""Create Basic Authentication header for Langfuse.
Args:
public_key: Langfuse public key
secret_key: Langfuse secret key
Returns:
Authorization header value (e.g., "Authorization=Basic ...")
"""
credentials = f"{public_key}:{secret_key}"
encoded = base64.b64encode(credentials.encode()).decode()
return f"Authorization=Basic {encoded}"
class WorkforceMetricsCallback(WorkforceMetrics):
    """OpenTelemetry metrics callback for workforce events.

    Sends workforce events to Langfuse via the OTLP protocol. Each event is
    recorded as a span that is a direct child of a single root span named
    ``workforce.execution:<task_id>``.

    Notes:
    - Export is handled by the shared TracerProvider, so the callback
      itself only creates and ends spans.
    - Task relationships are recorded as attributes (parent task ID and a
      JSON list of dependency IDs), not as nested spans.
    - Every ``log_*`` method is a no-op when telemetry is disabled.
    """

    def __init__(self, project_id: str, task_id: str):
        """Initialize OpenTelemetry metrics callback.

        Uses the global shared TracerProvider to avoid creating multiple
        BatchSpanProcessor instances. Telemetry is enabled only when both
        Langfuse keys are present in the environment; if they are present
        but the shared provider has not been initialized, a warning is
        logged and telemetry is disabled for this instance instead of
        raising.

        Args:
            project_id: The project/workforce identifier
            task_id: The task identifier

        Environment variables:
            LANGFUSE_PUBLIC_KEY: Langfuse public key (required)
            LANGFUSE_SECRET_KEY: Langfuse secret key (required)
            LANGFUSE_BASE_URL: Langfuse base URL
                (optional, defaults to "https://us.cloud.langfuse.com")
        """
        super().__init__()
        self.project_id = project_id
        self.task_id = task_id

        # Check if telemetry is enabled
        langfuse_public_key = os.getenv(ENV_LANGFUSE_PUBLIC_KEY)
        langfuse_secret_key = os.getenv(ENV_LANGFUSE_SECRET_KEY)
        self.enabled = bool(langfuse_public_key and langfuse_secret_key)

        # Initialize tracer and root_span as None by default
        self.tracer = None
        self.root_span = None
        # Open task execution spans (task_id -> span)
        self.task_spans = {}
        # Quality scores parsed from log messages (task_id -> quality_score)
        self.task_quality_scores = {}

        if not self.enabled:
            return

        try:
            provider = get_tracer_provider()
        except RuntimeError:
            # Telemetry must never prevent a workforce from being created.
            logger.warning(
                "TracerProvider not initialized; workforce telemetry "
                "disabled for project %s", project_id)
            self.enabled = False
            return

        # Use CAMEL version for instrumentation versioning
        self.tracer = provider.get_tracer(TRACER_NAME_WORKFORCE,
                                          camel.__version__)
        self.root_span = self.tracer.start_span(
            f"{SPAN_WORKFORCE_EXECUTION}:{task_id}")

        # Langfuse-specific attributes
        self.root_span.set_attribute(ATTR_LANGFUSE_SESSION_ID, project_id)
        self.root_span.set_attribute(ATTR_LANGFUSE_TAGS,
                                     json.dumps(DEFAULT_LANGFUSE_TAGS))

        # Custom attributes
        self.root_span.set_attribute(ATTR_PROJECT_ID, project_id)
        self.root_span.set_attribute(ATTR_TASK_ID, task_id)

    def log_worker_created(self,
                           event: WorkerCreatedEvent,
                           agent_class: str | None = None,
                           model_type: str | None = None,
                           **kwargs) -> None:
        """Log worker creation as a span.

        Args:
            event: Worker creation event from CAMEL
            agent_class: Agent class name (optional)
            model_type: Model type (optional)
            **kwargs: Additional unused arguments for compatibility
        """
        if not self.enabled:
            return

        # Create span as child of root span using context
        ctx = trace.set_span_in_context(self.root_span)
        with self.tracer.start_as_current_span(SPAN_WORKER_CREATED,
                                               context=ctx) as span:
            span.set_attribute(ATTR_WORKER_ID, event.worker_id)
            span.set_attribute(ATTR_WORKER_TYPE, event.worker_type)
            span.set_attribute(ATTR_WORKER_ROLE, event.role)
            if agent_class:
                span.set_attribute(ATTR_WORKER_AGENT, agent_class)
            if model_type:
                span.set_attribute(ATTR_WORKER_MODEL_TYPE, model_type)
            span.set_status(Status(StatusCode.OK))

    def log_task_created(self, event: TaskCreatedEvent) -> None:
        """Log task creation as a span.

        Args:
            event: Task created event from CAMEL
        """
        if not self.enabled:
            return

        ctx = trace.set_span_in_context(self.root_span)
        with self.tracer.start_as_current_span(SPAN_TASK_CREATED,
                                               context=ctx) as span:
            span.set_attribute(ATTR_TASK_ID, event.task_id)
            span.set_attribute(ATTR_TASK_DESCRIPTION, event.description)
            span.set_attribute(ATTR_PROJECT_ID, self.project_id)
            if event.parent_task_id:
                span.set_attribute(ATTR_TASK_PARENT_ID, event.parent_task_id)
            if event.task_type:
                span.set_attribute(ATTR_TASK_TYPE, event.task_type)
            span.set_status(Status(StatusCode.OK))

    def log_task_assigned(self, event: TaskAssignedEvent) -> None:
        """Log task assignment as a span.

        Args:
            event: Task assignment event from CAMEL
        """
        if not self.enabled:
            return

        ctx = trace.set_span_in_context(self.root_span)
        with self.tracer.start_as_current_span(SPAN_TASK_ASSIGNED,
                                               context=ctx) as span:
            span.set_attribute(ATTR_TASK_ID, event.task_id)
            span.set_attribute(ATTR_WORKER_ID, event.worker_id)
            span.set_attribute(ATTR_PROJECT_ID, self.project_id)
            if event.queue_time_seconds is not None:
                span.set_attribute(ATTR_TASK_QUEUE_TIME_SECONDS,
                                   event.queue_time_seconds)
            # Add dependencies as JSON array
            if event.dependencies:
                span.set_attribute(ATTR_TASK_DEPENDENCIES,
                                   json.dumps(event.dependencies))
            span.set_status(Status(StatusCode.OK))

    def log_task_started(self, event: TaskStartedEvent) -> None:
        """Log task start and open a span for the task execution.

        The span stays open until log_task_completed, log_task_failed or
        reset_task_data ends it. If a span for the same task ID is still
        open, it is ended with an error status first so it is not leaked
        when the dictionary entry is replaced.

        Args:
            event: Task started event from CAMEL
        """
        if not self.enabled:
            return

        stale_span = self.task_spans.pop(event.task_id, None)
        if stale_span is not None:
            stale_span.set_status(
                Status(StatusCode.ERROR, "Task restarted before completion"))
            stale_span.end()

        # Start a long-running span for task execution as child of root span
        ctx = trace.set_span_in_context(self.root_span)
        span = self.tracer.start_span(f"{SPAN_TASK_EXECUTION}:{event.task_id}",
                                      context=ctx)
        span.set_attribute(ATTR_TASK_ID, event.task_id)
        span.set_attribute(ATTR_WORKER_ID,
                           getattr(event, "worker_id", "unknown"))
        span.set_attribute(ATTR_PROJECT_ID, self.project_id)
        span.set_attribute(ATTR_TASK_STATUS, "started")

        # Store span to end it later
        self.task_spans[event.task_id] = span

    def log_task_completed(self, event: TaskCompletedEvent) -> None:
        """Log task completion and end the execution span.

        Does nothing beyond bookkeeping when no execution span is open for
        the task. Any quality score parsed for the task by log_message is
        consumed here either way.

        Args:
            event: Task completion event from CAMEL
        """
        if not self.enabled:
            return

        span = self.task_spans.pop(event.task_id, None)
        parsed_score = self.task_quality_scores.pop(event.task_id, None)
        if span is None:
            return

        span.set_attribute(ATTR_TASK_STATUS, "completed")
        span.set_attribute(ATTR_WORKER_ID, event.worker_id)

        # Add timestamp as ISO string
        if hasattr(event, "timestamp") and event.timestamp:
            span.set_attribute(ATTR_TASK_TIMESTAMP,
                               event.timestamp.isoformat())
        if event.parent_task_id:
            span.set_attribute(ATTR_TASK_PARENT_ID, event.parent_task_id)
        if event.processing_time_seconds is not None:
            span.set_attribute(ATTR_TASK_PROCESSING_TIME_SECONDS,
                               event.processing_time_seconds)

        # Quality score: the value parsed from log messages wins, then the
        # event attribute, then the event metadata.
        if parsed_score is not None:
            span.set_attribute(ATTR_TASK_QUALITY_SCORE, parsed_score)
        elif (hasattr(event, "quality_score")
              and event.quality_score is not None):
            span.set_attribute(ATTR_TASK_QUALITY_SCORE, event.quality_score)
        elif (hasattr(event, "metadata") and event.metadata
              and "quality_score" in event.metadata):
            span.set_attribute(ATTR_TASK_QUALITY_SCORE,
                               event.metadata["quality_score"])

        if event.token_usage:
            # Store all token usage as custom attributes
            for key, value in event.token_usage.items():
                span.set_attribute(f"eigent.task.token_usage.{key}", value)

        span.set_status(Status(StatusCode.OK))
        span.end()

    def log_task_failed(self, event: TaskFailedEvent) -> None:
        """Log task failure and end the execution span with error status.

        Also drops any quality score parsed for the task so it cannot be
        attached to a later run that reuses the same task ID.

        Args:
            event: Task failure event from CAMEL
        """
        if not self.enabled:
            return

        self.task_quality_scores.pop(event.task_id, None)

        # End the execution span with error if it exists
        span = self.task_spans.pop(event.task_id, None)
        if span is None:
            return

        span.set_attribute(ATTR_TASK_STATUS, "failed")
        # TODO: add error.message
        if event.parent_task_id:
            span.set_attribute(ATTR_TASK_PARENT_ID, event.parent_task_id)
        if event.worker_id:
            span.set_attribute(ATTR_WORKER_ID, event.worker_id)
        span.set_status(Status(StatusCode.ERROR, event.error_message))
        span.end()

    def log_message(self, log_event: LogEvent) -> None:
        """Record error and critical log messages as ``log.message`` spans.

        Info-level messages are not exported; they are only scanned for a
        quality score, which is remembered until the task completes.

        Args:
            log_event: LogEvent from CAMEL
        """
        if not self.enabled:
            return

        # Parse quality score from info-level messages
        # Pattern: "Task <task_id> completed successfully (quality score: X)."
        # TODO: add this from the camel
        if log_event.level == "info":
            pattern = (r'Task\s+(\S+)\s+completed successfully'
                       r'.*quality score:\s*(\d+)')
            match = re.search(pattern, log_event.message)
            if match:
                self.task_quality_scores[match.group(1)] = int(match.group(2))

        # Only log errors and critical messages
        if log_event.level not in ("error", "critical"):
            return

        ctx = trace.set_span_in_context(self.root_span)
        with self.tracer.start_as_current_span(SPAN_LOG_MESSAGE,
                                               context=ctx) as span:
            span.set_attribute("log.level", log_event.level)
            span.set_attribute("log.message", log_event.message)
            span.set_attribute(ATTR_PROJECT_ID, self.project_id)

            # Add metadata if available
            if log_event.metadata:
                for key, value in log_event.metadata.items():
                    span.set_attribute(f"log.{key}", str(value))

            # Only critical messages mark the span itself as an error
            if log_event.level == "critical":
                span.set_status(Status(StatusCode.ERROR, log_event.message))

    def log_all_tasks_completed(self, event) -> None:
        """Log when all tasks in the workforce are completed.

        Emits a ``workforce.all_tasks_completed`` span and then ends the
        root span with an OK status.

        Args:
            event: All tasks completed event from CAMEL
        """
        if not self.enabled:
            return

        ctx = trace.set_span_in_context(self.root_span)
        with self.tracer.start_as_current_span(SPAN_ALL_TASKS_COMPLETED,
                                               context=ctx) as span:
            span.set_attribute(ATTR_PROJECT_ID, self.project_id)
            span.set_attribute(ATTR_TASK_ID, self.task_id)

            # Add timestamp as ISO string
            if hasattr(event, "timestamp") and event.timestamp:
                span.set_attribute(ATTR_TASK_TIMESTAMP,
                                   event.timestamp.isoformat())
            if hasattr(event, "total_tasks"):
                span.set_attribute(ATTR_WORKFORCE_TOTAL_TASKS,
                                   event.total_tasks)
            span.set_status(Status(StatusCode.OK))

        # End the root span when all tasks are completed
        if self.root_span:
            self.root_span.set_status(Status(StatusCode.OK))
            self.root_span.end()

    def dump_to_json(self) -> str:
        """Dump metrics to JSON string.

        Returns:
            JSON object with project ID, task ID, whether telemetry is
            enabled, and the number of open task spans
        """
        return json.dumps({
            "project_id": self.project_id,
            "task_id": self.task_id,
            "otel_enabled": self.enabled,
            "active_spans": len(self.task_spans),
        })

    def get_ascii_tree_representation(self) -> str:
        """Get a one-line text summary of the callback state.

        Returns:
            String naming the project, task and number of open task spans
        """
        active_count = len(self.task_spans)
        return (f"OpenTelemetry Metrics for project {self.project_id}, "
                f"task {self.task_id} (active spans: {active_count})")

    def get_kpis(self) -> Dict[str, Any]:
        """Get key performance indicators.

        Returns:
            Dictionary with project ID, task ID, whether telemetry is
            enabled, and the number of open task spans
        """
        return {
            "project_id": self.project_id,
            "task_id": self.task_id,
            "otel_enabled": self.enabled,
            "active_task_spans": len(self.task_spans),
        }

    def reset_task_data(self) -> None:
        """Reset task-specific data.

        Ends every task execution span that is still open with an error
        status, then clears the open-span and parsed-quality-score tables
        so nothing from the previous run carries over.
        """
        # End any remaining open task execution spans
        for span in self.task_spans.values():
            span.set_status(Status(StatusCode.ERROR, "Task interrupted/reset"))
            span.end()
        self.task_spans.clear()
        self.task_quality_scores.clear()

View file

@ -21,11 +21,11 @@ from app.service.task import Agents
from app.utils.listen.toolkit_listen import auto_listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from app.utils.oauth_state_manager import oauth_state_manager
from utils import traceroot_wrapper as traceroot
import logging
from camel.toolkits import GoogleCalendarToolkit as BaseGoogleCalendarToolkit
logger = traceroot.get_logger("main")
logger = logging.getLogger("main")
SCOPES = ['https://www.googleapis.com/auth/calendar']

View file

@ -19,9 +19,9 @@ from app.service.task import Action, ActionAskData, ActionNoticeData, get_task_l
from app.utils.listen.toolkit_listen import auto_listen_toolkit, listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from app.service.task import process_task
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("human_toolkit")
logger = logging.getLogger("human_toolkit")
@auto_listen_toolkit(BaseToolkit)

View file

@ -31,9 +31,9 @@ from app.exception.exception import ProgramException
from app.service.task import Agents
from app.utils.listen.toolkit_listen import auto_listen_toolkit, listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("hybrid_browser_python_toolkit")
logger = logging.getLogger("hybrid_browser_python_toolkit")
class BrowserSession(BaseHybridBrowserSession):

View file

@ -30,9 +30,9 @@ from app.component.environment import env
from app.service.task import Agents
from app.utils.listen.toolkit_listen import auto_listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("hybrid_browser_toolkit")
logger = logging.getLogger("hybrid_browser_toolkit")
# Global navigation lock to prevent concurrent visit_page conflicts (ERR_ABORTED)
# This is needed because multiple sessions may share the same browser via CDP

View file

@ -21,9 +21,9 @@ from camel.toolkits import FunctionTool
from app.component.environment import env
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from camel.toolkits.mcp_toolkit import MCPToolkit
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("notion_mcp_toolkit")
logger = logging.getLogger("notion_mcp_toolkit")
def _customize_function_parameters(schema: Dict[str, Any]) -> None:
r"""Customize function parameters for specific functions.

View file

@ -21,9 +21,9 @@ from app.component.environment import env, env_not_empty
from app.service.task import Agents
from app.utils.listen.toolkit_listen import auto_listen_toolkit, listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("search_toolkit")
logger = logging.getLogger("search_toolkit")
@auto_listen_toolkit(BaseSearchToolkit)

View file

@ -18,9 +18,9 @@ from app.component.environment import env
from app.service.task import Agents
from app.utils.listen.toolkit_listen import auto_listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("slack_toolkit")
logger = logging.getLogger("slack_toolkit")
@auto_listen_toolkit(BaseSlackToolkit)

View file

@ -29,9 +29,9 @@ from app.service.task import Action, ActionTerminalData, Agents, get_task_lock
from app.utils.listen.toolkit_listen import auto_listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
from app.service.task import process_task
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("terminal_toolkit")
logger = logging.getLogger("terminal_toolkit")
# App version - should match electron app version
# TODO: Consider getting this from a shared config

View file

@ -13,41 +13,39 @@
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
import asyncio
import logging
from typing import Generator, List, Optional
from camel.agents import ChatAgent
from camel.societies.workforce.workforce import (
Workforce as BaseWorkforce,
WorkforceState,
DEFAULT_WORKER_POOL_SIZE,
)
from camel.societies.workforce.utils import FailureHandlingConfig
from camel.societies.workforce.task_channel import TaskChannel
from camel.societies.workforce.base import BaseNode
from camel.societies.workforce.utils import TaskAssignResult
from camel.societies.workforce.workforce_metrics import WorkforceMetrics
from camel.societies.workforce.events import WorkerCreatedEvent
from camel.societies.workforce.prompts import TASK_DECOMPOSE_PROMPT
from camel.tasks.task import Task, TaskState, validate_task_content
from app.component import code
from app.exception.exception import UserException
from app.service.task import (Action, ActionAssignTaskData, ActionEndData,
ActionTaskStateData, ActionTimeoutData,
get_camel_task, get_task_lock)
from app.utils.agent import ListenChatAgent
from app.service.task import (
Action,
ActionAssignTaskData,
ActionEndData,
ActionTaskStateData,
ActionTimeoutData,
get_camel_task,
get_task_lock,
)
from app.utils.single_agent_worker import SingleAgentWorker
from utils import traceroot_wrapper as traceroot
logger = traceroot.get_logger("workforce")
from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback
from camel.agents import ChatAgent
from camel.societies.workforce.base import BaseNode
from camel.societies.workforce.events import (TaskAssignedEvent,
TaskCompletedEvent,
TaskCreatedEvent,
TaskFailedEvent,
WorkerCreatedEvent)
from camel.societies.workforce.prompts import TASK_DECOMPOSE_PROMPT
from camel.societies.workforce.task_channel import TaskChannel
from camel.societies.workforce.utils import (FailureHandlingConfig,
TaskAssignResult)
from camel.societies.workforce.workforce import DEFAULT_WORKER_POOL_SIZE
from camel.societies.workforce.workforce import Workforce as BaseWorkforce
from camel.societies.workforce.workforce import WorkforceState
from camel.societies.workforce.workforce_metrics import WorkforceMetrics
from camel.tasks.task import Task, TaskState, validate_task_content
logger = logging.getLogger("workforce")
class Workforce(BaseWorkforce):
def __init__(
self,
api_task_id: str,
@ -62,9 +60,12 @@ class Workforce(BaseWorkforce):
) -> None:
self.api_task_id = api_task_id
logger.info("=" * 80)
logger.info("🏭 [WF-LIFECYCLE] Workforce.__init__ STARTED", extra={"api_task_id": api_task_id})
logger.info("🏭 [WF-LIFECYCLE] Workforce.__init__ STARTED",
extra={"api_task_id": api_task_id})
logger.info(f"[WF-LIFECYCLE] Workforce id will be: {id(self)}")
logger.info(f"[WF-LIFECYCLE] Init params: graceful_shutdown_timeout={graceful_shutdown_timeout}, share_memory={share_memory}")
logger.info(
f"[WF-LIFECYCLE] Init params: graceful_shutdown_timeout="
f"{graceful_shutdown_timeout}, share_memory={share_memory}")
logger.info("=" * 80)
super().__init__(
description=description,
@ -77,12 +78,12 @@ class Workforce(BaseWorkforce):
use_structured_output_handler=use_structured_output_handler,
task_timeout_seconds=3600, # 60 minutes
failure_handling_config=FailureHandlingConfig(
enabled_strategies=["retry", "replan"],
),
enabled_strategies=["retry", "replan"], ),
)
self.task_agent.stream_accumulate = True
self.task_agent._stream_accumulate_explicit = True
logger.info(f"[WF-LIFECYCLE] ✅ Workforce.__init__ COMPLETED, id={id(self)}")
logger.info(
f"[WF-LIFECYCLE] ✅ Workforce.__init__ COMPLETED, id={id(self)}")
def eigent_make_sub_tasks(
self,
@ -91,28 +92,37 @@ class Workforce(BaseWorkforce):
on_stream_batch=None,
on_stream_text=None,
):
"""
Split process_task method to eigent_make_sub_tasks and eigent_start method.
"""Split process_task method to eigent_make_sub_tasks
and eigent_start method.
Args:
task: The main task to decompose
coordinator_context: Optional context ONLY for coordinator agent during decomposition.
This context will NOT be passed to subtasks or worker agents.
on_stream_batch: Optional callback for streaming batches signature (List[Task], bool)
on_stream_text: Optional callback for raw streaming text chunks
coordinator_context: Optional context ONLY for coordinator
agent during decomposition. This context will NOT
be passed to subtasks or worker agents.
on_stream_batch: Optional callback for streaming
batches signature (List[Task], bool)
on_stream_text: Optional callback for raw
streaming text chunks
"""
logger.debug("[DECOMPOSE] eigent_make_sub_tasks called", extra={
"api_task_id": self.api_task_id,
"task_id": task.id
})
logger.debug("[DECOMPOSE] eigent_make_sub_tasks called",
extra={
"api_task_id": self.api_task_id,
"task_id": task.id
})
if not validate_task_content(task.content, task.id):
task.state = TaskState.FAILED
task.result = "Task failed: Invalid or empty content provided"
logger.warning("[DECOMPOSE] Task rejected: Invalid or empty content", extra={
"task_id": task.id,
"content_preview": task.content[:50] + "..." if len(task.content) > 50 else task.content
})
logger.warning(
"[DECOMPOSE] Task rejected: Invalid or empty content",
extra={
"task_id":
task.id,
"content_preview":
task.content[:50] +
"..." if len(task.content) > 50 else task.content
})
raise UserException(code.error, task.result)
self.reset()
@ -120,42 +130,43 @@ class Workforce(BaseWorkforce):
self.set_channel(TaskChannel())
self._state = WorkforceState.RUNNING
task.state = TaskState.OPEN
subtasks = asyncio.run(
self.handle_decompose_append_task(
task,
reset=False,
coordinator_context=coordinator_context,
on_stream_batch=on_stream_batch,
on_stream_text=on_stream_text
)
)
on_stream_text=on_stream_text))
logger.info(f"[DECOMPOSE] Task decomposition completed", extra={
"api_task_id": self.api_task_id,
"task_id": task.id,
"subtasks_count": len(subtasks)
})
logger.info("[DECOMPOSE] Task decomposition completed",
extra={
"api_task_id": self.api_task_id,
"task_id": task.id,
"subtasks_count": len(subtasks)
})
return subtasks
async def eigent_start(self, subtasks: list[Task]):
"""start the workforce"""
logger.debug(f"[WF-LIFECYCLE] eigent_start called with {len(subtasks)} subtasks", extra={
"api_task_id": self.api_task_id
})
logger.debug((f"[WF-LIFECYCLE] eigent_start called with "
f"{len(subtasks)} subtasks"),
extra={"api_task_id": self.api_task_id})
# Clear existing pending tasks to use the user-edited task list
# (tasks may have been added during decomposition before user edits)
self._pending_tasks.clear()
self._pending_tasks.extendleft(reversed(subtasks))
self.save_snapshot("Initial task decomposition")
try:
await self.start()
except Exception as e:
logger.error(f"[WF-LIFECYCLE] Error in workforce execution: {e}", extra={
"api_task_id": self.api_task_id,
"error": str(e)
}, exc_info=True)
logger.error(f"[WF-LIFECYCLE] Error in workforce execution: {e}",
extra={
"api_task_id": self.api_task_id,
"error": str(e)
},
exc_info=True)
self._state = WorkforceState.STOPPED
raise
finally:
@ -169,24 +180,24 @@ class Workforce(BaseWorkforce):
content=task.content,
child_nodes_info=self._get_child_nodes_info(),
additional_info=task.additional_info,
)
)
))
self.task_agent.reset()
result = task.decompose(
self.task_agent, decompose_prompt, stream_callback=stream_callback
)
result = task.decompose(self.task_agent,
decompose_prompt,
stream_callback=stream_callback)
if isinstance(result, Generator):
def streaming_with_dependencies():
all_subtasks = []
for new_tasks in result:
all_subtasks.extend(new_tasks)
if new_tasks:
self._update_dependencies_for_decomposition(
task, all_subtasks
)
task, all_subtasks)
yield new_tasks
return streaming_with_dependencies()
else:
subtasks = result
@ -202,29 +213,32 @@ class Workforce(BaseWorkforce):
on_stream_batch=None,
on_stream_text=None,
) -> List[Task]:
"""
Override to support coordinator_context parameter.
Handle task decomposition and validation, then append to pending tasks.
"""Override to support coordinator_context parameter.
Handle task decomposition and validation,
then append to pending tasks.
Args:
task: The task to be processed
reset: Should trigger workforce reset (Workforce must not be running)
coordinator_context: Optional context ONLY for coordinator during decomposition
on_stream_batch: Optional callback for streaming batches signature (List[Task], bool)
reset: Should trigger workforce reset
(Workforce must not be running)
coordinator_context: Optional context ONLY for
coordinator during decomposition
on_stream_batch: Optional callback for streaming
batches signature (List[Task], bool)
on_stream_text: Optional callback for raw streaming text chunks
Returns:
List[Task]: The decomposed subtasks or the original task
"""
logger.debug(f"[DECOMPOSE] handle_decompose_append_task called, task_id={task.id}, reset={reset}")
logger.debug(f"[DECOMPOSE] handle_decompose_append_task called, "
f"task_id={task.id}, reset={reset}")
if not validate_task_content(task.content, task.id):
task.state = TaskState.FAILED
task.result = "Task failed: Invalid or empty content provided"
logger.warning(
f"[DECOMPOSE] Task {task.id} rejected: Invalid or empty content. "
f"Content preview: '{task.content}'"
)
logger.warning(f"[DECOMPOSE] Task {task.id} rejected: "
f"Invalid or empty content. "
f"Content preview: '{task.content}'")
return [task]
if reset and self._state != WorkforceState.RUNNING:
@ -235,12 +249,15 @@ class Workforce(BaseWorkforce):
if coordinator_context:
original_content = task.content
task_with_context = coordinator_context + "\n=== CURRENT TASK ===\n" + original_content
task_with_context = (coordinator_context +
"\n=== CURRENT TASK ===\n" + original_content)
task.content = task_with_context
subtasks_result = self._decompose_task(task, stream_callback=on_stream_text)
subtasks_result = self._decompose_task(
task, stream_callback=on_stream_text)
task.content = original_content
else:
subtasks_result = self._decompose_task(task, stream_callback=on_stream_text)
subtasks_result = self._decompose_task(
task, stream_callback=on_stream_text)
if isinstance(subtasks_result, Generator):
subtasks = []
@ -252,7 +269,8 @@ class Workforce(BaseWorkforce):
except Exception as e:
logger.warning(f"Streaming callback failed: {e}")
# After consuming the generator, check task.subtasks for final result as fallback
# After consuming the generator, check task.subtasks
# for final result as fallback
if not subtasks and task.subtasks:
subtasks = task.subtasks
else:
@ -260,9 +278,24 @@ class Workforce(BaseWorkforce):
if subtasks:
self._pending_tasks.extendleft(reversed(subtasks))
# Log task created events
metrics_callbacks = [
cb for cb in self._callbacks
if isinstance(cb, WorkforceMetrics)
]
if metrics_callbacks:
for subtask in subtasks:
event = TaskCreatedEvent(
task_id=subtask.id,
description=subtask.content,
parent_task_id=task.id if task else None,
task_type=None,
)
metrics_callbacks[0].log_task_created(event)
if not subtasks:
logger.warning(f"[DECOMPOSE] No subtasks returned, creating fallback task")
logger.warning(
"[DECOMPOSE] No subtasks returned, creating fallback task")
fallback_task = Task(
content=task.content,
id=f"{task.id}.1",
@ -271,27 +304,73 @@ class Workforce(BaseWorkforce):
task.subtasks = [fallback_task]
subtasks = [fallback_task]
# Log fallback task created event
metrics_callbacks = [
cb for cb in self._callbacks
if isinstance(cb, WorkforceMetrics)
]
if metrics_callbacks:
event = TaskCreatedEvent(
task_id=fallback_task.id,
description=fallback_task.content,
parent_task_id=task.id if task else None,
task_type=None,
)
metrics_callbacks[0].log_task_created(event)
if on_stream_batch:
try:
on_stream_batch(subtasks, True)
except Exception as e:
logger.warning(f"Final streaming callback failed: {e}")
logger.debug(f"[DECOMPOSE] handle_decompose_append_task completed, returned {len(subtasks)} subtasks")
logger.debug(f"[DECOMPOSE] handle_decompose_append_task completed, "
f"returned {len(subtasks)} subtasks")
return subtasks
def _get_agent_id_from_node_id(self, node_id: str) -> str | None:
"""Map worker node_id to the actual agent_id for frontend communication.
"""Map worker node_id to the actual agent_id for
frontend communication.
The CAMEL base class uses node_id for task assignment, but the frontend
uses agent_id to identify agents. This method provides the mapping.
The CAMEL base class uses node_id for task assignment,
but the frontend uses agent_id to identify agents.
This method provides the mapping.
"""
for child in self._children:
if hasattr(child, 'node_id') and child.node_id == node_id:
if hasattr(child, 'worker') and hasattr(child.worker, 'agent_id'):
if hasattr(child, 'worker') and hasattr(
child.worker, 'agent_id'):
return child.worker.agent_id
return None
def _extract_model_type(self, agent: ChatAgent) -> Optional[str]:
"""Extract model type from agent's model_backend.
Handles both ModelManager (multiple models) and single model cases.
Args:
agent: The chat agent to extract model type from
Returns:
Model type as string, or None if not found
"""
if not hasattr(agent, 'model_backend') or not agent.model_backend:
return None
model_obj = agent.model_backend
# Handle ModelManager case (multiple models)
if hasattr(model_obj, 'models') and model_obj.models:
first_model = model_obj.models[0] if model_obj.models else None
if first_model:
mt = getattr(first_model, 'model_type', None)
return str(
mt.value if hasattr(mt, 'value') else mt) if mt else None
# Handle single model case
mt = getattr(model_obj, 'model_type', None)
return str(mt.value if hasattr(mt, 'value') else mt) if mt else None
async def _find_assignee(self, tasks: List[Task]) -> TaskAssignResult:
# Task assignment phase: send "waiting for execution" notification
# to the frontend, and send "start execution" notification when the
@ -300,8 +379,10 @@ class Workforce(BaseWorkforce):
task_lock = get_task_lock(self.api_task_id)
for item in assigned.assignments:
# DEBUG ▶ Task has been assigned to which worker and its dependencies
logger.debug(f"[WF] ASSIGN {item.task_id} -> {item.assignee_id} deps={item.dependencies}")
# DEBUG ▶ Task has been assigned to which worker
# and its dependencies
logger.debug(f"[WF] ASSIGN {item.task_id} -> {item.assignee_id} "
f"deps={item.dependencies}")
# The main task itself does not need notification
if self._task and item.task_id == self._task.id:
continue
@ -309,33 +390,39 @@ class Workforce(BaseWorkforce):
task_obj = get_camel_task(item.task_id, tasks)
if task_obj is None:
logger.warning(
f"[WF] WARN: Task {item.task_id} not found in tasks list during ASSIGN phase. This may indicate a task tree inconsistency."
)
f"[WF] WARN: Task {item.task_id} not found in "
f"tasks list during ASSIGN phase. This may indicate "
f"a task tree inconsistency.")
content = ""
else:
content = task_obj.content
# Skip sending notification if this is a retry/replan for an already assigned task
# This prevents the frontend from showing "Reassigned" when a task is being retried
# with the same or different worker due to failure recovery
# Skip sending notification if this is a retry/replan for
# an already assigned task
# This prevents the frontend from showing "Reassigned"
# when a task is being retried with the same or different
# worker due to failure recovery
if task_obj and task_obj.assigned_worker_id:
logger.debug(
f"[WF] ASSIGN Skip notification for task {item.task_id}: "
f"already has assigned_worker_id={task_obj.assigned_worker_id}, "
f"new assignee={item.assignee_id} (retry/replan scenario)"
)
f"already has assigned_worker_id="
f"{task_obj.assigned_worker_id}, "
f"new assignee={item.assignee_id} (retry/replan scenario)")
continue
# Map node_id to agent_id for frontend communication
# The CAMEL base class returns node_id as assignee_id, but the frontend
# uses agent_id to identify agents
# The CAMEL base class returns node_id as assignee_id,
# but the frontend uses agent_id to identify agents
agent_id = self._get_agent_id_from_node_id(item.assignee_id)
if agent_id is None:
workers = [
c.node_id for c in self._children if hasattr(c, 'node_id')
]
logger.error(
f"[WF] ERROR: Could not find agent_id for node_id={item.assignee_id}. "
f"Task {item.task_id} will not be properly tracked on frontend. "
f"Available workers: {[c.node_id for c in self._children if hasattr(c, 'node_id')]}"
)
f"[WF] ERROR: Could not find agent_id for "
f"node_id={item.assignee_id}. Task {item.task_id} "
f"will not be properly tracked on frontend. "
f"Available workers: {workers}")
continue # Skip sending notification for unmapped worker
# Asynchronously send waiting notification
@ -350,28 +437,47 @@ class Workforce(BaseWorkforce):
"state": "waiting", # Mark as waiting state
"failure_count": 0,
},
)
)
)
)))
# Track the task for cleanup
task_lock.add_background_task(task)
metrics_callbacks = [
cb for cb in self._callbacks
if isinstance(cb, WorkforceMetrics)
]
if metrics_callbacks:
event = TaskAssignedEvent(
task_id=item.task_id,
worker_id=agent_id,
dependencies=item.dependencies,
)
metrics_callbacks[0].log_task_assigned(event)
return assigned
async def _post_task(self, task: Task, assignee_id: str) -> None:
# DEBUG ▶ Dependencies are met, the task really starts to execute
logger.debug(f"[WF] POST {task.id} -> {assignee_id}")
"""Override the _post_task method to notify the frontend when the task really starts to execute"""
# When the dependency check is passed and the task is about to be published to the execution queue, send a notification to the frontend
"""Override the _post_task method to notify the frontend
when the task really starts to execute
"""
# When the dependency check is passed and the task is
# about to be published to the execution queue, send a
# notification to the frontend
task_lock = get_task_lock(self.api_task_id)
if self._task and task.id != self._task.id: # Skip the main task itself
if self._task and task.id != self._task.id:
# Skip the main task itself
# Map node_id to agent_id for frontend communication
agent_id = self._get_agent_id_from_node_id(assignee_id)
workers = [
c.node_id for c in self._children if hasattr(c, 'node_id')
]
if agent_id is None:
logger.error(
f"[WF] ERROR: Could not find agent_id for node_id={assignee_id}. "
f"Task {task.id} will not be properly tracked on frontend. "
f"Available workers: {[c.node_id for c in self._children if hasattr(c, 'node_id')]}"
)
logger.error(f"[WF] ERROR: Could not find agent_id "
f"for node_id={assignee_id}. "
f"Task {task.id} will not be properly "
f"tracked on frontend. "
f"Available workers: "
f"{workers}")
else:
await task_lock.put_queue(
ActionAssignTaskData(
@ -383,9 +489,9 @@ class Workforce(BaseWorkforce):
"state": "running", # running state
"failure_count": task.failure_count,
},
)
)
# Call the parent class method to continue the normal task publishing process
))
# Call the parent class method to continue the
# normal task publishing process
await super()._post_task(task, assignee_id)
def add_single_agent_worker(
@ -396,7 +502,9 @@ class Workforce(BaseWorkforce):
enable_workflow_memory: bool = False,
) -> BaseWorkforce:
if self._state == WorkforceState.RUNNING:
raise RuntimeError("Cannot add workers while workforce is running. Pause the workforce first.")
raise RuntimeError(
"Cannot add workers while workforce is running. "
"Pause the workforce first.")
# Validate worker agent compatibility
self._validate_agent_compatibility(worker, "Worker agent")
@ -422,14 +530,32 @@ class Workforce(BaseWorkforce):
self._start_child_node_when_paused(worker_node.start())
# Use proper CAMEL pattern for metrics logging
metrics_callbacks = [cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)]
metrics_callbacks = [
cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
]
if metrics_callbacks:
# Collect agent metadata for telemetry
agent_class_name = getattr(worker, 'agent_name',
worker.__class__.__name__)
model_type = self._extract_model_type(worker)
# Log worker created event
event = WorkerCreatedEvent(
worker_id=worker_node.node_id,
worker_type="SingleAgentWorker",
role=worker_node.description,
)
metrics_callbacks[0].log_worker_created(event)
# Call log_worker_created for all callbacks
for cb in self._callbacks:
if isinstance(cb, WorkforceMetricsCallback):
cb.log_worker_created(
event,
agent_class=agent_class_name,
model_type=model_type,
)
else:
cb.log_worker_created(event)
return self
@ -442,8 +568,11 @@ class Workforce(BaseWorkforce):
is_main_task = self._task and task.id == self._task.id
task_type = "MAIN TASK" if is_main_task else "SUB-TASK"
logger.info(f"[TASK-RESULT] {task_type} COMPLETED: {task.id}")
logger.info(f"[TASK-RESULT] Content: {task.content[:200]}..." if len(task.content) > 200 else f"[TASK-RESULT] Content: {task.content}")
logger.info(f"[TASK-RESULT] Result: {task.result[:500]}..." if task.result and len(str(task.result)) > 500 else f"[TASK-RESULT] Result: {task.result}")
logger.info(f"[TASK-RESULT] Content: {task.content[:200]}..." if len(
task.content) > 200 else f"[TASK-RESULT] Content: {task.content}")
logger.info(f"[TASK-RESULT] Result: {task.result[:500]}..."
if task.result and len(str(task.result)) > 500 else
f"[TASK-RESULT] Result: {task.result}")
task_data = {
"task_id": task.id,
@ -452,12 +581,21 @@ class Workforce(BaseWorkforce):
"result": task.result or "",
"failure_count": task.failure_count,
}
await task_lock.put_queue(
ActionTaskStateData(
data=task_data
await task_lock.put_queue(ActionTaskStateData(data=task_data))
# Log task completion to metrics
metrics_callbacks = [
cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
]
if metrics_callbacks:
# worker_id is required and cannot be None
worker_id = getattr(task, 'assigned_worker_id', None) or 'unknown'
event = TaskCompletedEvent(
task_id=task.id,
worker_id=worker_id,
)
)
metrics_callbacks[0].log_task_completed(event)
# IMPORTANT: Sync this subtask's result back to parent.subtasks BEFORE calling super()
# This fixes the issue where parent.subtasks[i].result is None because CAMEL's
@ -482,17 +620,21 @@ class Workforce(BaseWorkforce):
result = await super()._handle_failed_task(task)
# Only send completion report to frontend when all retries are exhausted
# Only send completion report to frontend when all
# retries are exhausted
max_retries = self.failure_handling_config.max_retries
if task.failure_count < max_retries:
return result
error_message = ""
# Use proper CAMEL pattern for metrics logging
metrics_callbacks = [cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)]
metrics_callbacks = [
cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
]
if metrics_callbacks and hasattr(metrics_callbacks[0], "log_entries"):
for entry in reversed(metrics_callbacks[0].log_entries):
if entry.get("event_type") == "task_failed" and entry.get("task_id") == task.id:
if entry.get("event_type") == "task_failed" and entry.get(
"task_id") == task.id:
error_message = entry.get("error_message")
break
@ -505,9 +647,18 @@ class Workforce(BaseWorkforce):
"state": task.state,
"failure_count": task.failure_count,
"result": str(error_message),
}
)
)
}))
if metrics_callbacks:
event = TaskFailedEvent(task_id=task.id, )
# Add failure details if available
if hasattr(task, 'assigned_worker_id'):
event.worker_id = task.assigned_worker_id
event.error_message = error_message or str(task.result
or "Unknown error")
event.failure_count = task.failure_count
metrics_callbacks[0].log_task_failed(event)
return result
@ -532,77 +683,114 @@ class Workforce(BaseWorkforce):
f"⏰ [WF-TIMEOUT] Task timeout in workforce {self.node_id}. "
f"Timeout: {self.task_timeout_seconds}s, "
f"Pending tasks: {len(self._pending_tasks)}, "
f"In-flight tasks: {self._in_flight_tasks}"
)
f"In-flight tasks: {self._in_flight_tasks}")
# Try to notify frontend, but don't let notification failure mask the timeout
# Try to notify frontend, but don't let
# notification failure mask the timeout
try:
task_lock = get_task_lock(self.api_task_id)
timeout_minutes = self.task_timeout_seconds // 60
await task_lock.put_queue(
ActionTimeoutData(
data={
"message": f"Task execution timeout: No response received for {timeout_minutes} minutes",
"in_flight_tasks": self._in_flight_tasks,
"pending_tasks": len(self._pending_tasks),
"timeout_seconds": self.task_timeout_seconds,
}
)
)
"message":
(f"Task execution timeout: No response received "
f"for {timeout_minutes} minutes"),
"in_flight_tasks":
self._in_flight_tasks,
"pending_tasks":
len(self._pending_tasks),
"timeout_seconds":
self.task_timeout_seconds,
}))
except Exception as notify_err:
logger.error(f"Failed to send timeout notification: {notify_err}")
logger.error(
f"Failed to send timeout notification: {notify_err}")
raise
except Exception as e:
logger.error(
f"Error getting returned task {e} in workforce {self.node_id}. "
f"Current pending tasks: {len(self._pending_tasks)}, "
f"In-flight tasks: {self._in_flight_tasks}"
)
logger.error(f"Error getting returned task {e} in "
f"workforce {self.node_id}. "
f"Current pending tasks: {len(self._pending_tasks)}, "
f"In-flight tasks: {self._in_flight_tasks}")
raise
def stop(self) -> None:
logger.info("=" * 80)
logger.info(f"⏹️ [WF-LIFECYCLE] stop() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
logger.info(f"[WF-LIFECYCLE] Current state before stop: {self._state.name}, _running: {self._running}")
logger.info("⏹️ [WF-LIFECYCLE] stop() CALLED",
extra={
"api_task_id": self.api_task_id,
"workforce_id": id(self)
})
logger.info(f"[WF-LIFECYCLE] Current state before stop: "
f"{self._state.name}, _running: {self._running}")
logger.info("=" * 80)
super().stop()
logger.info(f"[WF-LIFECYCLE] super().stop() completed, new state: {self._state.name}")
logger.info(f"[WF-LIFECYCLE] super().stop() completed, "
f"new state: {self._state.name}")
task_lock = get_task_lock(self.api_task_id)
task = asyncio.create_task(task_lock.put_queue(ActionEndData()))
task_lock.add_background_task(task)
logger.info(f"[WF-LIFECYCLE] ✅ ActionEndData queued")
logger.info("[WF-LIFECYCLE] ✅ ActionEndData queued")
def stop_gracefully(self) -> None:
logger.info("=" * 80)
logger.info(f"🛑 [WF-LIFECYCLE] stop_gracefully() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
logger.info(f"[WF-LIFECYCLE] Current state before stop_gracefully: {self._state.name}, _running: {self._running}")
logger.info("🛑 [WF-LIFECYCLE] stop_gracefully() CALLED",
extra={
"api_task_id": self.api_task_id,
"workforce_id": id(self)
})
logger.info(f"[WF-LIFECYCLE] Current state before stop_gracefully: "
f"{self._state.name}, _running: {self._running}")
logger.info("=" * 80)
super().stop_gracefully()
logger.info(f"[WF-LIFECYCLE] ✅ super().stop_gracefully() completed, new state: {self._state.name}, _running: {self._running}")
logger.info(
f"[WF-LIFECYCLE] ✅ super().stop_gracefully() completed, "
f"new state: {self._state.name}, _running: {self._running}")
def skip_gracefully(self) -> None:
logger.info("=" * 80)
logger.info(f"⏭️ [WF-LIFECYCLE] skip_gracefully() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
logger.info(f"[WF-LIFECYCLE] Current state before skip_gracefully: {self._state.name}, _running: {self._running}")
logger.info("⏭️ [WF-LIFECYCLE] skip_gracefully() CALLED",
extra={
"api_task_id": self.api_task_id,
"workforce_id": id(self)
})
logger.info(f"[WF-LIFECYCLE] Current state before skip_gracefully: "
f"{self._state.name}, _running: {self._running}")
logger.info("=" * 80)
super().skip_gracefully()
logger.info(f"[WF-LIFECYCLE] ✅ super().skip_gracefully() completed, new state: {self._state.name}, _running: {self._running}")
logger.info(
f"[WF-LIFECYCLE] ✅ super().skip_gracefully() completed, "
f"new state: {self._state.name}, _running: {self._running}")
def pause(self) -> None:
logger.info("=" * 80)
logger.info(f"⏸️ [WF-LIFECYCLE] pause() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
logger.info(f"[WF-LIFECYCLE] Current state before pause: {self._state.name}, _running: {self._running}")
logger.info("⏸️ [WF-LIFECYCLE] pause() CALLED",
extra={
"api_task_id": self.api_task_id,
"workforce_id": id(self)
})
logger.info(f"[WF-LIFECYCLE] Current state before pause: "
f"{self._state.name}, _running: {self._running}")
logger.info("=" * 80)
super().pause()
logger.info(f"[WF-LIFECYCLE] ✅ super().pause() completed, new state: {self._state.name}, _running: {self._running}")
logger.info(
f"[WF-LIFECYCLE] ✅ super().pause() completed, "
f"new state: {self._state.name}, _running: {self._running}")
def resume(self) -> None:
logger.info("=" * 80)
logger.info(f"▶️ [WF-LIFECYCLE] resume() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
logger.info(f"[WF-LIFECYCLE] Current state before resume: {self._state.name}, _running: {self._running}")
logger.info("▶️ [WF-LIFECYCLE] resume() CALLED",
extra={
"api_task_id": self.api_task_id,
"workforce_id": id(self)
})
logger.info(f"[WF-LIFECYCLE] Current state before resume: "
f"{self._state.name}, _running: {self._running}")
logger.info("=" * 80)
super().resume()
logger.info(f"[WF-LIFECYCLE] ✅ super().resume() completed, new state: {self._state.name}, _running: {self._running}")
logger.info(
f"[WF-LIFECYCLE] ✅ super().resume() completed, "
f"new state: {self._state.name}, _running: {self._running}")
async def cleanup(self) -> None:
r"""Clean up resources when workforce is done"""

View file

@ -24,23 +24,27 @@ _project_root = pathlib.Path(__file__).parent.parent
if str(_project_root) not in sys.path:
sys.path.insert(0, str(_project_root))
# 1) Load env and init traceroot BEFORE importing modules that get a logger
from utils import traceroot_wrapper as traceroot
import logging
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Disable verbose CAMEL logs
logging.getLogger("camel").setLevel(logging.WARNING)
logging.getLogger("camel.base_model").setLevel(logging.WARNING)
logging.getLogger("camel.agents").setLevel(logging.WARNING)
logging.getLogger("camel.societies").setLevel(logging.WARNING)
from app import api
# Only initialize traceroot if enabled
if traceroot.is_enabled():
from traceroot.integrations.fastapi import connect_fastapi
connect_fastapi(api)
# 2) Now safe to import modules that use traceroot.get_logger() at import-time
from app.component.environment import env
from app.router import register_routers
os.environ["PYTHONIOENCODING"] = "utf-8"
app_logger = traceroot.get_logger("main")
app_logger = logging.getLogger("main")
# Log application startup
app_logger.info("Starting Eigent Multi-Agent System API")
@ -82,9 +86,19 @@ async def write_pid_file():
app_logger.info(f"PID file written: {os.getpid()}")
# Create task to write PID
pid_task = asyncio.create_task(write_pid_file())
app_logger.info("PID write task created")
# PID task will be created on startup
pid_task = None
@api.on_event("startup")
async def startup_event() -> None:
    """Run startup work once the application's event loop is available.

    Schedules ``write_pid_file()`` as a background task, storing the task in
    the module-level ``pid_task``, and then calls
    ``initialize_tracer_provider()``.
    """
    # Rebind the module-level handle so the task object stays referenced
    global pid_task
    pid_task = asyncio.create_task(write_pid_file())
    app_logger.info("PID write task created")
    # Initialize telemetry tracer provider
    # (imported here rather than at module import time)
    from app.utils.telemetry.workforce_metrics import initialize_tracer_provider
    initialize_tracer_provider()
    app_logger.info("Telemetry tracer provider initialized")
# Graceful shutdown handler
shutdown_event = asyncio.Event()

View file

@ -17,10 +17,12 @@ dependencies = [
"inflection>=0.5.1",
"aiofiles>=24.1.0",
"openai>=1.99.3,<2",
"traceroot>=0.0.7",
"nodejs-wheel>=22.18.0",
"numpy>=1.23.0,<2.0.0",
"debugpy>=1.8.17",
"opentelemetry-api>=1.34.1",
"opentelemetry-sdk>=1.34.1",
"opentelemetry-exporter-otlp-proto-http>=1.34.1",
]

View file

@ -0,0 +1,170 @@
import os
import tempfile
from pathlib import Path
import pytest
from app.component.environment import env_base_dir, sanitize_env_path
def test_none_input_returns_none():
    """A ``None`` path must sanitize to ``None``."""
    sanitized = sanitize_env_path(None)
    assert sanitized is None
def test_empty_string_returns_none():
    """An empty path string must sanitize to ``None``."""
    sanitized = sanitize_env_path("")
    assert sanitized is None
def test_valid_relative_path():
    """A plain relative ``.env`` filename is accepted under the base dir."""
    filename = "project1.env"
    sanitized = sanitize_env_path(filename)
    assert sanitized is not None
    assert sanitized.startswith(env_base_dir)
    assert sanitized.endswith("project1.env")
def test_valid_absolute_path_within_base_dir():
    """An absolute path located inside the base directory is accepted."""
    candidate = os.path.join(env_base_dir, "valid.env")
    sanitized = sanitize_env_path(candidate)
    expected = os.path.abspath(candidate)
    assert sanitized == expected
def test_path_traversal_attack_rejected():
    """``..`` traversal inputs must never yield a path outside the base dir."""
    traversal_inputs = (
        "../../../etc/passwd",
        "../../.ssh/id_rsa.env",
        "../outside.env",
        "subdir/../../outside.env",
    )

    for path in traversal_inputs:
        result = sanitize_env_path(path)
        # A falsy result means the input was rejected outright, which is fine
        if not result:
            continue
        # Otherwise it must have been normalized back inside base_dir
        assert result.startswith(env_base_dir), \
            f"Path traversal not blocked: {path} -> {result}"
def test_absolute_path_outside_base_dir_rejected():
    """Absolute paths that live outside the base directory return ``None``."""
    home_env = str(Path.home() / "evil.env")
    outside_paths = ("/etc/passwd", "/tmp/evil.env", "/root/.env", home_env)

    for path in outside_paths:
        sanitized = sanitize_env_path(path)
        assert sanitized is None, \
            f"Absolute path outside base dir not rejected: {path}"
def test_non_env_extension_rejected():
    """Filenames that do not end in ``.env`` return ``None``."""
    wrong_extension = (
        "config.txt",
        "settings.json",
        "environment",
        ".bashrc",
        "script.py",
    )

    for path in wrong_extension:
        sanitized = sanitize_env_path(path)
        assert sanitized is None, \
            f"Non-.env file not rejected: {path}"
def test_nested_valid_path():
    """A nested relative ``.env`` path is accepted under the base dir."""
    nested = "projects/project1/config.env"
    sanitized = sanitize_env_path(nested)
    assert sanitized is not None
    assert sanitized.startswith(env_base_dir)
    assert sanitized.endswith("config.env")
def test_symlink_escape_attempt():
    """Test that symlinks cannot be used to escape base directory.

    Creates ``evil_link.env`` inside the base directory pointing at a path in
    a temporary directory, then checks that ``sanitize_env_path`` either
    rejects it or returns a path that still resolves inside the base dir.
    Skips when the symlink cannot be created.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create a symlink pointing outside base_dir
        link_path = os.path.join(env_base_dir, "evil_link.env")
        # The target file is never created, so the link is dangling.
        target_path = os.path.join(tmpdir, "outside.env")

        # This test only makes sense if we can create symlinks
        try:
            # lexists() does not follow the link, so it also detects a
            # dangling link left behind by an earlier run; exists() would
            # report False and os.symlink() would then fail with
            # FileExistsError, silently skipping the test.
            if os.path.lexists(link_path):
                os.unlink(link_path)
            os.symlink(target_path, link_path)

            # The sanitized path should resolve the symlink
            result = sanitize_env_path("evil_link.env")

            # Either rejected or stays within base_dir after resolution
            if result:
                resolved = Path(result).resolve()
                base_resolved = Path(env_base_dir).resolve()
                # Check if resolved path is under base directory
                try:
                    resolved.relative_to(base_resolved)
                    # If this succeeds, symlink stayed in base (acceptable)
                except ValueError:
                    # Symlink escaped - should have been rejected
                    pytest.fail(f"Symlink escape not prevented: {result}")
        except (OSError, NotImplementedError):
            # Symlinks not supported on this system, skip test
            pytest.skip("Symlinks not supported")
        finally:
            # Cleanup: lexists() so the dangling link is actually removed
            if os.path.lexists(link_path):
                os.unlink(link_path)
def test_dot_env_in_filename():
    """A name containing ``.env`` mid-string is fine if it ends in ``.env``."""
    sanitized = sanitize_env_path("project.env.backup.env")
    assert sanitized is not None
    assert sanitized.endswith(".env")
def test_case_sensitivity():
    """Only the lowercase ``.env`` extension is accepted."""
    # Upper- and mixed-case extensions must be rejected
    for rejected_name in ("config.ENV", "config.Env"):
        assert sanitize_env_path(rejected_name) is None
    # The exact lowercase extension is the only accepted form
    assert sanitize_env_path("config.env") is not None
def test_special_characters_in_path():
    """Hyphens, underscores and extra dots are allowed in ``.env`` names."""
    # Each of these ends with .env and stays inside base_dir
    accepted_names = ("my-project.env", "project_name.env", "project.2024.env")

    for path in accepted_names:
        sanitized = sanitize_env_path(path)
        assert sanitized is not None, (
            f"Valid path with special chars rejected: {path}")
        assert sanitized.startswith(env_base_dir)
def test_whitespace_handling():
    """Spaces inside an otherwise valid filename are preserved."""
    spaced_name = "my project.env"
    sanitized = sanitize_env_path(spaced_name)
    assert sanitized is not None
    assert "my project.env" in sanitized
def test_current_directory_traversal():
    """Leading ``./`` segments are accepted and stay inside the base dir."""
    for dotted in ("./project.env", "././project.env"):
        sanitized = sanitize_env_path(dotted)
        assert sanitized is not None
        assert sanitized.startswith(env_base_dir)

View file

@ -0,0 +1,297 @@
"""Tests for workforce metrics telemetry."""
from datetime import datetime
from unittest.mock import MagicMock, Mock, patch
import app.utils.telemetry.workforce_metrics as wm_module
import pytest
from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback
from camel.societies.workforce.events import (LogEvent, TaskAssignedEvent,
TaskCompletedEvent,
TaskCreatedEvent,
TaskFailedEvent,
TaskStartedEvent,
WorkerCreatedEvent)
@pytest.fixture(autouse=True)
def reset_global_tracer_provider():
    """Clear the module-level tracer provider once each test has run."""
    yield
    # Teardown: drop whatever provider the test left in the module global
    setattr(wm_module, "_GLOBAL_TRACER_PROVIDER", None)
@pytest.fixture
def mock_env_vars():
    """Patch ``os.environ`` with Langfuse settings for one test."""
    langfuse_settings = dict(
        LANGFUSE_PUBLIC_KEY="test_public_key",
        LANGFUSE_SECRET_KEY="test_secret_key",
        LANGFUSE_BASE_URL="https://test.langfuse.com",
    )
    with patch.dict("os.environ", langfuse_settings):
        yield
@pytest.fixture
def metrics_callback(mock_env_vars):
    """Yield a ``WorkforceMetricsCallback`` whose tracer and root span are mocks."""
    exporter_patch = patch("app.utils.telemetry.workforce_metrics.OTLPSpanExporter")
    with exporter_patch:
        # The tracer provider has to exist before the callback is built
        wm_module.initialize_tracer_provider()

        cb = WorkforceMetricsCallback(project_id="test_project",
                                      task_id="test_task")
        # Swap the tracer and root span for mocks
        cb.tracer = Mock()
        cb.root_span = Mock()
        yield cb
def test_log_worker_created(metrics_callback):
    """Test that log_worker_created sets attributes and status on its span."""
    event = WorkerCreatedEvent(worker_id="worker_1",
                               worker_type="test_worker",
                               role="test_role")

    mock_span = Mock()
    # MagicMock implements the context-manager protocol and its __exit__
    # returns False, so an exception raised inside the span block propagates
    # instead of being suppressed by a truthy Mock() return value.
    span_cm = MagicMock()
    span_cm.__enter__.return_value = mock_span
    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)

    metrics_callback.log_worker_created(event,
                                        agent_class="TestAgent",
                                        model_type="gpt-4")

    # Verify span attributes were set
    assert mock_span.set_attribute.called
    assert mock_span.set_status.called
def test_log_task_created(metrics_callback):
    """Test that log_task_created sets attributes and status on its span."""
    event = TaskCreatedEvent(
        task_id="task_1",
        description="Test task",
        parent_task_id="parent_1",
        task_type="test_type",
    )

    mock_span = Mock()
    # MagicMock implements the context-manager protocol and its __exit__
    # returns False, so an exception raised inside the span block propagates
    # instead of being suppressed by a truthy Mock() return value.
    span_cm = MagicMock()
    span_cm.__enter__.return_value = mock_span
    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)

    metrics_callback.log_task_created(event)

    # Verify span attributes were set
    assert mock_span.set_attribute.called
    assert mock_span.set_status.called
def test_log_task_assigned(metrics_callback):
    """Test that log_task_assigned sets attributes and status on its span."""
    event = TaskAssignedEvent(
        task_id="task_1",
        worker_id="worker_1",
        queue_time_seconds=1.5,
        dependencies=["dep_1", "dep_2"],
    )

    mock_span = Mock()
    # MagicMock implements the context-manager protocol and its __exit__
    # returns False, so an exception raised inside the span block propagates
    # instead of being suppressed by a truthy Mock() return value.
    span_cm = MagicMock()
    span_cm.__enter__.return_value = mock_span
    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)

    metrics_callback.log_task_assigned(event)

    # Verify span attributes were set
    assert mock_span.set_attribute.called
    assert mock_span.set_status.called
def test_log_task_started(metrics_callback):
    """Starting a task opens a span and records it under the task id."""
    started_span = Mock()
    metrics_callback.tracer.start_span = Mock(return_value=started_span)

    started = TaskStartedEvent(task_id="task_1", worker_id="worker_1")
    metrics_callback.log_task_started(started)

    # The span must be tracked by task id and carry attributes
    assert "task_1" in metrics_callback.task_spans
    assert started_span.set_attribute.called
def test_log_task_completed(metrics_callback):
    """Completing a tracked task annotates, ends and forgets its span."""
    # Arrange: pretend the task was already started
    task_id = "task_1"
    open_span = Mock()
    metrics_callback.task_spans[task_id] = open_span

    usage = {"input_tokens": 100, "output_tokens": 50}
    completed = TaskCompletedEvent(
        task_id=task_id,
        worker_id="worker_1",
        parent_task_id="parent_1",
        processing_time_seconds=2.5,
        timestamp=datetime.now(),
        token_usage=usage,
    )

    metrics_callback.log_task_completed(completed)

    # Assert: span removed from tracking, annotated and ended
    assert task_id not in metrics_callback.task_spans
    assert open_span.set_attribute.called
    assert open_span.set_status.called
    assert open_span.end.called
def test_log_task_failed(metrics_callback):
    """Failing a tracked task annotates, ends and forgets its span."""
    # Arrange: pretend the task was already started
    task_id = "task_1"
    open_span = Mock()
    metrics_callback.task_spans[task_id] = open_span

    failed = TaskFailedEvent(
        task_id=task_id,
        worker_id="worker_1",
        parent_task_id="parent_1",
        error_message="Test error",
    )

    metrics_callback.log_task_failed(failed)

    # Assert: span removed from tracking, annotated and ended
    assert task_id not in metrics_callback.task_spans
    assert open_span.set_attribute.called
    assert open_span.set_status.called
    assert open_span.end.called
def test_log_message_error(metrics_callback):
    """Test that an error-level log message sets attributes on its span."""
    event = LogEvent(level="error",
                     message="Test error message",
                     metadata={"key": "value"})

    mock_span = Mock()
    # MagicMock implements the context-manager protocol and its __exit__
    # returns False, so an exception raised inside the span block propagates
    # instead of being suppressed by a truthy Mock() return value.
    span_cm = MagicMock()
    span_cm.__enter__.return_value = mock_span
    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)

    metrics_callback.log_message(event)

    # Verify span was created for error message
    assert mock_span.set_attribute.called
def test_log_message_quality_score(metrics_callback):
    """An info message carrying a quality score is parsed and stored."""
    text = "Task task_1 completed successfully (quality score: 85)."
    metrics_callback.log_message(LogEvent(level="info", message=text))

    # The score must be recorded under the task id from the message
    scores = metrics_callback.task_quality_scores
    assert "task_1" in scores
    assert scores["task_1"] == 85
def test_log_all_tasks_completed(metrics_callback):
    """Test that log_all_tasks_completed annotates its span and ends the root span."""
    event = MagicMock()
    event.timestamp = datetime.now()
    event.total_tasks = 5

    mock_span = Mock()
    # MagicMock implements the context-manager protocol and its __exit__
    # returns False, so an exception raised inside the span block propagates
    # instead of being suppressed by a truthy Mock() return value.
    span_cm = MagicMock()
    span_cm.__enter__.return_value = mock_span
    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)

    metrics_callback.log_all_tasks_completed(event)

    # Verify span was created and root span was ended
    assert mock_span.set_attribute.called
    assert mock_span.set_status.called
    assert metrics_callback.root_span.end.called
def test_batch_span_processor_configuration_prevents_oom(mock_env_vars):
    """The BatchSpanProcessor is built once with the expected queue limits."""
    module_path = "app.utils.telemetry.workforce_metrics"
    with patch(f"{module_path}.OTLPSpanExporter") as exporter_cls, \
            patch(f"{module_path}.BatchSpanProcessor") as processor_cls:
        wm_module.initialize_tracer_provider()

        # Exactly one processor should have been constructed
        processor_cls.assert_called_once()
        positional, keyword = processor_cls.call_args

        # The exporter instance is the first positional argument
        assert positional[0] == exporter_cls.return_value

        # Queue, timeout, delay and batch limits
        assert keyword["max_queue_size"] == 4096
        assert keyword["export_timeout_millis"] == 30000
        assert keyword["schedule_delay_millis"] == 3000
        assert keyword["max_export_batch_size"] == 1024
def test_missing_langfuse_env_vars_disables_tracing():
    """With an empty environment the callback is disabled and builds nothing."""
    module_path = "app.utils.telemetry.workforce_metrics"
    with patch.dict("os.environ", {}, clear=True), \
            patch(f"{module_path}.OTLPSpanExporter") as exporter_cls, \
            patch(f"{module_path}.BatchSpanProcessor") as processor_cls:
        # No Langfuse credentials are present at this point
        wm_module.initialize_tracer_provider()

        cb = WorkforceMetricsCallback(project_id="test_project",
                                      task_id="test_task")

        # Tracing must be off
        assert cb.enabled is False

        # Neither the exporter nor the processor may have been constructed
        exporter_cls.assert_not_called()
        processor_cls.assert_not_called()

        # Logging through a disabled callback must not raise
        worker_event = WorkerCreatedEvent(worker_id="worker_1",
                                          worker_type="test_worker",
                                          role="test_role")
        cb.log_worker_created(worker_event)
def test_multiple_callbacks_share_tracer_provider(mock_env_vars):
    """Two callbacks created after one init reuse a single span processor."""
    processor_target = "app.utils.telemetry.workforce_metrics.BatchSpanProcessor"
    with patch(processor_target) as processor_cls:
        # One initialization for the whole process
        wm_module.initialize_tracer_provider()

        first = WorkforceMetricsCallback(project_id="project1",
                                         task_id="task1")
        second = WorkforceMetricsCallback(project_id="project2",
                                          task_id="task2")

        # The processor was constructed only once (singleton)
        assert processor_cls.call_count == 1

        # Both callbacks report tracing as enabled
        assert first.enabled is True
        assert second.enabled is True

View file

@ -383,15 +383,11 @@ class TestWorkforce:
description="Test workforce"
)
with patch('app.service.task.delete_task_lock', side_effect=Exception("Delete failed")), \
patch('traceroot.get_logger') as mock_get_logger:
with patch('app.service.task.delete_task_lock', side_effect=Exception("Delete failed")):
# Should not raise exception
await workforce.cleanup()
# Should log the error
mock_get_logger.assert_called_once()
@pytest.mark.integration
class TestWorkforceIntegration:
@ -636,15 +632,10 @@ class TestWorkforceErrorCases:
description="Nonexistent lock test workforce"
)
with patch('app.service.task.delete_task_lock', side_effect=Exception("Task lock not found")), \
patch('traceroot.get_logger') as mock_get_logger:
with patch('app.service.task.delete_task_lock', side_effect=Exception("Task lock not found")):
# Should handle missing task lock gracefully
await workforce.cleanup()
# Should log the error
mock_get_logger.assert_called_once()
def test_workforce_inheritance(self):
"""Test that Workforce properly inherits from BaseWorkforce."""
from camel.societies.workforce.workforce import Workforce as BaseWorkforce

1767
backend/uv.lock generated

File diff suppressed because it is too large Load diff

View file

@ -14,9 +14,9 @@
from sqlmodel import Session, create_engine
from app.component.environment import env, env_or_fail
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("database")
logger = logging.getLogger("database")
logger.info("Initializing database engine", extra={
"database_url_prefix": env_or_fail("database_url")[:20] + "...",

View file

@ -19,9 +19,9 @@ from fastapi import APIRouter, FastAPI
from dotenv import load_dotenv
import importlib
from typing import Any, overload
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("environment")
logger = logging.getLogger("environment")
logger.info("Loading environment variables from .env file")
load_dotenv()

View file

@ -21,17 +21,16 @@ from fastapi_babel import _
from sqlmodel import Session, select, desc, case
from app.component.auth import Auth, auth_must
from app.component.database import session
from utils import traceroot_wrapper as traceroot
import logging
from typing import Optional, Dict, List
from collections import defaultdict
logger = traceroot.get_logger("server_chat_history")
logger = logging.getLogger("server_chat_history")
router = APIRouter(prefix="/chat", tags=["Chat History"])
@router.post("/history", name="save chat history", response_model=ChatHistoryOut)
@traceroot.trace()
def create_chat_history(data: ChatHistoryIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Save new chat history."""
user_id = auth.user.id
@ -51,7 +50,6 @@ def create_chat_history(data: ChatHistoryIn, session: Session = Depends(session)
@router.get("/histories", name="get chat history")
@traceroot.trace()
def list_chat_history(session: Session = Depends(session), auth: Auth = Depends(auth_must)) -> Page[ChatHistoryOut]:
"""List chat histories for current user."""
user_id = auth.user.id
@ -75,7 +73,6 @@ def list_chat_history(session: Session = Depends(session), auth: Auth = Depends(
@router.get("/histories/grouped", name="get grouped chat history")
@traceroot.trace()
def list_grouped_chat_history(
include_tasks: Optional[bool] = Query(True, description="Whether to include individual tasks in groups"),
session: Session = Depends(session),
@ -177,7 +174,6 @@ def list_grouped_chat_history(
@router.delete("/history/{history_id}", name="delete chat history")
@traceroot.trace()
def delete_chat_history(history_id: str, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Delete chat history."""
user_id = auth.user.id
@ -203,7 +199,6 @@ def delete_chat_history(history_id: str, session: Session = Depends(session), au
@router.put("/history/{history_id}", name="update chat history", response_model=ChatHistoryOut)
@traceroot.trace()
def update_chat_history(
history_id: int, data: ChatHistoryUpdate, session: Session = Depends(session), auth: Auth = Depends(auth_must)
):
@ -232,7 +227,6 @@ def update_chat_history(
@router.put("/project/{project_id}/name", name="update project name")
@traceroot.trace()
def update_project_name(
project_id: str,
new_name: str,

View file

@ -22,15 +22,14 @@ from starlette.responses import StreamingResponse
from app.model.chat.chat_share import ChatHistoryShareOut, ChatShare, ChatShareIn
from app.model.chat.chat_step import ChatStep
from app.model.chat.chat_history import ChatHistory
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_chat_share")
logger = logging.getLogger("server_chat_share")
router = APIRouter(prefix="/chat", tags=["Chat Share"])
@router.get("/share/info/{token}", name="Get shared chat info", response_model=ChatHistoryShareOut)
@traceroot.trace()
def get_share_info(token: str, session: Session = Depends(session)):
"""
Get shared chat history info by token, excluding sensitive data.
@ -56,7 +55,6 @@ def get_share_info(token: str, session: Session = Depends(session)):
@router.get("/share/playback/{token}", name="Playback shared chat via SSE")
@traceroot.trace()
async def share_playback(token: str, session: Session = Depends(session), delay_time: float = 0):
"""
Playbacks the chat history via a sharing token (SSE).
@ -109,7 +107,6 @@ async def share_playback(token: str, session: Session = Depends(session), delay_
@router.post("/share", name="Generate sharable link for a task(1 day expiration)")
@traceroot.trace()
def create_share_link(data: ChatShareIn):
"""Generate sharing token with 1-day expiration for task."""
try:

View file

@ -19,15 +19,14 @@ from sqlmodel import Session, select
from app.component.database import session
from app.component.auth import Auth, auth_must
from fastapi_babel import _
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_chat_snapshot")
logger = logging.getLogger("server_chat_snapshot")
router = APIRouter(prefix="/chat", tags=["Chat Snapshot Management"])
@router.get("/snapshots", name="list chat snapshots", response_model=List[ChatSnapshot])
@traceroot.trace()
async def list_chat_snapshots(
api_task_id: Optional[str] = None,
camel_task_id: Optional[str] = None,
@ -49,7 +48,6 @@ async def list_chat_snapshots(
@router.get("/snapshots/{snapshot_id}", name="get chat snapshot", response_model=ChatSnapshot)
@traceroot.trace()
async def get_chat_snapshot(snapshot_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Get specific chat snapshot."""
user_id = auth.user.id
@ -64,7 +62,6 @@ async def get_chat_snapshot(snapshot_id: int, session: Session = Depends(session
@router.post("/snapshots", name="create chat snapshot", response_model=ChatSnapshot)
@traceroot.trace()
async def create_chat_snapshot(
snapshot: ChatSnapshotIn, auth: Auth = Depends(auth_must), session: Session = Depends(session)
):
@ -92,7 +89,6 @@ async def create_chat_snapshot(
@router.put("/snapshots/{snapshot_id}", name="update chat snapshot", response_model=ChatSnapshot)
@traceroot.trace()
async def update_chat_snapshot(
snapshot_id: int,
snapshot_update: ChatSnapshot,
@ -127,7 +123,6 @@ async def update_chat_snapshot(
@router.delete("/snapshots/{snapshot_id}", name="delete chat snapshot")
@traceroot.trace()
async def delete_chat_snapshot(snapshot_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Delete chat snapshot."""
user_id = auth.user.id

View file

@ -23,15 +23,14 @@ from app.component.database import session
from app.component.auth import Auth, auth_must
from fastapi_babel import _
from app.model.chat.chat_step import ChatStep, ChatStepOut, ChatStepIn
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_chat_step")
logger = logging.getLogger("server_chat_step")
router = APIRouter(prefix="/chat", tags=["Chat Step Management"])
@router.get("/steps", name="list chat steps", response_model=List[ChatStepOut])
@traceroot.trace()
async def list_chat_steps(
task_id: str, step: Optional[str] = None, session: Session = Depends(session), auth: Auth = Depends(auth_must)
):
@ -49,7 +48,6 @@ async def list_chat_steps(
@router.get("/steps/playback/{task_id}", name="Playback Chat Step via SSE")
@traceroot.trace()
async def share_playback(
task_id: str, delay_time: float = 0, session: Session = Depends(session), auth: Auth = Depends(auth_must)
):
@ -97,7 +95,6 @@ async def share_playback(
@router.get("/steps/{step_id}", name="get chat step", response_model=ChatStepOut)
@traceroot.trace()
async def get_chat_step(step_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Get specific chat step."""
user_id = auth.user.id
@ -112,7 +109,6 @@ async def get_chat_step(step_id: int, session: Session = Depends(session), auth:
@router.post("/steps", name="create chat step")
@traceroot.trace()
async def create_chat_step(step: ChatStepIn, session: Session = Depends(session)):
"""Create new chat step. TODO: Implement request source validation."""
try:
@ -134,7 +130,6 @@ async def create_chat_step(step: ChatStepIn, session: Session = Depends(session)
@router.put("/steps/{step_id}", name="update chat step", response_model=ChatStepOut)
@traceroot.trace()
async def update_chat_step(
step_id: int, chat_step_update: ChatStep, session: Session = Depends(session), auth: Auth = Depends(auth_must)
):
@ -162,7 +157,6 @@ async def update_chat_step(
@router.delete("/steps/{step_id}", name="delete chat step")
@traceroot.trace()
async def delete_chat_step(step_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Delete chat step."""
user_id = auth.user.id

View file

@ -19,15 +19,14 @@ from app.component.database import session
from app.component.auth import Auth, auth_must
from fastapi_babel import _
from app.model.config.config import Config, ConfigCreate, ConfigUpdate, ConfigInfo, ConfigOut
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_config_controller")
logger = logging.getLogger("server_config_controller")
router = APIRouter(tags=["Config Management"])
@router.get("/configs", name="list configs", response_model=list[ConfigOut])
@traceroot.trace()
async def list_configs(
config_group: Optional[str] = None, session: Session = Depends(session), auth: Auth = Depends(auth_must)
):
@ -44,7 +43,6 @@ async def list_configs(
@router.get("/configs/{config_id}", name="get config", response_model=ConfigOut)
@traceroot.trace()
async def get_config(
config_id: int,
session: Session = Depends(session),
@ -66,7 +64,6 @@ async def get_config(
@router.post("/configs", name="create config", response_model=ConfigOut)
@traceroot.trace()
async def create_config(config: ConfigCreate, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Create new configuration."""
user_id = auth.user.id
@ -103,7 +100,6 @@ async def create_config(config: ConfigCreate, session: Session = Depends(session
@router.put("/configs/{config_id}", name="update config", response_model=ConfigOut)
@traceroot.trace()
async def update_config(
config_id: int, config_update: ConfigUpdate, session: Session = Depends(session), auth: Auth = Depends(auth_must)
):
@ -149,7 +145,6 @@ async def update_config(
@router.delete("/configs/{config_id}", name="delete config")
@traceroot.trace()
async def delete_config(config_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Delete configuration."""
user_id = auth.user.id
@ -171,7 +166,6 @@ async def delete_config(config_id: int, session: Session = Depends(session), aut
@router.get("/config/info", name="get config info")
@traceroot.trace()
async def get_config_info(
show_all: bool = Query(False, description="Show all config info, including those with empty env_vars"),
):

View file

@ -27,9 +27,9 @@ from app.model.mcp.mcp_env import McpEnv, Status as McpEnvStatus
from app.model.mcp.mcp_user import McpImportType, McpUser, Status
from camel.toolkits.mcp_toolkit import MCPToolkit
from app.component.environment import env
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_mcp_controller")
logger = logging.getLogger("server_mcp_controller")
from app.component.validator.McpServer import (
McpRemoteServer,
@ -81,7 +81,6 @@ async def pre_instantiate_mcp_toolkit(config_dict: dict) -> bool:
@router.get("/mcps", name="mcp list")
@traceroot.trace()
async def gets(
keyword: str | None = None,
category_id: int | None = None,
@ -121,7 +120,6 @@ async def gets(
@router.get("/mcp", name="mcp detail", response_model=McpOut)
@traceroot.trace()
async def get(id: int, session: Session = Depends(session)):
"""Get MCP server details."""
try:
@ -135,7 +133,6 @@ async def get(id: int, session: Session = Depends(session)):
@router.post("/mcp/install", name="mcp install")
@traceroot.trace()
async def install(mcp_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Install MCP server for user."""
user_id = auth.user.id
@ -191,7 +188,6 @@ async def install(mcp_id: int, session: Session = Depends(session), auth: Auth =
@router.post("/mcp/import/{mcp_type}", name="mcp import")
@traceroot.trace()
async def import_mcp(
mcp_type: McpImportType, mcp_data: dict, session: Session = Depends(session), auth: Auth = Depends(auth_must)
):

View file

@ -19,9 +19,9 @@ from app.component.environment import env_not_empty
from app.model.mcp.proxy import ExaSearch
from typing import Any, cast
import requests
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_proxy_controller")
logger = logging.getLogger("server_proxy_controller")
from app.model.user.key import Key
@ -30,7 +30,6 @@ router = APIRouter(prefix="/proxy", tags=["Mcp Servers"])
@router.post("/exa")
@traceroot.trace()
def exa_search(search: ExaSearch, key: Key = Depends(key_must)):
"""Search using Exa API."""
EXA_API_KEY = env_not_empty("EXA_API_KEY")
@ -100,7 +99,6 @@ def exa_search(search: ExaSearch, key: Key = Depends(key_must)):
@router.get("/google")
@traceroot.trace()
def google_search(query: str, search_type: str = "web", key: Key = Depends(key_must)):
"""Search using Google Custom Search API."""
# https://developers.google.com/custom-search/v1/overview

View file

@ -23,9 +23,9 @@ from app.model.mcp.mcp_user import McpUser, McpUserIn, McpUserOut, McpUserUpdate
from app.model.mcp.mcp import Mcp
from camel.toolkits.mcp_toolkit import MCPToolkit
from app.component.environment import env
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_mcp_user_controller")
logger = logging.getLogger("server_mcp_user_controller")
router = APIRouter(tags=["McpUser Management"])
@ -70,7 +70,6 @@ async def pre_instantiate_mcp_toolkit(config_dict: dict) -> bool:
@router.get("/mcp/users", name="list mcp users", response_model=List[McpUserOut])
@traceroot.trace()
async def list_mcp_users(
mcp_id: Optional[int] = None,
session: Session = Depends(session),
@ -89,7 +88,6 @@ async def list_mcp_users(
@router.get("/mcp/users/{mcp_user_id}", name="get mcp user", response_model=McpUserOut)
@traceroot.trace()
async def get_mcp_user(mcp_user_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Get MCP user details."""
query = select(McpUser).where(McpUser.id == mcp_user_id)
@ -102,7 +100,6 @@ async def get_mcp_user(mcp_user_id: int, session: Session = Depends(session), au
@router.post("/mcp/users", name="create mcp user", response_model=McpUserOut)
@traceroot.trace()
async def create_mcp_user(mcp_user: McpUserIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Create MCP user installation."""
user_id = auth.user.id
@ -145,7 +142,6 @@ async def create_mcp_user(mcp_user: McpUserIn, session: Session = Depends(sessio
@router.put("/mcp/users/{id}", name="update mcp user")
@traceroot.trace()
async def update_mcp_user(
id: int,
update_item: McpUserUpdate,
@ -175,7 +171,6 @@ async def update_mcp_user(
@router.delete("/mcp/users/{mcp_user_id}", name="delete mcp user")
@traceroot.trace()
async def delete_mcp_user(mcp_user_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Delete MCP user installation."""
user_id = auth.user.id

View file

@ -17,15 +17,14 @@ from fastapi.responses import RedirectResponse, JSONResponse, HTMLResponse
from app.component.environment import env
from app.component.oauth_adapter import OauthCallbackPayload, get_oauth_adapter
from typing import Optional
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_oauth_controller")
logger = logging.getLogger("server_oauth_controller")
router = APIRouter(prefix="/oauth", tags=["Oauth Servers"])
@router.get("/{app}/login", name="OAuth Login Redirect")
@traceroot.trace()
def oauth_login(app: str, request: Request, state: Optional[str] = None):
"""Redirect user to OAuth provider's authorization endpoint."""
try:
@ -50,7 +49,6 @@ def oauth_login(app: str, request: Request, state: Optional[str] = None):
@router.get("/{app}/callback", name="OAuth Callback")
@traceroot.trace()
def oauth_callback(app: str, request: Request, code: Optional[str] = None, state: Optional[str] = None):
"""Handle OAuth provider callback and redirect to client app."""
if not code:
@ -78,7 +76,6 @@ def oauth_callback(app: str, request: Request, code: Optional[str] = None, state
@router.post("/{app}/token", name="OAuth Fetch Token")
@traceroot.trace()
def fetch_token(app: str, request: Request, data: OauthCallbackPayload):
"""Exchange authorization code for access token."""
try:

View file

@ -24,15 +24,14 @@ from sqlalchemy.exc import SQLAlchemyError
from app.component.database import session
from app.component.auth import Auth, auth_must
from app.model.provider.provider import Provider, ProviderIn, ProviderOut, ProviderPreferIn
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_provider_controller")
logger = logging.getLogger("server_provider_controller")
router = APIRouter(tags=["Provider Management"])
@router.get("/providers", name="list providers", response_model=Page[ProviderOut])
@traceroot.trace()
async def gets(
keyword: str | None = None,
prefer: Optional[bool] = Query(None, description="Filter by prefer status"),
@ -52,7 +51,6 @@ async def gets(
@router.get("/provider", name="get provider detail", response_model=ProviderOut)
@traceroot.trace()
async def get(id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Get provider details."""
user_id = auth.user.id
@ -66,7 +64,6 @@ async def get(id: int, session: Session = Depends(session), auth: Auth = Depends
@router.post("/provider", name="create provider", response_model=ProviderOut)
@traceroot.trace()
async def post(data: ProviderIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Create a new provider."""
user_id = auth.user.id
@ -81,7 +78,6 @@ async def post(data: ProviderIn, session: Session = Depends(session), auth: Auth
@router.put("/provider/{id}", name="update provider", response_model=ProviderOut)
@traceroot.trace()
async def put(id: int, data: ProviderIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Update provider details."""
user_id = auth.user.id
@ -109,7 +105,6 @@ async def put(id: int, data: ProviderIn, session: Session = Depends(session), au
@router.delete("/provider/{id}", name="delete provider")
@traceroot.trace()
async def delete(id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Delete a provider."""
user_id = auth.user.id
@ -130,7 +125,6 @@ async def delete(id: int, session: Session = Depends(session), auth: Auth = Depe
@router.post("/provider/prefer", name="set provider prefer")
@traceroot.trace()
async def set_prefer(data: ProviderPreferIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Set preferred provider for user."""
user_id = auth.user.id

View file

@ -29,16 +29,15 @@ from app.model.user.user import (
RegisterIn,
)
from app.component.environment import env
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_login_controller")
logger = logging.getLogger("server_login_controller")
router = APIRouter(tags=["Login/Registration"])
@router.post("/login", name="login by email or password")
@traceroot.trace()
async def by_password(
data: LoginByPasswordIn, session: Session = Depends(session)
) -> LoginResponse:
@ -63,7 +62,6 @@ async def by_password(
@router.post("/dev_login", name="OAuth2 password flow login (for Swagger UI)")
@traceroot.trace()
async def dev_login(
username: str = Form(...), # OAuth2 uses 'username' but we accept email
password: str = Form(...),
@ -96,7 +94,6 @@ async def dev_login(
@router.post("/login-by_stack", name="login by stack")
@traceroot.trace()
async def by_stack_auth(
token: str,
type: str = "signup",
@ -169,7 +166,6 @@ async def by_stack_auth(
@router.post("/register", name="register by email/password")
@traceroot.trace()
async def register(data: RegisterIn, session: Session = Depends(session)):
email = data.email

View file

@ -25,15 +25,14 @@ from app.model.mcp.mcp_user import McpUser
from app.model.config.config import Config
from app.model.chat.chat_snpshot import ChatSnapshot
from app.model.user.user_credits_record import UserCreditsRecord
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_user_controller")
logger = logging.getLogger("server_user_controller")
router = APIRouter(tags=["User"])
@router.get("/user", name="user info", response_model=UserOut)
@traceroot.trace()
def get(auth: Auth = Depends(auth_must), session: Session = Depends(session)):
"""Get current user information and refresh credits."""
user: User = auth.user
@ -43,7 +42,6 @@ def get(auth: Auth = Depends(auth_must), session: Session = Depends(session)):
@router.put("/user", name="update user info", response_model=UserOut)
@traceroot.trace()
def put(data: UserIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Update user basic information."""
model = auth.user
@ -54,7 +52,6 @@ def put(data: UserIn, session: Session = Depends(session), auth: Auth = Depends(
@router.put("/user/profile", name="update user profile", response_model=UserProfile)
@traceroot.trace()
def put_profile(data: UserProfile, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Update user profile details."""
model = auth.user
@ -67,7 +64,6 @@ def put_profile(data: UserProfile, session: Session = Depends(session), auth: Au
@router.get("/user/privacy", name="get user privacy")
@traceroot.trace()
def get_privacy(session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Get user privacy settings."""
user_id = auth.user.id
@ -83,7 +79,6 @@ def get_privacy(session: Session = Depends(session), auth: Auth = Depends(auth_m
@router.put("/user/privacy", name="update user privacy")
@traceroot.trace()
def put_privacy(data: UserPrivacySettings, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
"""Update user privacy settings."""
user_id = auth.user.id
@ -104,7 +99,6 @@ def put_privacy(data: UserPrivacySettings, session: Session = Depends(session),
@router.get("/user/current_credits", name="get user current credits")
@traceroot.trace()
def get_user_credits(auth: Auth = Depends(auth_must), session: Session = Depends(session)):
"""Get user's current credit balance."""
user = auth.user
@ -121,7 +115,6 @@ def get_user_credits(auth: Auth = Depends(auth_must), session: Session = Depends
@router.get("/user/stat", name="get user stat", response_model=UserStatOut)
@traceroot.trace()
def get_user_stat(auth: Auth = Depends(auth_must), session: Session = Depends(session)):
"""Get current user's operation statistics."""
user_id = auth.user.id
@ -152,7 +145,6 @@ def get_user_stat(auth: Auth = Depends(auth_must), session: Session = Depends(se
@router.post("/user/stat", name="record user stat")
@traceroot.trace()
def record_user_stat(
data: UserStatActionIn,
auth: Auth = Depends(auth_must),

View file

@ -22,15 +22,14 @@ from app.component.encrypt import password_hash, password_verify
from app.exception.exception import UserException
from app.model.user.user import UpdatePassword, UserOut
from fastapi_babel import _
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("server_password_controller")
logger = logging.getLogger("server_password_controller")
router = APIRouter(tags=["User"])
@router.put("/user/update-password", name="update password", response_model=UserOut)
@traceroot.trace()
def update_password(data: UpdatePassword, auth: Auth = Depends(auth_must), session: Session = Depends(session)):
"""Update user password after verifying current password."""
user_id = auth.user.id

View file

@ -24,9 +24,9 @@ from fastapi_babel import _
from app.exception.exception import UserException
from app.component.database import engine
from convert_case import snake_case
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("abstract_model")
logger = logging.getLogger("abstract_model")
class AbstractModel(SQLModel):

View file

@ -15,7 +15,7 @@
from enum import IntEnum
from typing import Optional
from pydantic import BaseModel
from sqlalchemy import Boolean, Column, SmallInteger, String
from sqlalchemy import Boolean, Column, SmallInteger
from sqlalchemy.orm import Mapped
from sqlmodel import Field, JSON
from sqlalchemy_utils import ChoiceType

View file

@ -22,9 +22,9 @@ from app.model.abstract.model import AbstractModel, DefaultTimes
from datetime import date, datetime, timedelta
from app.model.user.key import ModelType
from app.component.database import session_make
from utils import traceroot_wrapper as traceroot
import logging
logger = traceroot.get_logger("user_credits_record")
logger = logging.getLogger("user_credits_record")
class CreditsChannel(IntEnum):

View file

@ -21,7 +21,7 @@ _project_root = pathlib.Path(__file__).parent.parent
if str(_project_root) not in sys.path:
sys.path.insert(0, str(_project_root))
from utils import traceroot_wrapper as traceroot
import logging
from app import api
from app.component.environment import auto_include_routers, env
from fastapi.staticfiles import StaticFiles
@ -32,13 +32,7 @@ import app.middleware # noqa: F401
# Import exception handlers to register them
import app.exception.handler # noqa: F401
# Only initialize traceroot if enabled
if traceroot.is_enabled():
from traceroot.integrations.fastapi import connect_fastapi
connect_fastapi(api)
logger = traceroot.get_logger("server_main")
logger = logging.getLogger("server_main")
prefix = env("url_prefix", "")
auto_include_routers(api, prefix, "app/controller")

View file

@ -34,7 +34,6 @@ dependencies = [
"cryptography>=45.0.4",
"sqids>=0.5.2",
"exa-py>=1.14.16",
"traceroot>=0.0.7",
]
[tool.ruff]

1172
server/uv.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,17 +0,0 @@
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
from . import traceroot_wrapper
__all__ = ['traceroot_wrapper']

View file

@ -1,99 +0,0 @@
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
from pathlib import Path
from typing import Callable
import logging
from dotenv import load_dotenv
# Try to import traceroot, but handle gracefully if not available
try:
import traceroot
TRACEROOT_AVAILABLE = True
except ImportError:
TRACEROOT_AVAILABLE = False
traceroot = None
# Auto-detect module name based on caller's path
def _get_module_name():
    """Classify the code two frames above this call as backend or server.

    The ``__file__`` global of the frame two levels up the call stack is
    searched for the substrings ``'backend'`` and ``'server'``, in that
    order, and the first match is returned.

    Returns:
        ``'backend'``, ``'server'``, or ``'unknown'`` when no frame is
        available at that depth or neither substring occurs in its path.
    """
    import inspect

    here = inspect.currentframe()
    try:
        # Step up twice; either hop may be missing near the top of the stack.
        parent = here.f_back if here else None
        origin = parent.f_back if parent else None
        if origin:
            origin_path = origin.f_globals.get('__file__', '')
            # 'backend' is tested first, so it wins when both appear.
            for label in ('backend', 'server'):
                if label in origin_path:
                    return label
    finally:
        # Drop the local frame reference before leaving the function.
        del here
    return 'unknown'
# Load environment variables from the ``.env`` file that sits in the parent
# of this file's directory, before TraceRoot initialisation is attempted.
env_path = Path(__file__).resolve().parent.parent / '.env'
load_dotenv(env_path)

if TRACEROOT_AVAILABLE and traceroot.init():
    # TraceRoot imported and ``init()`` returned a truthy value: expose the
    # real tracing decorator and logger factory under the wrapper's names.
    from traceroot.logger import get_logger as _get_traceroot_logger

    trace = traceroot.trace

    def get_logger(name: str = __name__):
        """Return the TraceRoot logger registered under ``name``."""
        return _get_traceroot_logger(name)

    def is_enabled() -> bool:
        """Report that TraceRoot is active (always ``True`` on this branch)."""
        return True

    # Announce the successful start-up, tagged with the detected module.
    module_name = _get_module_name()
    _init_logger = _get_traceroot_logger("traceroot_wrapper")
    _init_logger.info(
        "TraceRoot initialized successfully",
        extra={"backend": "traceroot", "service_module": module_name},
    )
else:
    # Fallback: same public names, backed by no-ops and the stdlib logger.

    def trace(*args, **kwargs):
        """Accept any arguments and return a decorator that changes nothing."""

        def passthrough(func: Callable) -> Callable:
            return func

        return passthrough

    def get_logger(name: str = __name__):
        """Return a stdlib logger, configuring it on first use.

        A logger that already has handlers is returned untouched; otherwise
        a stream handler with a timestamped format is attached and the
        level is set to INFO.
        """
        fallback = logging.getLogger(name)
        if fallback.handlers:
            return fallback
        stream = logging.StreamHandler()
        stream.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        fallback.addHandler(stream)
        fallback.setLevel(logging.INFO)
        return fallback

    def is_enabled() -> bool:
        """Report that TraceRoot is inactive (always ``False`` on this branch)."""
        return False

    # Say why the fallback is in use: package present but not initialised,
    # or package missing altogether.
    _fallback_logger = logging.getLogger("traceroot_wrapper")
    _fallback_logger.warning(
        "TraceRoot available but not initialized - using Python logging as fallback"
        if TRACEROOT_AVAILABLE
        else "TraceRoot not available - using Python logging as fallback"
    )

__all__ = ['trace', 'get_logger', 'is_enabled']