Add langfuse and update logger (#952)

Co-authored-by: bytecraftii <bytecraftii@users.noreply.github.com>
Co-authored-by: Wendong-Fan <w3ndong.fan@gmail.com>
2026-05-19 07:59:39 +00:00 · 2026-01-24 16:13:07 -08:00 · 2026-01-24 16:13:07 -08:00 · 49e148a2f9
commit 49e148a2f9
parent cf571b73a4
63 changed files with 3095 additions and 2430 deletions
--- a/.env.development
+++ b/.env.development
@ -1,30 +1,7 @@
 VITE_BASE_URL=/api

 VITE_PROXY_URL=https://dev.eigent.ai
-
 VITE_USE_LOCAL_PROXY=false

 # VITE_PROXY_URL=http://localhost:3001
 # VITE_USE_LOCAL_PROXY=true
-
-TRACEROOT_TOKEN=your_traceroot_token_here
-
-TRACEROOT_SERVICE_NAME=eigent
-
-TRACEROOT_GITHUB_OWNER=eigent
-
-TRACEROOT_GITHUB_REPO_NAME=eigent-ai
-
-TRACEROOT_GITHUB_COMMIT_HASH=main
-
-TRACEROOT_ENABLE_SPAN_CLOUD_EXPORT=false
-
-TRACEROOT_ENABLE_LOG_CLOUD_EXPORT=false
-
-TRACEROOT_ENABLE_SPAN_CONSOLE_EXPORT=false
-
-TRACEROOT_ENABLE_LOG_CONSOLE_EXPORT=false
-
-TRACEROOT_TRACER_VERBOSE=false
-
-TRACEROOT_LOGGER_VERBOSE=false
--- a/.gitignore
+++ b/.gitignore
@ -46,7 +46,6 @@ public/

 # Testing
 coverage/
-.traceroot-config.yaml

 # Python
 __pycache__/
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -243,9 +243,6 @@ To run the application locally in developer mode:
 1. Configure `.env.development`:
   - Set `VITE_USE_LOCAL_PROXY=true`
   - Set `VITE_PROXY_URL=http://localhost:3001`
-   - Set `TRACEROOT_ENABLE_SPAN_CLOUD_EXPORT=false`
-   - Set `TRACEROOT_ENABLE_LOG_CLOUD_EXPORT=false`
-   - Set `TRACEROOT_ENABLE_LOG_CONSOLE_EXPORT=false`
 2. Go to the settings to specify your model key and model type.

 ## Common Actions 🔄
--- a/backend/.pre-commit-config.yaml
+++ b/backend/.pre-commit-config.yaml
@ -0,0 +1,87 @@
+ci:
+  # https://pre-commit.ci/#configuration
+  autofix_prs: true
+  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+  autoupdate_schedule: monthly
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
+    hooks:
+      - id: no-commit-to-branch
+        name: No commits to master
+      - id: end-of-file-fixer
+        name: End-of-file fixer
+      - name: mixed-line-ending
+        id: mixed-line-ending
+        args: [--fix, lf]
+      - id: trailing-whitespace
+        name: Remove trailing whitespaces
+      - id: check-toml
+        name: Check toml
+      - id: check-yaml
+        name: Check yaml
+
+  - repo: https://github.com/adrienverge/yamllint.git
+    rev: v1.37.1
+    hooks:
+      - id: yamllint
+        name: Lint yaml
+        args: [-d, '{extends: default, rules: {line-length: disable, document-start: disable, truthy: {level: error}, braces: {max-spaces-inside: 1}}}']
+
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.21.0
+    hooks:
+      - id: pyupgrade
+        name: Upgrade Python syntax
+        args: [--py38-plus]
+
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.3.1
+    hooks:
+      - id: autoflake
+        name: Remove unused imports and variables
+        args: [
+          --remove-all-unused-imports,
+          --remove-unused-variables,
+          --remove-duplicate-keys,
+          --ignore-init-module-imports,
+          --in-place,
+        ]
+
+  - repo: https://github.com/google/yapf
+    rev: v0.43.0
+    hooks:
+      - id: yapf
+        name: Format code
+        additional_dependencies: [toml]
+
+  - repo: https://github.com/pycqa/isort
+    rev: 7.0.0
+    hooks:
+      - id: isort
+        name: Sort imports
+
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.3.0
+    hooks:
+      - id: flake8
+        name: Check PEP8
+        additional_dependencies: [Flake8-pyproject]
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.14.3
+    hooks:
+      - id: ruff
+        name: Ruff formatting
+        args: [--fix, --exit-non-zero-on-fix]
+
+  - repo: https://github.com/executablebooks/mdformat
+    rev: 0.7.22
+    hooks:
+      - id: mdformat
+        name: Format Markdown
+        additional_dependencies:
+          - mdformat-gfm
+          - mdformat_frontmatter
+          - mdformat_footnote
--- a/backend/app/component/environment.py
+++ b/backend/app/component/environment.py
@ -1,18 +1,4 @@
-# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
-
-from utils import traceroot_wrapper as traceroot
+import logging
 import importlib.util
 import os
 from pathlib import Path
@ -22,7 +8,7 @@ import importlib
 from typing import Any, overload
 import threading

-traceroot_logger = traceroot.get_logger("env")
+logger = logging.getLogger("env")

 # Thread-local storage for user-specific environment
 _thread_local = threading.local()
@ -31,27 +17,100 @@ _thread_local = threading.local()
 default_env_path = os.path.join(os.path.expanduser("~"), ".eigent", ".env")
 load_dotenv(dotenv_path=default_env_path)

+# Safe base directory for user environment files
+env_base_dir = os.path.join(os.path.expanduser("~"), ".eigent")
+
+
+def sanitize_env_path(env_path: str | None) -> str | None:
+    """
+    Validate and sanitize user-provided environment file path.
+
+    Security: Ensures the path stays within ~/.eigent directory and ends with .env
+    to prevent path traversal attacks and unauthorized file access.
+
+    Args:
+        env_path: User-provided environment file path
+
+    Returns:
+        Validated absolute path string if valid, None otherwise
+    """
+    if not env_path:
+        return None
+
+    try:
+        # Convert to Path object for safe manipulation
+        user_path = Path(env_path)
+
+        # Absolute paths are not rejected here; the containment check below handles them
+        if user_path.is_absolute():
+            # Normalize symlinks and ".." first; containment in env_base_dir is verified below
+            resolved_path = user_path.resolve()
+        else:
+            # Join relative path to base directory
+            resolved_path = (Path(env_base_dir) / user_path).resolve()
+
+        # Verify the resolved path is still within env_base_dir
+        base_resolved = Path(env_base_dir).resolve()
+        try:
+            resolved_path.relative_to(base_resolved)
+        except ValueError:
+            logger.warning(
+                f"Security: Rejected env_path outside safe directory. "
+                f"Path: {env_path}, Resolved: {resolved_path}, "
+                f"Base: {base_resolved}"
+            )
+            return None
+
+        # Enforce .env file extension
+        if not resolved_path.name.endswith('.env'):
+            logger.warning(
+                f"Security: Rejected env_path with invalid extension. "
+                f"Path: {env_path}, must end with .env"
+            )
+            return None
+
+        return str(resolved_path)
+
+    except (ValueError, OSError) as e:
+        logger.warning(
+            f"Security: Invalid env_path rejected. "
+            f"Path: {env_path}, Error: {e}"
+        )
+        return None
+

 def set_user_env_path(env_path: str | None = None):
    """
    Set user-specific environment path for current thread.
    If env_path is None, uses default global environment.
-    """
-    traceroot_logger.info("Setting user environment path", extra={"env_path": env_path, "exists": env_path and os.path.exists(env_path) if env_path else None})

-    if env_path and os.path.exists(env_path):
-        _thread_local.env_path = env_path
+    Security: All paths are validated through sanitize_env_path to prevent
+    path traversal and unauthorized file access.
+    """
+    # Sanitize the path before any filesystem operations
+    safe_env_path = sanitize_env_path(env_path)
+
+    logger.info(
+        f"Setting user environment path: original={env_path}, "
+        f"sanitized={safe_env_path}, "
+        f"exists={safe_env_path and os.path.exists(safe_env_path) if safe_env_path else None}"
+    )
+
+    if safe_env_path and os.path.exists(safe_env_path):
+        _thread_local.env_path = safe_env_path
        # Load user-specific environment variables
-        load_dotenv(dotenv_path=env_path, override=True)
-        traceroot_logger.info("User-specific environment loaded", extra={"env_path": env_path})
+        load_dotenv(dotenv_path=safe_env_path, override=True)
+        logger.info(f"User-specific environment loaded: {safe_env_path}")
    else:
        # Clear thread-local env_path to fall back to global
        if hasattr(_thread_local, 'env_path'):
            delattr(_thread_local, 'env_path')
-        traceroot_logger.info("Reset to default global environment")
+        logger.info("Reset to default global environment")

-        if env_path and not os.path.exists(env_path):
-            traceroot_logger.warning("User environment path does not exist, falling back to global", extra={"env_path": env_path})
+        if env_path and not safe_env_path:
+            logger.warning(f"User environment path rejected by security validation: {env_path}")
+        elif safe_env_path and not os.path.exists(safe_env_path):
+            logger.warning(f"User environment path does not exist, falling back to global: {safe_env_path}")


 def get_current_env_path() -> str:
@ -78,35 +137,38 @@ def env(key: str, default=None):
    Get environment variable.
    First checks thread-local user-specific environment,
    then falls back to global environment.
+
+    Security: Uses sanitized path stored in _thread_local.env_path
+    which has already been validated by set_user_env_path.
    """
    # If we have a user-specific environment path, try to reload it to get latest values
+    # Note: _thread_local.env_path is already sanitized by set_user_env_path
    if hasattr(_thread_local, 'env_path') and os.path.exists(_thread_local.env_path):
        # Temporarily load user-specific env to get the latest value
        from dotenv import dotenv_values
        user_env_values = dotenv_values(_thread_local.env_path)
        if key in user_env_values:
            value = user_env_values[key] or default
-            traceroot_logger.debug("Environment variable retrieved from user-specific config", extra={"key": key, "env_path": _thread_local.env_path, "has_value": value is not None})
+            logger.debug(f"Environment variable retrieved from user-specific config: key={key}, env_path={_thread_local.env_path}, has_value={value is not None}")
            return value

    # Fall back to global environment
    value = os.getenv(key, default)
-    traceroot_logger.debug("Environment variable retrieved from global config", extra={"key": key, "has_value": value is not None, "using_default": value == default})
+    logger.debug(f"Environment variable retrieved from global config: key={key}, has_value={value is not None}, using_default={value == default}")
    return value


 def env_or_fail(key: str):
    value = env(key)
    if value is None:
-        traceroot_logger.warning(f"[ENVIRONMENT] can't get env config value for key: {key}")
+        logger.warning(f"[ENVIRONMENT] can't get env config value for key: {key}")
        raise Exception(f"can't get env config value for key: {key}")
    return value

-@traceroot.trace()
 def env_not_empty(key: str):
    value = env(key)
    if not value:
-        traceroot_logger.warning(f"[ENVIRONMENT] env config value can't be empty for key: {key}")
+        logger.warning(f"[ENVIRONMENT] env config value can't be empty for key: {key}")
        raise Exception(f"env config value can't be empty for key: {key}")
    return value

--- a/backend/app/controller/chat_controller.py
+++ b/backend/app/controller/chat_controller.py
@ -20,7 +20,7 @@ from pathlib import Path
 from dotenv import load_dotenv
 from fastapi import APIRouter, HTTPException, Request, Response
 from fastapi.responses import StreamingResponse
-from utils import traceroot_wrapper as traceroot
+import logging
 from app.component import code
 from app.exception.exception import UserException
 from app.model.chat import Chat, HumanReply, McpServers, Status, SupplementChat, AddTaskRequest, sse_json
@ -40,15 +40,15 @@ from app.service.task import (
    delete_task_lock,
    task_locks,
 )
-from app.component.environment import set_user_env_path
+from app.component.environment import set_user_env_path, sanitize_env_path
 from app.utils.workforce import Workforce
 from camel.tasks.task import Task


 router = APIRouter()

-# Create traceroot logger for chat controller
-chat_logger = traceroot.get_logger("chat_controller")
+# Logger for chat controller
+chat_logger = logging.getLogger("chat_controller")

 # SSE timeout configuration (60 minutes in seconds)
 SSE_TIMEOUT_SECONDS = 60 * 60
@ -127,7 +127,6 @@ async def timeout_stream_wrapper(stream_generator, timeout_seconds: int = SSE_TI


@router.post("/chat", name="start chat")
-@traceroot.trace()
 async def post(data: Chat, request: Request):
    chat_logger.info(
        "Starting new chat session",
@ -138,7 +137,10 @@ async def post(data: Chat, request: Request):

    # Set user-specific environment path for this thread
    set_user_env_path(data.env_path)
-    load_dotenv(dotenv_path=data.env_path)
+    # Load environment with validated path
+    safe_env_path = sanitize_env_path(data.env_path)
+    if safe_env_path:
+        load_dotenv(dotenv_path=safe_env_path)

    os.environ["file_save_path"] = data.file_save_path()
    os.environ["browser_port"] = str(data.browser_port)
@ -185,7 +187,6 @@ async def post(data: Chat, request: Request):


@router.post("/chat/{id}", name="improve chat")
-@traceroot.trace()
 def improve(id: str, data: SupplementChat):
    chat_logger.info("Chat improvement requested", extra={"task_id": id, "question_length": len(data.question)})
    task_lock = get_task_lock(id)
@ -245,7 +246,6 @@ def improve(id: str, data: SupplementChat):


@router.put("/chat/{id}", name="supplement task")
-@traceroot.trace()
 def supplement(id: str, data: SupplementChat):
    chat_logger.info("Chat supplement requested", extra={"task_id": id})
    task_lock = get_task_lock(id)
@ -257,7 +257,6 @@ def supplement(id: str, data: SupplementChat):


@router.delete("/chat/{id}", name="stop chat")
-@traceroot.trace()
 def stop(id: str):
    """stop the task"""
    chat_logger.info("=" * 80)
@ -277,7 +276,6 @@ def stop(id: str):


@router.post("/chat/{id}/human-reply")
-@traceroot.trace()
 def human_reply(id: str, data: HumanReply):
    chat_logger.info("Human reply received", extra={"task_id": id, "reply_length": len(data.reply)})
    task_lock = get_task_lock(id)
@ -287,7 +285,6 @@ def human_reply(id: str, data: HumanReply):


@router.post("/chat/{id}/install-mcp")
-@traceroot.trace()
 def install_mcp(id: str, data: McpServers):
    chat_logger.info("Installing MCP servers", extra={"task_id": id, "servers_count": len(data.get("mcpServers", {}))})
    task_lock = get_task_lock(id)
@ -297,7 +294,6 @@ def install_mcp(id: str, data: McpServers):


@router.post("/chat/{id}/add-task", name="add task to workforce")
-@traceroot.trace()
 def add_task(id: str, data: AddTaskRequest):
    """Add a new task to the workforce"""
    chat_logger.info(f"Adding task to workforce for task_id: {id}, content: {data.content[:100]}...")
@ -321,7 +317,6 @@ def add_task(id: str, data: AddTaskRequest):


@router.delete("/chat/{project_id}/remove-task/{task_id}", name="remove task from workforce")
-@traceroot.trace()
 def remove_task(project_id: str, task_id: str):
    """Remove a task from the workforce"""
    chat_logger.info(f"Removing task {task_id} from workforce for project_id: {project_id}")
@ -341,7 +336,6 @@ def remove_task(project_id: str, task_id: str):


@router.post("/chat/{project_id}/skip-task", name="skip task in workforce")
-@traceroot.trace()
 def skip_task(project_id: str):
    """
    Skip/Stop current task execution while preserving context.
--- a/backend/app/controller/health_controller.py
+++ b/backend/app/controller/health_controller.py
@ -14,9 +14,9 @@

 from fastapi import APIRouter
 from pydantic import BaseModel
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("health_controller")
+logger = logging.getLogger("health_controller")

 router = APIRouter(tags=["Health"])

--- a/backend/app/controller/model_controller.py
+++ b/backend/app/controller/model_controller.py
@ -18,9 +18,9 @@ from app.component.model_validation import create_agent
 from app.model.chat import PLATFORM_MAPPING
 from camel.types import ModelType
 from app.component.error_format import normalize_error_to_openai_format
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("model_controller")
+logger = logging.getLogger("model_controller")


 router = APIRouter()
@ -49,7 +49,6 @@ class ValidateModelResponse(BaseModel):


@router.post("/model/validate")
-@traceroot.trace()
 async def validate_model(request: ValidateModelRequest):
    """Validate model configuration and tool call support."""
    platform = request.model_platform
--- a/backend/app/controller/task_controller.py
+++ b/backend/app/controller/task_controller.py
@ -28,17 +28,16 @@ from app.service.task import (
    task_locks,
 )
 import asyncio
-from app.component.environment import set_user_env_path
-from utils import traceroot_wrapper as traceroot
+from app.component.environment import set_user_env_path, sanitize_env_path
+import logging

-logger = traceroot.get_logger("task_controller")
+logger = logging.getLogger("task_controller")


 router = APIRouter()


@router.post("/task/{id}/start", name="start task")
-@traceroot.trace()
 def start(id: str):
    task_lock = get_task_lock(id)
    logger.info("Starting task", extra={"task_id": id})
@ -48,7 +47,6 @@ def start(id: str):


@router.put("/task/{id}", name="update task")
-@traceroot.trace()
 def put(id: str, data: UpdateData):
    logger.info("Updating task", extra={"task_id": id, "task_items_count": len(data.task)})
    logger.debug("Update task data", extra={"task_id": id, "data": data.model_dump_json()})
@ -63,7 +61,6 @@ class TakeControl(BaseModel):


@router.put("/task/{id}/take-control", name="take control pause or resume")
-@traceroot.trace()
 def take_control(id: str, data: TakeControl):
    logger.info("Task control action", extra={"task_id": id, "action": data.action})
    task_lock = get_task_lock(id)
@ -73,20 +70,21 @@ def take_control(id: str, data: TakeControl):


@router.post("/task/{id}/add-agent", name="add new agent")
-@traceroot.trace()
 def add_agent(id: str, data: NewAgent):
    logger.info("Adding new agent to task", extra={"task_id": id, "agent_name": data.name})
    logger.debug("New agent data", extra={"task_id": id, "agent_data": data.model_dump_json()})
    # Set user-specific environment path for this thread
    set_user_env_path(data.env_path)
-    load_dotenv(dotenv_path=data.env_path)
+    # Load environment with validated path
+    safe_env_path = sanitize_env_path(data.env_path)
+    if safe_env_path:
+        load_dotenv(dotenv_path=safe_env_path)
    asyncio.run(get_task_lock(id).put_queue(ActionNewAgent(**data.model_dump())))
    logger.info("Agent added to task", extra={"task_id": id, "agent_name": data.name})
    return Response(status_code=204)


@router.delete("/task/stop-all", name="stop all tasks")
-@traceroot.trace()
 def stop_all():
    logger.warning("Stopping all tasks", extra={"task_count": len(task_locks)})
    for task_lock in task_locks.values():
--- a/backend/app/controller/tool_controller.py
+++ b/backend/app/controller/tool_controller.py
@ -16,7 +16,7 @@ from fastapi import APIRouter, HTTPException
 from app.utils.toolkit.notion_mcp_toolkit import NotionMCPToolkit
 from app.utils.toolkit.google_calendar_toolkit import GoogleCalendarToolkit
 from app.utils.oauth_state_manager import oauth_state_manager
-from utils import traceroot_wrapper as traceroot
+import logging
 from camel.toolkits.hybrid_browser_toolkit.hybrid_browser_toolkit_ts import (
    HybridBrowserToolkit as BaseHybridBrowserToolkit,
 )
@ -24,7 +24,7 @@ from app.utils.cookie_manager import CookieManager
 import os
 import uuid

-logger = traceroot.get_logger("tool_controller")
+logger = logging.getLogger("tool_controller")
 router = APIRouter()


--- a/backend/app/exception/handler.py
+++ b/backend/app/exception/handler.py
@ -22,9 +22,9 @@ from app.component import code
 from app.exception.exception import NoPermissionException, ProgramException, TokenException
 from app.component.pydantic.i18n import trans, get_language
 from app.exception.exception import UserException
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("exception_handler")
+logger = logging.getLogger("exception_handler")


@api.exception_handler(RequestValidationError)
--- a/backend/app/model/chat.py
+++ b/backend/app/model/chat.py
@ -19,9 +19,9 @@ import re
 from typing import Literal
 from pydantic import BaseModel, Field, field_validator
 from camel.types import ModelType, RoleType
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("chat_model")
+logger = logging.getLogger("chat_model")


 class Status(str, Enum):
--- a/backend/app/router.py
+++ b/backend/app/router.py
@ -18,9 +18,9 @@ All routers are explicitly registered here for better visibility and maintainabi
 """
 from fastapi import FastAPI
 from app.controller import chat_controller, model_controller, task_controller, tool_controller, health_controller
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("router")
+logger = logging.getLogger("router")


 def register_routers(app: FastAPI, prefix: str = "") -> None:
--- a/backend/app/service/chat_service.py
+++ b/backend/app/service/chat_service.py
@ -40,6 +40,7 @@ from app.utils.toolkit.human_toolkit import HumanToolkit
 from app.utils.toolkit.note_taking_toolkit import NoteTakingToolkit
 from app.utils.toolkit.terminal_toolkit import TerminalToolkit
 from app.utils.workforce import Workforce
+from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback
 from app.model.chat import Chat, NewAgent, Status, sse_json, TaskContent
 from camel.tasks import Task
 from app.utils.agent import (
@ -61,10 +62,10 @@ from app.service.task import Action, Agents
 from app.utils.server.sync_step import sync_step
 from camel.types import ModelPlatformType
 from camel.models import ModelProcessingError
-from utils import traceroot_wrapper as traceroot
+import logging
 import os

-logger = traceroot.get_logger("chat_service")
+logger = logging.getLogger("chat_service")


 def format_task_context(task_data: dict, seen_files: set | None = None, skip_files: bool = False) -> str:
@ -251,7 +252,6 @@ def build_context_for_workforce(task_lock: TaskLock, options: Chat) -> str:


@sync_step
-@traceroot.trace()
 async def step_solve(options: Chat, request: Request, task_lock: TaskLock):
    start_event_loop = True

@ -1061,7 +1061,6 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock):
            # Continue processing other items instead of breaking


-@traceroot.trace()
 async def install_mcp(
    mcp: ListenChatAgent,
    install_mcp: ActionInstallMcpData,
@ -1092,7 +1091,8 @@ def to_sub_tasks(task: Task, summary_task_content: str):
 def tree_sub_tasks(sub_tasks: list[Task], depth: int = 0):
    if depth > 5:
        return []
-    return (
+
+    result = (
        chain(sub_tasks)
        .filter(lambda x: x.content != "")
        .map(
@ -1106,6 +1106,8 @@ def tree_sub_tasks(sub_tasks: list[Task], depth: int = 0):
        .value()
    )

+    return result
+

 def update_sub_tasks(sub_tasks: list[Task], update_tasks: dict[str, TaskContent], depth: int = 0):
    if depth > 5:  # limit the depth of the recursion
@ -1183,7 +1185,6 @@ Is this a complex task? (yes/no):"""
        return True


-@traceroot.trace()
 async def summary_task(agent: ListenChatAgent, task: Task) -> str:
    prompt = f"""The user's task is:
 ---
@ -1286,7 +1287,6 @@ async def get_task_result_with_optional_summary(task: Task, options: Chat) -> st
    return result


-@traceroot.trace()
 async def construct_workforce(options: Chat) -> tuple[Workforce, ListenChatAgent]:
    """Construct a workforce with all required agents.

@ -1403,6 +1403,12 @@ The current date is {datetime.date.today()}. For any date-related tasks, you MUS
    except (ValueError, AttributeError):
        model_platform_enum = None

+    # Create workforce metrics callback for workforce analytics
+    workforce_metrics = WorkforceMetricsCallback(
+        project_id=options.project_id,
+        task_id=options.task_id
+    )
+
    workforce = Workforce(
        options.project_id,
        "A workforce",
@ -1414,6 +1420,8 @@ The current date is {datetime.date.today()}. For any date-related tasks, you MUS
        use_structured_output_handler=False if model_platform_enum == ModelPlatformType.OPENAI else True,
    )

+    # Register workforce metrics callback
+    workforce._callbacks.append(workforce_metrics)
    workforce.add_single_agent_worker(
        "Developer Agent: A master-level coding assistant with a powerful "
        "terminal. It can write and execute code, manage files, automate "
@ -1473,7 +1481,6 @@ def format_agent_description(agent_data: NewAgent | ActionNewAgent) -> str:
    return " ".join(description_parts)


-@traceroot.trace()
 async def new_agent_model(data: NewAgent | ActionNewAgent, options: Chat):
    logger.info("Creating new agent", extra={"agent_name": data.name, "project_id": options.project_id, "task_id": options.task_id})
    logger.debug("New agent data", extra={"agent_data": data.model_dump_json()})
--- a/backend/app/service/task.py
+++ b/backend/app/service/task.py
@ -24,9 +24,9 @@ from contextlib import contextmanager
 from contextvars import ContextVar
 from datetime import datetime, timedelta
 import weakref
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("task_service")
+logger = logging.getLogger("task_service")


 class Action(str, Enum):
--- a/backend/app/utils/agent.py
+++ b/backend/app/utils/agent.py
@ -21,7 +21,7 @@ from threading import Event, Lock
 import traceback
 from typing import Any, Callable, Dict, List, Tuple
 import uuid
-from utils import traceroot_wrapper as traceroot
+import logging

 # Thread-safe reference to main event loop using contextvars
 # This ensures each request has its own event loop reference, avoiding race conditions
@ -69,7 +69,7 @@ def _schedule_async_task(coro):
            asyncio.run_coroutine_threadsafe(coro, main_loop)
        else:
            # This should not happen in normal operation - log error and skip
-            traceroot.get_logger("agent").error(
+            logging.error(
                "No event loop available for async task scheduling, task skipped. "
                "Ensure set_main_event_loop() is called before parallel agent creation."
            )
@ -129,8 +129,8 @@ import datetime
 from pydantic import BaseModel
 from app.model.chat import Chat, McpServers

-# Create traceroot logger for agent tracking
-traceroot_logger = traceroot.get_logger("agent")
+# Logger for agent tracking
+logger = logging.getLogger("agent")
 from app.service.task import (
    Action,
    ActionActivateAgentData,
@ -148,7 +148,6 @@ NOW_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:00:00")


 class ListenChatAgent(ChatAgent):
-    @traceroot.trace()
    def __init__(
        self,
        api_task_id: str,
@ -218,7 +217,6 @@ class ListenChatAgent(ChatAgent):

    process_task_id: str = ""

-    @traceroot.trace()
    def step(
        self,
        input_message: BaseMessage | str,
@ -244,7 +242,7 @@ class ListenChatAgent(ChatAgent):
        error_info = None
        message = None
        res = None
-        traceroot_logger.info(
+        logger.info(
            f"Agent {self.agent_name} starting step with message: {input_message.content if isinstance(input_message, BaseMessage) else input_message}"
        )
        try:
@ -254,20 +252,16 @@ class ListenChatAgent(ChatAgent):
            error_info = e
            if "Budget has been exceeded" in str(e):
                message = "Budget has been exceeded"
-                traceroot_logger.warning(f"Agent {self.agent_name} budget exceeded")
+                logger.warning(f"Agent {self.agent_name} budget exceeded")
                asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough()))
            else:
                message = str(e)
-                traceroot_logger.error(
-                    f"Agent {self.agent_name} model processing error: {e}"
-                )
+                logger.error(f"Agent {self.agent_name} model processing error: {e}")
            total_tokens = 0
        except Exception as e:
            res = None
            error_info = e
-            traceroot_logger.error(
-                f"Agent {self.agent_name} unexpected error in step: {e}", exc_info=True
-            )
+            logger.error(f"Agent {self.agent_name} unexpected error in step: {e}", exc_info=True)
            message = f"Error processing message: {e!s}"
            total_tokens = 0

@ -314,7 +308,7 @@ class ListenChatAgent(ChatAgent):
            message = res.msg.content if res.msg else ""
            usage_info = res.info.get("usage") or res.info.get("token_usage") or {}
            total_tokens = usage_info.get("total_tokens", 0) if usage_info else 0
-            traceroot_logger.info(
+            logger.info(
                f"Agent {self.agent_name} completed step, tokens used: {total_tokens}"
            )

@ -339,7 +333,6 @@ class ListenChatAgent(ChatAgent):
        assert res is not None
        return res

-    @traceroot.trace()
    async def astep(
        self,
        input_message: BaseMessage | str,
@ -365,7 +358,7 @@ class ListenChatAgent(ChatAgent):
        error_info = None
        message = None
        res = None
-        traceroot_logger.debug(
+        logger.debug(
            f"Agent {self.agent_name} starting async step with message: {input_message.content if isinstance(input_message, BaseMessage) else input_message}"
        )

@ -378,30 +371,23 @@ class ListenChatAgent(ChatAgent):
            error_info = e
            if "Budget has been exceeded" in str(e):
                message = "Budget has been exceeded"
-                traceroot_logger.warning(f"Agent {self.agent_name} budget exceeded")
+                logger.warning(f"Agent {self.agent_name} budget exceeded")
                asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough()))
            else:
                message = str(e)
-                traceroot_logger.error(
-                    f"Agent {self.agent_name} model processing error: {e}"
-                )
+                logger.error(f"Agent {self.agent_name} model processing error: {e}")
            total_tokens = 0
        except Exception as e:
            res = None
            error_info = e
-            traceroot_logger.error(
-                f"Agent {self.agent_name} unexpected error in async step: {e}",
-                exc_info=True,
-            )
+            logger.error(f"Agent {self.agent_name} unexpected error in async step: {e}", exc_info=True)
            message = f"Error processing message: {e!s}"
            total_tokens = 0

        if res is not None:
            message = res.msg.content if res.msg else ""
            total_tokens = res.info["usage"]["total_tokens"]
-            traceroot_logger.info(
-                f"Agent {self.agent_name} completed step, tokens used: {total_tokens}"
-            )
+            logger.info(f"Agent {self.agent_name} completed step, tokens used: {total_tokens}")

        assert message is not None

@ -424,7 +410,6 @@ class ListenChatAgent(ChatAgent):
        assert res is not None
        return res

-    @traceroot.trace()
    def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord:
        func_name = tool_call_request.tool_name
        tool: FunctionTool = self._internal_tools[func_name]
@ -451,7 +436,7 @@ class ListenChatAgent(ChatAgent):
                if hasattr(tool, "_toolkit_name")
                else "mcp_toolkit"
            )
-            traceroot_logger.debug(
+            logger.debug(
                f"Agent {self.agent_name} executing tool: {func_name} from toolkit: {toolkit_name} with args: {json.dumps(args, ensure_ascii=False)}"
            )

@ -473,7 +458,7 @@ class ListenChatAgent(ChatAgent):
            # Set process_task context for all tool executions
            with set_process_task(self.process_task_id):
                raw_result = tool(**args)
-            traceroot_logger.debug(f"Tool {func_name} executed successfully")
+            logger.debug(f"Tool {func_name} executed successfully")
            if self.mask_tool_output:
                self._secure_result_store[tool_call_id] = raw_result
                result = (
@ -518,9 +503,7 @@ class ListenChatAgent(ChatAgent):
            error_msg = f"Error executing tool '{func_name}': {e!s}"
            result = f"Tool execution failed: {error_msg}"
            mask_flag = False
-            traceroot_logger.error(
-                f"Tool execution failed for {func_name}: {e}", exc_info=True
-            )
+            logger.error(f"Tool execution failed for {func_name}: {e}", exc_info=True)

        return self._record_tool_calling(
            func_name,
@ -531,7 +514,6 @@ class ListenChatAgent(ChatAgent):
            extra_content=tool_call_request.extra_content,
        )

-    @traceroot.trace()
    async def _aexecute_tool(
        self, tool_call_request: ToolCallRequest
    ) -> ToolCallingRecord:
@ -575,7 +557,7 @@ class ListenChatAgent(ChatAgent):
        if not toolkit_name:
            toolkit_name = "mcp_toolkit"

-        traceroot_logger.info(
+        logger.info(
            f"Agent {self.agent_name} executing async tool: {func_name} from toolkit: {toolkit_name} with args: {json.dumps(args, ensure_ascii=False)}"
        )

@ -645,9 +627,7 @@ class ListenChatAgent(ChatAgent):
            # Capture the error message to prevent framework crash
            error_msg = f"Error executing async tool '{func_name}': {e!s}"
            result = {"error": error_msg}
-            traceroot_logger.error(
-                f"Async tool execution failed for {func_name}: {e}", exc_info=True
-            )
+            logger.error(f"Async tool execution failed for {func_name}: {e}", exc_info=True)

        # Prepare result message with truncation
        if isinstance(result, str):
@ -684,7 +664,6 @@ class ListenChatAgent(ChatAgent):
            extra_content=tool_call_request.extra_content,
        )

-    @traceroot.trace()
    def clone(self, with_memory: bool = False) -> ChatAgent:
        """Please see super.clone()"""
        system_message = None if with_memory else self._original_system_message
@ -730,7 +709,6 @@ class ListenChatAgent(ChatAgent):
        return new_agent


-@traceroot.trace()
 def agent_model(
    agent_name: str,
    system_message: str | BaseMessage,
@ -743,9 +721,7 @@ def agent_model(
 ):
    task_lock = get_task_lock(options.project_id)
    agent_id = str(uuid.uuid4())
-    traceroot_logger.debug(
-        f"Creating agent: {agent_name} with id: {agent_id} for project: {options.project_id}"
-    )
+    logger.info(f"Creating agent: {agent_name} with id: {agent_id} for project: {options.project_id}")
    # Use thread-safe scheduling to support parallel agent creation
    _schedule_async_task(
        task_lock.put_queue(
@ -807,7 +783,7 @@ def agent_model(
            }:
                model_config["parallel_tool_calls"] = False
        except (ValueError, AttributeError):
-            traceroot_logger.error(
+            logger.error(
                f"Invalid model platform for browser agent: {options.model_platform}",
                exc_info=True,
            )
@ -837,7 +813,6 @@ def agent_model(
    )


-@traceroot.trace()
 def question_confirm_agent(options: Chat):
    return agent_model(
        "question_confirm_agent",
@ -846,7 +821,6 @@ def question_confirm_agent(options: Chat):
    )


-@traceroot.trace()
 def task_summary_agent(options: Chat):
    return agent_model(
        "task_summary_agent",
@ -855,12 +829,9 @@ def task_summary_agent(options: Chat):
    )


-@traceroot.trace()
 async def developer_agent(options: Chat):
    working_directory = get_working_directory(options)
-    traceroot_logger.info(
-        f"Creating developer agent for project: {options.project_id} in directory: {working_directory}"
-    )
+    logger.info(f"Creating developer agent for project: {options.project_id} in directory: {working_directory}")
    message_integration = ToolkitMessageIntegration(
        message_handler=HumanToolkit(
            options.project_id, Agents.developer_agent
@ -1046,13 +1017,9 @@ these tips to maximize your effectiveness:
    )


-@traceroot.trace()
 def browser_agent(options: Chat):
    working_directory = get_working_directory(options)
-    traceroot_logger.debug(
-        f"Creating browser agent for project: {options.project_id} in directory: {working_directory}"
-    )
-
+    logger.info(f"Creating browser agent for project: {options.project_id} in directory: {working_directory}")
    message_integration = ToolkitMessageIntegration(
        message_handler=HumanToolkit(
            options.project_id, Agents.browser_agent
@ -1252,12 +1219,9 @@ Your approach depends on available search tools:
    )


-@traceroot.trace()
 async def document_agent(options: Chat):
    working_directory = get_working_directory(options)
-    traceroot_logger.debug(
-        f"Creating document agent for project: {options.project_id} in directory: {working_directory}"
-    )
+    logger.info(f"Creating document agent for project: {options.project_id} in directory: {working_directory}")

    message_integration = ToolkitMessageIntegration(
        message_handler=HumanToolkit(
@ -1481,12 +1445,9 @@ supported formats including advanced spreadsheet functionality.
    )


-@traceroot.trace()
 def multi_modal_agent(options: Chat):
    working_directory = get_working_directory(options)
-    traceroot_logger.debug(
-        f"Creating multi-modal agent for project: {options.project_id} in directory: {working_directory}"
-    )
+    logger.info(f"Creating multi-modal agent for project: {options.project_id} in directory: {working_directory}")

    message_integration = ToolkitMessageIntegration(
        message_handler=HumanToolkit(
@ -1677,16 +1638,13 @@ multi-modal content across audio and visual domains.
    )


-@traceroot.trace()
 async def social_medium_agent(options: Chat):
    """
    Agent to handling tasks related to social media:
    include toolkits: WhatsApp, Twitter, LinkedIn, Reddit, Notion, Slack, Discord and Google Suite.
    """
    working_directory = get_working_directory(options)
-    traceroot_logger.info(
-        f"Creating social medium agent for project: {options.project_id} in directory: {working_directory}"
-    )
+    logger.info(f"Creating social medium agent for project: {options.project_id} in directory: {working_directory}")
    tools = [
        *WhatsAppToolkit.get_can_use_tools(options.project_id),
        *TwitterToolkit.get_can_use_tools(options.project_id),
@ -1807,9 +1765,8 @@ operations.
    )


-@traceroot.trace()
 async def mcp_agent(options: Chat):
-    traceroot_logger.info(
+    logger.info(
        f"Creating MCP agent for project: {options.project_id} with {len(options.installed_mcp['mcpServers'])} MCP servers"
    )
    tools = [
@ -1819,7 +1776,7 @@ async def mcp_agent(options: Chat):
    if len(options.installed_mcp["mcpServers"]) > 0:
        try:
            mcp_tools = await get_mcp_tools(options.installed_mcp)
-            traceroot_logger.info(
+            logger.info(
                f"Retrieved {len(mcp_tools)} MCP tools for task {options.project_id}"
            )
            if mcp_tools:
@ -1831,16 +1788,14 @@ async def mcp_agent(options: Chat):
                    )
                    for tool in mcp_tools
                ]
-                traceroot_logger.debug(f"MCP tools: {tool_names}")
+                logger.debug(f"MCP tools: {tool_names}")
            tools = [*tools, *mcp_tools]
        except Exception as e:
-            traceroot_logger.debug(repr(e))
+            logger.debug(repr(e))

    task_lock = get_task_lock(options.project_id)
    agent_id = str(uuid.uuid4())
-    traceroot_logger.info(
-        f"Creating MCP agent: {Agents.mcp_agent} with id: {agent_id} for task: {options.project_id}"
-    )
+    logger.info(f"Creating MCP agent: {Agents.mcp_agent} with id: {agent_id} for task: {options.project_id}")
    asyncio.create_task(
        task_lock.put_queue(
            ActionCreateAgentData(
@ -1883,11 +1838,8 @@ async def mcp_agent(options: Chat):
    )


-@traceroot.trace()
 async def get_toolkits(tools: list[str], agent_name: str, api_task_id: str):
-    traceroot_logger.info(
-        f"Getting toolkits for agent: {agent_name}, task: {api_task_id}, tools: {tools}"
-    )
+    logger.info(f"Getting toolkits for agent: {agent_name}, task: {api_task_id}, tools: {tools}")
    toolkits = {
        "audio_analysis_toolkit": AudioAnalysisToolkit,
        "openai_image_toolkit": OpenAIImageToolkit,
@ -1925,15 +1877,12 @@ async def get_toolkits(tools: list[str], agent_name: str, api_task_id: str):
            )
            res.extend(toolkit_tools)
        else:
-            traceroot_logger.warning(f"Toolkit {item} not found for agent {agent_name}")
+            logger.warning(f"Toolkit {item} not found for agent {agent_name}")
    return res


-@traceroot.trace()
 async def get_mcp_tools(mcp_server: McpServers):
-    traceroot_logger.info(
-        f"Getting MCP tools for {len(mcp_server['mcpServers'])} servers"
-    )
+    logger.info(f"Getting MCP tools for {len(mcp_server['mcpServers'])} servers")
    if len(mcp_server["mcpServers"]) == 0:
        return []

@ -1953,9 +1902,7 @@ async def get_mcp_tools(mcp_server: McpServers):
        mcp_toolkit = MCPToolkit(config_dict=config_dict, timeout=180)
        await mcp_toolkit.connect()

-        traceroot_logger.info(
-            f"Successfully connected to MCP toolkit with {len(mcp_server['mcpServers'])} servers"
-        )
+        logger.info(f"Successfully connected to MCP toolkit with {len(mcp_server['mcpServers'])} servers")
        tools = mcp_toolkit.get_tools()
        if tools:
            tool_names = [
@ -1966,11 +1913,11 @@ async def get_mcp_tools(mcp_server: McpServers):
                )
                for tool in tools
            ]
-            traceroot_logger.debug(f"MCP tool names: {tool_names}")
+            logger.debug(f"MCP tool names: {tool_names}")
        return tools
    except asyncio.CancelledError:
-        traceroot_logger.info("MCP connection cancelled during get_mcp_tools")
+        logger.info("MCP connection cancelled during get_mcp_tools")
        return []
    except Exception as e:
-        traceroot_logger.error(f"Failed to connect MCP toolkit: {e}", exc_info=True)
+        logger.error(f"Failed to connect MCP toolkit: {e}", exc_info=True)
        return []
--- a/backend/app/utils/cookie_manager.py
+++ b/backend/app/utils/cookie_manager.py
@ -15,11 +15,11 @@
 import sqlite3
 import os
 from typing import List, Dict, Optional
-from utils import traceroot_wrapper as traceroot
+import logging
 import shutil
 from datetime import datetime

-logger = traceroot.get_logger("cookie_manager")
+logger = logging.getLogger("cookie_manager")


 class CookieManager:
--- a/backend/app/utils/listen/toolkit_listen.py
+++ b/backend/app/utils/listen/toolkit_listen.py
@ -28,9 +28,9 @@ from app.service.task import (
 )
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
 from app.service.task import process_task
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("toolkit_listen")
+logger = logging.getLogger("toolkit_listen")


 def _safe_put_queue(task_lock, data):
--- a/backend/app/utils/oauth_state_manager.py
+++ b/backend/app/utils/oauth_state_manager.py
@ -18,8 +18,8 @@ OAuth authorization state manager for background authorization flows
 import threading
 from typing import Dict, Optional, Literal, Any
 from datetime import datetime
-from utils import traceroot_wrapper as traceroot
-logger = traceroot.get_logger("main")
+import logging
+logger = logging.getLogger("main")

 AuthStatus = Literal["pending", "authorizing", "success", "failed", "cancelled"]

--- a/backend/app/utils/server/sync_step.py
+++ b/backend/app/utils/server/sync_step.py
@ -20,9 +20,9 @@ import json
 from app.service.chat_service import Chat
 from app.component.environment import env
 from app.service.task import get_task_lock_if_exists
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("sync_step")
+logger = logging.getLogger("sync_step")


 def sync_step(func):
@ -65,6 +65,12 @@ def sync_step(func):
                    task_id = chat.task_id

            if task_id:
+                # TODO: Filter out unnecessary events to avoid database bloat
+                # - Skip "decompose_text" streaming events (sent 50-200+ times per task)
+                # - Only sync structural events: decompose_progress, task_state, create_agent, etc.
+                # - Consider batching or deduplication for high-frequency events
+                # - Extract and add task dependencies for analytics
+
                asyncio.create_task(
                    send_to_api(
                        sync_url,
--- a/backend/app/utils/single_agent_worker.py
+++ b/backend/app/utils/single_agent_worker.py
@ -16,7 +16,7 @@ import datetime
 from camel.agents.chat_agent import AsyncStreamingChatAgentResponse
 from camel.societies.workforce.single_agent_worker import SingleAgentWorker as BaseSingleAgentWorker
 from camel.tasks.task import Task, TaskState, is_task_result_insufficient
-from utils import traceroot_wrapper as traceroot
+import logging

 from app.utils.agent import ListenChatAgent
 from camel.societies.workforce.prompts import PROCESS_TASK_PROMPT
@ -24,7 +24,7 @@ from colorama import Fore
 from camel.societies.workforce.utils import TaskResult
 from camel.utils.context_utils import ContextUtility

-logger = traceroot.get_logger("single_agent_worker")
+logger = logging.getLogger("single_agent_worker")


 class SingleAgentWorker(BaseSingleAgentWorker):
--- a/backend/app/utils/telemetry/.env.example
+++ b/backend/app/utils/telemetry/.env.example
@ -0,0 +1,3 @@
+LANGFUSE_PUBLIC_KEY=pk-lf-...
+LANGFUSE_SECRET_KEY=sk-lf-...
+LANGFUSE_BASE_URL=https://us.cloud.langfuse.com  # Optional, defaults to US cloud
--- a/backend/app/utils/telemetry/README.md
+++ b/backend/app/utils/telemetry/README.md
@ -0,0 +1,204 @@
+# Workforce Telemetry
+
+OpenTelemetry-based telemetry for CAMEL workforce events, sent to Langfuse for observability.
+
+## Configuration
+
+Add the following environment variables to `~/.eigent/.env`:
+
+```bash
+LANGFUSE_PUBLIC_KEY=pk-lf-...
+LANGFUSE_SECRET_KEY=sk-lf-...
+LANGFUSE_BASE_URL=https://us.cloud.langfuse.com  # Optional, defaults to US cloud
+```
+
+**If these keys are not specified, telemetry will be disabled.**
+
+## Langfuse Setup
+
+- **Cloud**: Sign up at [Langfuse Cloud](https://cloud.langfuse.com)
+- **Self-hosted**: Use the [open-source version](https://langfuse.com/self-hosting)
+- **Documentation**: [https://langfuse.com/docs](https://langfuse.com/docs)
+
+## Privacy
+
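+Spans carry mostly **metadata** (task IDs, timings, model names, token counts, quality scores). Note that task descriptions (`eigent.task.description`), worker role descriptions (`eigent.worker.role`), and error/critical log messages (`log.message`) are also sent as span attributes, so they may contain user-provided text. **No other PII or detailed task content** (such as full task inputs or results) appears among the attributes documented below.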
+
+## Architecture
+
+### Singleton TracerProvider
+
+The `TracerProvider` is initialized once during FastAPI startup (`main.py`) to ensure only one `BatchSpanProcessor` is running, regardless of how many `WorkforceMetricsCallback` instances are created. This prevents:
+
+- Resource leaks from multiple background export threads
+- OOM issues from unbounded span queuing (max queue: 4096 spans)
+- Excessive memory usage across multiple workforce sessions
+
+The initialization happens in the startup event:
+
+```python
+@api.on_event("startup")
+async def startup_event():
+    from app.utils.telemetry.workforce_metrics import initialize_tracer_provider
+    initialize_tracer_provider()
+```
+
+### Batch Processing Configuration
+
+- `max_queue_size`: 4096 spans (drops oldest when full)
+- `export_timeout_millis`: 30000 (30s timeout for exports)
+- `schedule_delay_millis`: 3000 (exports every 3s)
+- `max_export_batch_size`: 1024 (max spans per export)
+
+## Span Structure
+
+All spans share common resource attributes and scope information:
+
+```json
+{
+  "resourceAttributes": {
+    "service.name": "eigent-workforce",
+    "eigent.project.id": "1768815931733-6575",
+    "eigent.task.id": "1768815944094-9806"
+  },
+  "scope": {
+    "name": "eigent.workforce",
+    "version": "0.2.83a9"
+  }
+}
+```
+
+### worker.created
+
+Emitted when a worker is created.
+
+```json
+{
+  "attributes": {
+    "eigent.worker.id": "73d20286-2c17-467a-8153-2a6ea8cbb6c2",
+    "eigent.worker.type": "SingleAgentWorker",
+    "eigent.worker.role": "Developer Agent: A master-level coding...",
+    "eigent.worker.agent": "developer_agent",
+    "eigent.worker.model.type": "gpt-4.1-mini"
+  }
+}
+```
+
+### task.created
+
+Emitted when a task is created.
+
+```json
+{
+  "attributes": {
+    "eigent.task.id": "1768815944094-9806.1",
+    "eigent.task.description": "Task description",
+    "eigent.project.id": "1768815931733-6575",
+    "eigent.task.parent_id": "1768815944094-9806",
+    "eigent.task.type": "task_type"
+  }
+}
+```
+
+### task.assigned
+
+Emitted when a task is assigned to a worker.
+
+```json
+{
+  "attributes": {
+    "eigent.task.id": "1768815944094-9806.1",
+    "eigent.worker.id": "0fae2d3d-7c0a-4b50-b09d-da35ae61786d",
+    "eigent.project.id": "1768815931733-6575",
+    "eigent.task.queue_time_seconds": "1.5",
+    "eigent.task.dependencies": "[\"dep_1\", \"dep_2\"]"
+  }
+}
+```
+
+### task.execution:{task_id}
+
+Long-running span tracking task execution from start to completion.
+
+```json
+{
+  "attributes": {
+    "eigent.task.id": "1768815944094-9806.1",
+    "eigent.project.id": "1768815931733-6575",
+    "eigent.task.status": "completed",
+    "eigent.worker.id": "0fae2d3d-7c0a-4b50-b09d-da35ae61786d",
+    "eigent.task.timestamp": "2026-01-19T09:46:40.045077+00:00",
+    "eigent.task.parent_id": "1768815944094-9806",
+    "eigent.task.processing_time_seconds": "10.926168203353882",
+    "eigent.task.quality_score": "80",
+    "eigent.task.token_usage.total_tokens": "37284"
+  }
+}
+```
+
+### workforce.all_tasks_completed
+
+Emitted when all tasks in the workforce are completed.
+
+```json
+{
+  "attributes": {
+    "eigent.project.id": "1768815931733-6575",
+    "eigent.task.id": "1768815944094-9806",
+    "eigent.task.timestamp": "2026-01-19T09:46:44.901068+00:00",
+    "workforce.total_tasks": "5"
+  }
+}
+```
+
+### log.message
+
+Emitted for error and critical log messages.
+
+```json
+{
+  "attributes": {
+    "log.level": "error",
+    "log.message": "Error message",
+    "eigent.project.id": "1768815931733-6575"
+  }
+}
+```
+
+## Captured Attributes Reference
+
+### Project & Task
+
+- `eigent.project.id` - Workforce/project identifier
+- `eigent.task.id` - Task identifier
+- `eigent.task.description` - Task description
+- `eigent.task.parent_id` - Parent task ID
+- `eigent.task.type` - Task type
+- `eigent.task.status` - Task status (started, completed, failed)
+- `eigent.task.timestamp` - ISO 8601 timestamp
+- `eigent.task.dependencies` - JSON array of dependency task IDs
+- `eigent.task.queue_time_seconds` - Time in queue before assignment
+- `eigent.task.processing_time_seconds` - Task execution duration
+- `eigent.task.quality_score` - Quality score (0-100)
+
+### Worker
+
+- `eigent.worker.id` - Worker UUID
+- `eigent.worker.type` - Worker class type
+- `eigent.worker.role` - Worker role description
+- `eigent.worker.agent` - Agent type (developer_agent, browser_agent, etc.)
+- `eigent.worker.model.type` - Model name (gpt-4, claude-3, etc.)
+
+### Token Usage
+
+- `eigent.task.token_usage.total_tokens` - Total tokens used
+- `eigent.task.token_usage.*` - Additional token usage metrics
+
+### Langfuse
+
+- `langfuse.session.id` - Set to project ID for grouping
+- `langfuse.tags` - ["workforce", "camel", "eigent"]
+
+### Workforce
+
+- `workforce.total_tasks` - Total number of tasks completed
--- a/backend/app/utils/telemetry/init.py
+++ b/backend/app/utils/telemetry/init.py
--- a/backend/app/utils/telemetry/workforce_metrics.py
+++ b/backend/app/utils/telemetry/workforce_metrics.py
@ -0,0 +1,560 @@
+# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
+
+import base64
+import json
+import logging
+import os
+import re
+from typing import Any, Dict
+
+import camel
+from camel.societies.workforce.events import (LogEvent, TaskAssignedEvent,
+                                              TaskCompletedEvent,
+                                              TaskCreatedEvent,
+                                              TaskFailedEvent,
+                                              TaskStartedEvent,
+                                              WorkerCreatedEvent)
+from camel.societies.workforce.workforce_metrics import WorkforceMetrics
+from opentelemetry import trace
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import \
+    OTLPSpanExporter
+from opentelemetry.sdk.resources import SERVICE_NAME, Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.trace import Status, StatusCode
+
+logger = logging.getLogger(__name__)
+
+# Environment variable keys
+ENV_LANGFUSE_PUBLIC_KEY = "LANGFUSE_PUBLIC_KEY"
+ENV_LANGFUSE_SECRET_KEY = "LANGFUSE_SECRET_KEY"
+ENV_LANGFUSE_BASE_URL = "LANGFUSE_BASE_URL"
+ENV_OTEL_EXPORTER_OTLP_ENDPOINT = "OTEL_EXPORTER_OTLP_ENDPOINT"
+ENV_OTEL_EXPORTER_OTLP_HEADERS = "OTEL_EXPORTER_OTLP_HEADERS"
+
+# Default values
+DEFAULT_LANGFUSE_BASE_URL = "https://us.cloud.langfuse.com"
+DEFAULT_LANGFUSE_TAGS = ["workforce", "camel", "eigent"]
+LANGFUSE_OTEL_PATH = "/api/public/otel"
+
+# Attribute keys for eigent.project namespace
+ATTR_PROJECT_ID = "eigent.project.id"
+
+# Attribute keys for eigent.task namespace
+ATTR_TASK_ID = "eigent.task.id"
+ATTR_TASK_DESCRIPTION = "eigent.task.description"
+ATTR_TASK_PARENT_ID = "eigent.task.parent_id"
+ATTR_TASK_TYPE = "eigent.task.type"
+ATTR_TASK_STATUS = "eigent.task.status"
+ATTR_TASK_QUEUE_TIME_SECONDS = "eigent.task.queue_time_seconds"
+ATTR_TASK_PROCESSING_TIME_SECONDS = "eigent.task.processing_time_seconds"
+ATTR_TASK_QUALITY_SCORE = "eigent.task.quality_score"
+ATTR_TASK_TIMESTAMP = "eigent.task.timestamp"
+ATTR_TASK_DEPENDENCIES = "eigent.task.dependencies"
+
+# Attribute keys for eigent.worker namespace
+ATTR_WORKER_ID = "eigent.worker.id"
+ATTR_WORKER_TYPE = "eigent.worker.type"
+ATTR_WORKER_ROLE = "eigent.worker.role"
+ATTR_WORKER_AGENT = "eigent.worker.agent"
+ATTR_WORKER_MODEL_TYPE = "eigent.worker.model.type"
+
+# Attribute keys for workforce namespace
+ATTR_WORKFORCE_TOTAL_TASKS = "workforce.total_tasks"
+
+# Langfuse-specific attributes
+ATTR_LANGFUSE_SESSION_ID = "langfuse.session.id"
+ATTR_LANGFUSE_TAGS = "langfuse.tags"
+
+# OpenTelemetry service and tracer names
+SERVICE_NAME_WORKFORCE = "eigent-workforce"
+TRACER_NAME_WORKFORCE = "eigent.workforce"
+
+# Span names
+SPAN_WORKFORCE_EXECUTION = "workforce.execution"
+SPAN_WORKER_CREATED = "worker.created"
+SPAN_TASK_CREATED = "task.created"
+SPAN_TASK_ASSIGNED = "task.assigned"
+SPAN_TASK_EXECUTION = "task.execution"
+SPAN_LOG_MESSAGE = "log.message"
+SPAN_ALL_TASKS_COMPLETED = "workforce.all_tasks_completed"
+
+# Global tracer provider singleton to avoid creating multiple processors
+# This is initialized once during FastAPI startup
+_GLOBAL_TRACER_PROVIDER: TracerProvider = None
+
+
+def initialize_tracer_provider() -> None:
+    """Create the process-wide TracerProvider exactly once.
+
+    Intended to be called a single time from the FastAPI startup event.
+    A repeated call logs a warning and leaves the existing provider in place.
+
+    When both Langfuse keys are present in the environment, the OTLP
+    endpoint and auth header are written to the ``OTEL_EXPORTER_OTLP_*``
+    environment variables and a single ``BatchSpanProcessor`` is attached.
+    Otherwise a provider without any span processor is stored, so spans are
+    never exported.
+    """
+    global _GLOBAL_TRACER_PROVIDER
+
+    if _GLOBAL_TRACER_PROVIDER is not None:
+        logger.warning("TracerProvider already initialized, skipping")
+        return
+
+    # Read Langfuse settings from the environment
+    public_key = os.getenv(ENV_LANGFUSE_PUBLIC_KEY)
+    secret_key = os.getenv(ENV_LANGFUSE_SECRET_KEY)
+    base_url = os.getenv(ENV_LANGFUSE_BASE_URL, DEFAULT_LANGFUSE_BASE_URL)
+
+    # The provider is always created, tagged with the workforce service name
+    provider = TracerProvider(
+        resource=Resource(attributes={SERVICE_NAME: SERVICE_NAME_WORKFORCE}))
+
+    if not (public_key and secret_key):
+        # No credentials: keep a provider without exporter
+        logger.info("Langfuse credentials not found, telemetry disabled")
+        _GLOBAL_TRACER_PROVIDER = provider
+        return
+
+    logger.info("Initializing Langfuse telemetry")
+
+    # The exporter below is built without arguments and picks up its
+    # endpoint and headers from these environment variables
+    os.environ[ENV_OTEL_EXPORTER_OTLP_ENDPOINT] = _create_langfuse_endpoint(
+        base_url)
+    os.environ[ENV_OTEL_EXPORTER_OTLP_HEADERS] = _create_basic_auth_header(
+        public_key, secret_key)
+
+    # Batch processor keeps export off the calling thread; the bounded
+    # queue and export timeout limit the impact of an unreachable endpoint
+    provider.add_span_processor(
+        BatchSpanProcessor(
+            OTLPSpanExporter(),
+            max_queue_size=4096,  # Upper bound on queued spans
+            export_timeout_millis=30000,  # 30s timeout
+            schedule_delay_millis=3000,  # Export every 3s
+            max_export_batch_size=1024,  # Export up to 1024 spans
+        ))
+    logger.info("Langfuse telemetry initialized successfully")
+
+    _GLOBAL_TRACER_PROVIDER = provider
+
+
+def get_tracer_provider() -> TracerProvider:
+    """Return the shared TracerProvider set up at application startup.
+
+    Returns:
+        TracerProvider: The provider stored by initialize_tracer_provider()
+
+    Raises:
+        RuntimeError: If no provider has been initialized yet
+    """
+    provider = _GLOBAL_TRACER_PROVIDER
+    if provider is not None:
+        return provider
+
+    raise RuntimeError("TracerProvider not initialized. "
+                       "Call initialize_tracer_provider() during app startup.")
+
+
+def _create_langfuse_endpoint(base_url: str) -> str:
+    """Create Langfuse OTLP endpoint URL.
+
+    Trailing slashes on ``base_url`` are removed before the OTLP path is
+    appended, so a configured value such as ``https://host/`` does not
+    produce a double slash in the resulting URL.
+
+    Args:
+        base_url: Langfuse base URL, with or without a trailing slash
+
+    Returns:
+        Full OTLP endpoint URL
+    """
+    return f"{base_url.rstrip('/')}{LANGFUSE_OTEL_PATH}"
+
+
+def _create_basic_auth_header(public_key: str, secret_key: str) -> str:
+    """Build the ``Authorization=Basic ...`` header entry for Langfuse.
+
+    The two keys are joined as ``public:secret`` and base64-encoded.
+
+    Args:
+        public_key: Langfuse public key
+        secret_key: Langfuse secret key
+
+    Returns:
+        Header string in ``name=value`` form
+        (e.g., "Authorization=Basic ...")
+    """
+    token = base64.b64encode(f"{public_key}:{secret_key}".encode()).decode()
+    return "Authorization=Basic " + token
+
+
+class WorkforceMetricsCallback(WorkforceMetrics):
+    """OpenTelemetry metrics callback for workforce events.
+
+    Sends workforce events to Langfuse via OTLP protocol.
+    Events are sent as spans with rich attributes.
+
+    Benefits:
+    - Async/non-blocking: Uses BatchSpanProcessor for background export
+    - Automatic batching: Reduces network overhead
+    - Distributed tracing: Task dependencies shown as parent-child spans
+    """
+
+    def __init__(self, project_id: str, task_id: str):
+        """Set up per-task telemetry state on top of the shared provider.
+
+        Telemetry counts as enabled only when both Langfuse keys are present
+        in the environment. When enabled, a tracer is obtained from the
+        global TracerProvider (no provider or span processor is created
+        here) and a root span for this task is started.
+
+        Args:
+            project_id: The project/workforce identifier
+            task_id: The task identifier
+
+        Raises:
+            RuntimeError: If telemetry is enabled but the global
+                TracerProvider has not been initialized
+
+        Environment variables:
+            LANGFUSE_PUBLIC_KEY: Langfuse public key (required)
+            LANGFUSE_SECRET_KEY: Langfuse secret key (required)
+        """
+        super().__init__()
+        self.project_id = project_id
+        self.task_id = task_id
+
+        # Per-task bookkeeping: spans for task execution, and quality
+        # scores (task_id -> quality_score)
+        self.task_spans = {}
+        self.task_quality_scores = {}
+
+        # Telemetry is on only when both Langfuse credentials are configured
+        self.enabled = bool(
+            os.getenv(ENV_LANGFUSE_PUBLIC_KEY)
+            and os.getenv(ENV_LANGFUSE_SECRET_KEY))
+
+        # Without telemetry there is no tracer and no root span
+        self.tracer = None
+        self.root_span = None
+        if not self.enabled:
+            return
+
+        # Tracer comes from the shared provider and is versioned with the
+        # installed CAMEL version
+        self.tracer = get_tracer_provider().get_tracer(
+            TRACER_NAME_WORKFORCE, camel.__version__)
+        self.root_span = self.tracer.start_span(
+            f"{SPAN_WORKFORCE_EXECUTION}:{task_id}")
+
+        # Langfuse-specific attributes first, then the eigent identifiers
+        root_attributes = (
+            (ATTR_LANGFUSE_SESSION_ID, project_id),
+            (ATTR_LANGFUSE_TAGS, json.dumps(DEFAULT_LANGFUSE_TAGS)),
+            (ATTR_PROJECT_ID, project_id),
+            (ATTR_TASK_ID, task_id),
+        )
+        for attr_key, attr_value in root_attributes:
+            self.root_span.set_attribute(attr_key, attr_value)
+
+    def log_worker_created(self,
+                           event: WorkerCreatedEvent,
+                           agent_class: str = None,
+                           model_type: str = None,
+                           **kwargs) -> None:
+        """Emit a ``worker.created`` span for a newly created worker.
+
+        No-op when telemetry is disabled. The span is parented to this
+        callback's root span and marked OK.
+
+        Args:
+            event: Worker creation event from CAMEL
+            agent_class: Agent class name; attached only when truthy
+            model_type: Model type; attached only when truthy
+            **kwargs: Accepted for compatibility and ignored
+        """
+        if not self.enabled:
+            return
+
+        # Attributes that are only recorded when a value was supplied
+        optional_attrs = (
+            (ATTR_WORKER_AGENT, agent_class),
+            (ATTR_WORKER_MODEL_TYPE, model_type),
+        )
+
+        parent_ctx = trace.set_span_in_context(self.root_span)
+        with self.tracer.start_as_current_span(
+                SPAN_WORKER_CREATED, context=parent_ctx) as worker_span:
+            worker_span.set_attribute(ATTR_WORKER_ID, event.worker_id)
+            worker_span.set_attribute(ATTR_WORKER_TYPE, event.worker_type)
+            worker_span.set_attribute(ATTR_WORKER_ROLE, event.role)
+
+            for attr_key, attr_value in optional_attrs:
+                if attr_value:
+                    worker_span.set_attribute(attr_key, attr_value)
+
+            worker_span.set_status(Status(StatusCode.OK))
+
+    def log_task_created(self, event: TaskCreatedEvent) -> None:
+        """Record a task-created event as a child span of the root span.
+
+        Args:
+            event: Task created event from CAMEL
+        """
+        if not self.enabled:
+            return
+
+        parent_ctx = trace.set_span_in_context(self.root_span)
+        with self.tracer.start_as_current_span(
+                SPAN_TASK_CREATED, context=parent_ctx) as created_span:
+            created_span.set_attribute(ATTR_TASK_ID, event.task_id)
+            created_span.set_attribute(ATTR_TASK_DESCRIPTION,
+                                       event.description)
+            created_span.set_attribute(ATTR_PROJECT_ID, self.project_id)
+
+            # Parent id and task type are optional; skip them when falsy
+            parent_id = event.parent_task_id
+            if parent_id:
+                created_span.set_attribute(ATTR_TASK_PARENT_ID, parent_id)
+            kind = event.task_type
+            if kind:
+                created_span.set_attribute(ATTR_TASK_TYPE, kind)
+
+            created_span.set_status(Status(StatusCode.OK))
+
+    def log_task_assigned(self, event: TaskAssignedEvent) -> None:
+        """Record a task-assignment event as a child span of the root span.
+
+        Args:
+            event: Task assignment event from CAMEL
+        """
+        if not self.enabled:
+            return
+
+        parent_ctx = trace.set_span_in_context(self.root_span)
+        with self.tracer.start_as_current_span(
+                SPAN_TASK_ASSIGNED, context=parent_ctx) as assigned_span:
+            assigned_span.set_attribute(ATTR_TASK_ID, event.task_id)
+            assigned_span.set_attribute(ATTR_WORKER_ID, event.worker_id)
+            assigned_span.set_attribute(ATTR_PROJECT_ID, self.project_id)
+
+            # A queue time of 0 is still recorded; only None is skipped
+            queue_time = event.queue_time_seconds
+            if queue_time is not None:
+                assigned_span.set_attribute(ATTR_TASK_QUEUE_TIME_SECONDS,
+                                            queue_time)
+
+            # Dependencies are serialized to a JSON array string
+            dependencies = event.dependencies
+            if dependencies:
+                assigned_span.set_attribute(ATTR_TASK_DEPENDENCIES,
+                                            json.dumps(dependencies))
+
+            assigned_span.set_status(Status(StatusCode.OK))
+
+    def log_task_started(self, event: TaskStartedEvent) -> None:
+        """Open a long-running execution span for a task.
+
+        The span is parented to the root span and kept in
+        ``self.task_spans`` until ``log_task_completed`` or
+        ``log_task_failed`` ends it.
+
+        Args:
+            event: Task started event from CAMEL
+        """
+        if not self.enabled:
+            return
+
+        # If this task already has an open span (a second start event
+        # arrived before completion/failure), end it first. Overwriting the
+        # dict entry would orphan the old span so it could never be ended.
+        stale_span = self.task_spans.pop(event.task_id, None)
+        if stale_span is not None:
+            stale_span.set_status(
+                Status(StatusCode.ERROR, "Task restarted before completion"))
+            stale_span.end()
+
+        # Start a long-running span for task execution as child of root span
+        parent_ctx = trace.set_span_in_context(self.root_span)
+        span = self.tracer.start_span(f"{SPAN_TASK_EXECUTION}:{event.task_id}",
+                                      context=parent_ctx)
+        span.set_attribute(ATTR_TASK_ID, event.task_id)
+        # Fall back to 'unknown' when the event has no worker_id attribute
+        span.set_attribute(ATTR_WORKER_ID,
+                           getattr(event, 'worker_id', 'unknown'))
+        span.set_attribute(ATTR_PROJECT_ID, self.project_id)
+        span.set_attribute(ATTR_TASK_STATUS, "started")
+
+        # Keep the span so the completion/failure handler can end it
+        self.task_spans[event.task_id] = span
+
+    def log_task_completed(self, event: TaskCompletedEvent) -> None:
+        """Finish the execution span of a completed task.
+
+        Nothing is recorded when no execution span is tracked for
+        ``event.task_id``.
+
+        Args:
+            event: Task completion event from CAMEL
+        """
+        if not self.enabled:
+            return
+        if event.task_id not in self.task_spans:
+            return
+
+        exec_span = self.task_spans.pop(event.task_id)
+        exec_span.set_attribute(ATTR_TASK_STATUS, "completed")
+        exec_span.set_attribute(ATTR_WORKER_ID, event.worker_id)
+
+        # Timestamp is exported as a string via isoformat() when present
+        completed_at = getattr(event, 'timestamp', None)
+        if completed_at:
+            exec_span.set_attribute(ATTR_TASK_TIMESTAMP,
+                                    completed_at.isoformat())
+
+        if event.parent_task_id:
+            exec_span.set_attribute(ATTR_TASK_PARENT_ID,
+                                    event.parent_task_id)
+        elapsed = event.processing_time_seconds
+        if elapsed is not None:
+            exec_span.set_attribute(ATTR_TASK_PROCESSING_TIME_SECONDS,
+                                    elapsed)
+
+        # Quality score lookup order: score parsed from log messages,
+        # then event.quality_score, then event.metadata['quality_score']
+        if event.task_id in self.task_quality_scores:
+            parsed_score = self.task_quality_scores.pop(event.task_id)
+            exec_span.set_attribute(ATTR_TASK_QUALITY_SCORE, parsed_score)
+        elif getattr(event, 'quality_score', None) is not None:
+            exec_span.set_attribute(ATTR_TASK_QUALITY_SCORE,
+                                    event.quality_score)
+        elif getattr(event, 'metadata',
+                     None) and 'quality_score' in event.metadata:
+            exec_span.set_attribute(ATTR_TASK_QUALITY_SCORE,
+                                    event.metadata['quality_score'])
+
+        # Each token usage entry becomes its own custom attribute
+        if event.token_usage:
+            for usage_key, usage_value in event.token_usage.items():
+                exec_span.set_attribute(
+                    f"eigent.task.token_usage.{usage_key}", usage_value)
+
+        exec_span.set_status(Status(StatusCode.OK))
+        exec_span.end()
+
+    def log_task_failed(self, event: TaskFailedEvent) -> None:
+        """Finish the execution span of a failed task with an error status.
+
+        Nothing is recorded when no execution span is tracked for
+        ``event.task_id``.
+
+        Args:
+            event: Task failure event from CAMEL
+        """
+        if not self.enabled:
+            return
+        if event.task_id not in self.task_spans:
+            return
+
+        failed_span = self.task_spans.pop(event.task_id)
+        failed_span.set_attribute(ATTR_TASK_STATUS, "failed")
+        # TODO: add error.message
+
+        # Parent id and worker id are attached only when truthy
+        parent_id = event.parent_task_id
+        if parent_id:
+            failed_span.set_attribute(ATTR_TASK_PARENT_ID, parent_id)
+        worker_id = event.worker_id
+        if worker_id:
+            failed_span.set_attribute(ATTR_WORKER_ID, worker_id)
+
+        # The error message travels as the status description
+        error_status = Status(StatusCode.ERROR, event.error_message)
+        failed_span.set_status(error_status)
+        failed_span.end()
+
+    def log_message(self, log_event: LogEvent) -> None:
+        """Handle a log event from CAMEL.
+
+        Info-level messages are scanned for a task quality score, which is
+        cached in ``self.task_quality_scores``. Error and critical messages
+        are exported as child spans of the root span; other levels are not
+        exported.
+
+        Args:
+            log_event: LogEvent from CAMEL
+        """
+        if not self.enabled:
+            return
+
+        level = log_event.level
+
+        # Parse quality score from info-level messages
+        # Pattern: "Task <task_id> completed successfully (quality score: X)."
+        # TODO: add this from the camel
+        if level == "info":
+            score_match = re.search(
+                r'Task\s+(\S+)\s+completed successfully'
+                r'.*quality score:\s*(\d+)', log_event.message)
+            if score_match:
+                matched_task_id, raw_score = score_match.groups()
+                self.task_quality_scores[matched_task_id] = int(raw_score)
+
+        # Only error and critical messages are exported as spans
+        if level not in ["error", "critical"]:
+            return
+
+        parent_ctx = trace.set_span_in_context(self.root_span)
+        with self.tracer.start_as_current_span(
+                SPAN_LOG_MESSAGE, context=parent_ctx) as log_span:
+            log_span.set_attribute("log.level", level)
+            log_span.set_attribute("log.message", log_event.message)
+            log_span.set_attribute(ATTR_PROJECT_ID, self.project_id)
+
+            # Metadata values are stringified before being attached
+            extra = log_event.metadata
+            if extra:
+                for meta_key, meta_value in extra.items():
+                    log_span.set_attribute(f"log.{meta_key}",
+                                           str(meta_value))
+
+            # Only critical messages mark the span itself as errored
+            if level == "critical":
+                log_span.set_status(
+                    Status(StatusCode.ERROR, log_event.message))
+
+    def log_all_tasks_completed(self, event) -> None:
+        """Record that every task finished, then close the root span.
+
+        Args:
+            event: All tasks completed event from CAMEL
+        """
+        if not self.enabled:
+            return
+
+        parent_ctx = trace.set_span_in_context(self.root_span)
+        with self.tracer.start_as_current_span(
+                SPAN_ALL_TASKS_COMPLETED, context=parent_ctx) as done_span:
+            done_span.set_attribute(ATTR_PROJECT_ID, self.project_id)
+            done_span.set_attribute(ATTR_TASK_ID, self.task_id)
+
+            # Timestamp is exported as a string via isoformat() when present
+            finished_at = getattr(event, 'timestamp', None)
+            if finished_at:
+                done_span.set_attribute(ATTR_TASK_TIMESTAMP,
+                                        finished_at.isoformat())
+
+            # total_tasks is attached whenever the attribute exists
+            if hasattr(event, 'total_tasks'):
+                done_span.set_attribute(ATTR_WORKFORCE_TOTAL_TASKS,
+                                        event.total_tasks)
+            done_span.set_status(Status(StatusCode.OK))
+
+        # The root span ends once all tasks are completed
+        root = self.root_span
+        if root:
+            root.set_status(Status(StatusCode.OK))
+            root.end()
+
+    def dump_to_json(self) -> str:
+        """Serialize a summary of this callback's state to JSON.
+
+        Returns:
+            JSON object string with the project id, task id, whether
+            telemetry is enabled, and the number of open task spans
+        """
+        summary = {}
+        summary["project_id"] = self.project_id
+        summary["task_id"] = self.task_id
+        summary["otel_enabled"] = self.enabled
+        summary["active_spans"] = len(self.task_spans)
+        return json.dumps(summary)
+
+    def get_ascii_tree_representation(self) -> str:
+        """Return a one-line text summary of the tracked telemetry state.
+
+        Returns:
+            String naming the project id, the task id and the number of
+            currently open task spans
+        """
+        headline = f"OpenTelemetry Metrics for project {self.project_id}, "
+        details = f"task {self.task_id} (active spans: {len(self.task_spans)})"
+        return headline + details
+
+    def get_kpis(self) -> Dict[str, Any]:
+        """Return a small dictionary of indicators for this callback.
+
+        Returns:
+            Dictionary with the project id, task id, whether telemetry is
+            enabled, and the number of open task spans
+        """
+        open_span_count = len(self.task_spans)
+        return dict(
+            project_id=self.project_id,
+            task_id=self.task_id,
+            otel_enabled=self.enabled,
+            active_task_spans=open_span_count,
+        )
+
+    def reset_task_data(self) -> None:
+        """Reset task-specific data.
+
+        This is called when starting a new task. Every execution span that
+        is still open is ended with an error status, and both per-task
+        caches (open spans and parsed quality scores) are emptied.
+        """
+        # End any remaining open task execution spans
+        for span in self.task_spans.values():
+            span.set_status(Status(StatusCode.ERROR, "Task interrupted/reset"))
+            span.end()
+
+        self.task_spans.clear()
+        # Drop quality scores parsed from log messages that were never
+        # consumed, so they cannot carry over past the reset
+        self.task_quality_scores.clear()
--- a/backend/app/utils/toolkit/google_calendar_toolkit.py
+++ b/backend/app/utils/toolkit/google_calendar_toolkit.py
@ -21,11 +21,11 @@ from app.service.task import Agents
 from app.utils.listen.toolkit_listen import auto_listen_toolkit
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
 from app.utils.oauth_state_manager import oauth_state_manager
-from utils import traceroot_wrapper as traceroot
+import logging

 from camel.toolkits import GoogleCalendarToolkit as BaseGoogleCalendarToolkit

-logger = traceroot.get_logger("main")
+logger = logging.getLogger("main")

 SCOPES = ['https://www.googleapis.com/auth/calendar']

--- a/backend/app/utils/toolkit/human_toolkit.py
+++ b/backend/app/utils/toolkit/human_toolkit.py
@ -19,9 +19,9 @@ from app.service.task import Action, ActionAskData, ActionNoticeData, get_task_l
 from app.utils.listen.toolkit_listen import auto_listen_toolkit, listen_toolkit
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
 from app.service.task import process_task
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("human_toolkit")
+logger = logging.getLogger("human_toolkit")


@auto_listen_toolkit(BaseToolkit)
--- a/backend/app/utils/toolkit/hybrid_browser_python_toolkit.py
+++ b/backend/app/utils/toolkit/hybrid_browser_python_toolkit.py
@ -31,9 +31,9 @@ from app.exception.exception import ProgramException
 from app.service.task import Agents
 from app.utils.listen.toolkit_listen import auto_listen_toolkit, listen_toolkit
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("hybrid_browser_python_toolkit")
+logger = logging.getLogger("hybrid_browser_python_toolkit")


 class BrowserSession(BaseHybridBrowserSession):
--- a/backend/app/utils/toolkit/hybrid_browser_toolkit.py
+++ b/backend/app/utils/toolkit/hybrid_browser_toolkit.py
@ -30,9 +30,9 @@ from app.component.environment import env
 from app.service.task import Agents
 from app.utils.listen.toolkit_listen import auto_listen_toolkit
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("hybrid_browser_toolkit")
+logger = logging.getLogger("hybrid_browser_toolkit")

 # Global navigation lock to prevent concurrent visit_page conflicts (ERR_ABORTED)
 # This is needed because multiple sessions may share the same browser via CDP
--- a/backend/app/utils/toolkit/notion_mcp_toolkit.py
+++ b/backend/app/utils/toolkit/notion_mcp_toolkit.py
@ -21,9 +21,9 @@ from camel.toolkits import FunctionTool
 from app.component.environment import env
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
 from camel.toolkits.mcp_toolkit import MCPToolkit
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("notion_mcp_toolkit")
+logger = logging.getLogger("notion_mcp_toolkit")

 def _customize_function_parameters(schema: Dict[str, Any]) -> None:
        r"""Customize function parameters for specific functions.
--- a/backend/app/utils/toolkit/search_toolkit.py
+++ b/backend/app/utils/toolkit/search_toolkit.py
@ -21,9 +21,9 @@ from app.component.environment import env, env_not_empty
 from app.service.task import Agents
 from app.utils.listen.toolkit_listen import auto_listen_toolkit, listen_toolkit
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("search_toolkit")
+logger = logging.getLogger("search_toolkit")


@auto_listen_toolkit(BaseSearchToolkit)
--- a/backend/app/utils/toolkit/slack_toolkit.py
+++ b/backend/app/utils/toolkit/slack_toolkit.py
@ -18,9 +18,9 @@ from app.component.environment import env
 from app.service.task import Agents
 from app.utils.listen.toolkit_listen import auto_listen_toolkit
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("slack_toolkit")
+logger = logging.getLogger("slack_toolkit")


@auto_listen_toolkit(BaseSlackToolkit)
--- a/backend/app/utils/toolkit/terminal_toolkit.py
+++ b/backend/app/utils/toolkit/terminal_toolkit.py
@ -29,9 +29,9 @@ from app.service.task import Action, ActionTerminalData, Agents, get_task_lock
 from app.utils.listen.toolkit_listen import auto_listen_toolkit
 from app.utils.toolkit.abstract_toolkit import AbstractToolkit
 from app.service.task import process_task
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("terminal_toolkit")
+logger = logging.getLogger("terminal_toolkit")

 # App version - should match electron app version
 # TODO: Consider getting this from a shared config
--- a/backend/app/utils/workforce.py
+++ b/backend/app/utils/workforce.py
@ -13,41 +13,39 @@
 # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========

 import asyncio
+import logging
 from typing import Generator, List, Optional
-from camel.agents import ChatAgent
-from camel.societies.workforce.workforce import (
-    Workforce as BaseWorkforce,
-    WorkforceState,
-    DEFAULT_WORKER_POOL_SIZE,
-)
-from camel.societies.workforce.utils import FailureHandlingConfig
-from camel.societies.workforce.task_channel import TaskChannel
-from camel.societies.workforce.base import BaseNode
-from camel.societies.workforce.utils import TaskAssignResult
-from camel.societies.workforce.workforce_metrics import WorkforceMetrics
-from camel.societies.workforce.events import WorkerCreatedEvent
-from camel.societies.workforce.prompts import TASK_DECOMPOSE_PROMPT
-from camel.tasks.task import Task, TaskState, validate_task_content
+
 from app.component import code
 from app.exception.exception import UserException
+from app.service.task import (Action, ActionAssignTaskData, ActionEndData,
+                              ActionTaskStateData, ActionTimeoutData,
+                              get_camel_task, get_task_lock)
 from app.utils.agent import ListenChatAgent
-from app.service.task import (
-    Action,
-    ActionAssignTaskData,
-    ActionEndData,
-    ActionTaskStateData,
-    ActionTimeoutData,
-    get_camel_task,
-    get_task_lock,
-)
 from app.utils.single_agent_worker import SingleAgentWorker
-from utils import traceroot_wrapper as traceroot
-
-logger = traceroot.get_logger("workforce")
+from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback
+from camel.agents import ChatAgent
+from camel.societies.workforce.base import BaseNode
+from camel.societies.workforce.events import (TaskAssignedEvent,
+                                              TaskCompletedEvent,
+                                              TaskCreatedEvent,
+                                              TaskFailedEvent,
+                                              WorkerCreatedEvent)
+from camel.societies.workforce.prompts import TASK_DECOMPOSE_PROMPT
+from camel.societies.workforce.task_channel import TaskChannel
+from camel.societies.workforce.utils import (FailureHandlingConfig,
+                                             TaskAssignResult)
+from camel.societies.workforce.workforce import DEFAULT_WORKER_POOL_SIZE
+from camel.societies.workforce.workforce import Workforce as BaseWorkforce
+from camel.societies.workforce.workforce import WorkforceState
+from camel.societies.workforce.workforce_metrics import WorkforceMetrics
+from camel.tasks.task import Task, TaskState, validate_task_content

+logger = logging.getLogger("workforce")


 class Workforce(BaseWorkforce):
+
    def __init__(
        self,
        api_task_id: str,
@ -62,9 +60,12 @@ class Workforce(BaseWorkforce):
    ) -> None:
        self.api_task_id = api_task_id
        logger.info("=" * 80)
-        logger.info("🏭 [WF-LIFECYCLE] Workforce.__init__ STARTED", extra={"api_task_id": api_task_id})
+        logger.info("🏭 [WF-LIFECYCLE] Workforce.__init__ STARTED",
+                    extra={"api_task_id": api_task_id})
        logger.info(f"[WF-LIFECYCLE] Workforce id will be: {id(self)}")
-        logger.info(f"[WF-LIFECYCLE] Init params: graceful_shutdown_timeout={graceful_shutdown_timeout}, share_memory={share_memory}")
+        logger.info(
+            f"[WF-LIFECYCLE] Init params: graceful_shutdown_timeout="
+            f"{graceful_shutdown_timeout}, share_memory={share_memory}")
        logger.info("=" * 80)
        super().__init__(
            description=description,
@ -77,12 +78,12 @@ class Workforce(BaseWorkforce):
            use_structured_output_handler=use_structured_output_handler,
            task_timeout_seconds=3600,  # 60 minutes
            failure_handling_config=FailureHandlingConfig(
-                enabled_strategies=["retry", "replan"],
-            ),
+                enabled_strategies=["retry", "replan"], ),
        )
        self.task_agent.stream_accumulate = True
        self.task_agent._stream_accumulate_explicit = True
-        logger.info(f"[WF-LIFECYCLE] ✅ Workforce.__init__ COMPLETED, id={id(self)}")
+        logger.info(
+            f"[WF-LIFECYCLE] ✅ Workforce.__init__ COMPLETED, id={id(self)}")

    def eigent_make_sub_tasks(
        self,
@ -91,28 +92,37 @@ class Workforce(BaseWorkforce):
        on_stream_batch=None,
        on_stream_text=None,
    ):
-        """
-        Split process_task method to eigent_make_sub_tasks and eigent_start method.
+        """Split process_task method to eigent_make_sub_tasks
+        and eigent_start method.

        Args:
            task: The main task to decompose
-            coordinator_context: Optional context ONLY for coordinator agent during decomposition.
-                                This context will NOT be passed to subtasks or worker agents.
-            on_stream_batch: Optional callback for streaming batches signature (List[Task], bool)
-            on_stream_text: Optional callback for raw streaming text chunks
+            coordinator_context: Optional context ONLY for coordinator
+                agent during decomposition. This context will NOT
+                be passed to subtasks or worker agents.
+            on_stream_batch: Optional callback for streaming
+                batches signature (List[Task], bool)
+            on_stream_text: Optional callback for raw
+                streaming text chunks
        """
-        logger.debug("[DECOMPOSE] eigent_make_sub_tasks called", extra={
-            "api_task_id": self.api_task_id,
-            "task_id": task.id
-        })
+        logger.debug("[DECOMPOSE] eigent_make_sub_tasks called",
+                     extra={
+                         "api_task_id": self.api_task_id,
+                         "task_id": task.id
+                     })

        if not validate_task_content(task.content, task.id):
            task.state = TaskState.FAILED
            task.result = "Task failed: Invalid or empty content provided"
-            logger.warning("[DECOMPOSE] Task rejected: Invalid or empty content", extra={
-                "task_id": task.id,
-                "content_preview": task.content[:50] + "..." if len(task.content) > 50 else task.content
-            })
+            logger.warning(
+                "[DECOMPOSE] Task rejected: Invalid or empty content",
+                extra={
+                    "task_id":
+                    task.id,
+                    "content_preview":
+                    task.content[:50] +
+                    "..." if len(task.content) > 50 else task.content
+                })
            raise UserException(code.error, task.result)

        self.reset()
@ -120,42 +130,43 @@ class Workforce(BaseWorkforce):
        self.set_channel(TaskChannel())
        self._state = WorkforceState.RUNNING
        task.state = TaskState.OPEN
-
        subtasks = asyncio.run(
            self.handle_decompose_append_task(
                task,
                reset=False,
                coordinator_context=coordinator_context,
                on_stream_batch=on_stream_batch,
-                on_stream_text=on_stream_text
-            )
-        )
+                on_stream_text=on_stream_text))

-        logger.info(f"[DECOMPOSE] Task decomposition completed", extra={
-            "api_task_id": self.api_task_id,
-            "task_id": task.id,
-            "subtasks_count": len(subtasks)
-        })
+        logger.info("[DECOMPOSE] Task decomposition completed",
+                    extra={
+                        "api_task_id": self.api_task_id,
+                        "task_id": task.id,
+                        "subtasks_count": len(subtasks)
+                    })
        return subtasks

    async def eigent_start(self, subtasks: list[Task]):
        """start the workforce"""
-        logger.debug(f"[WF-LIFECYCLE] eigent_start called with {len(subtasks)} subtasks", extra={
-            "api_task_id": self.api_task_id
-        })
+        logger.debug((f"[WF-LIFECYCLE] eigent_start called with "
+                      f"{len(subtasks)} subtasks"),
+                     extra={"api_task_id": self.api_task_id})
        # Clear existing pending tasks to use the user-edited task list
        # (tasks may have been added during decomposition before user edits)
        self._pending_tasks.clear()
+
        self._pending_tasks.extendleft(reversed(subtasks))
        self.save_snapshot("Initial task decomposition")

        try:
            await self.start()
        except Exception as e:
-            logger.error(f"[WF-LIFECYCLE] Error in workforce execution: {e}", extra={
-                "api_task_id": self.api_task_id,
-                "error": str(e)
-            }, exc_info=True)
+            logger.error(f"[WF-LIFECYCLE] Error in workforce execution: {e}",
+                         extra={
+                             "api_task_id": self.api_task_id,
+                             "error": str(e)
+                         },
+                         exc_info=True)
            self._state = WorkforceState.STOPPED
            raise
        finally:
@ -169,24 +180,24 @@ class Workforce(BaseWorkforce):
                content=task.content,
                child_nodes_info=self._get_child_nodes_info(),
                additional_info=task.additional_info,
-            )
-        )
+            ))

        self.task_agent.reset()
-        result = task.decompose(
-            self.task_agent, decompose_prompt, stream_callback=stream_callback
-        )
+        result = task.decompose(self.task_agent,
+                                decompose_prompt,
+                                stream_callback=stream_callback)

        if isinstance(result, Generator):
+
            def streaming_with_dependencies():
                all_subtasks = []
                for new_tasks in result:
                    all_subtasks.extend(new_tasks)
                    if new_tasks:
                        self._update_dependencies_for_decomposition(
-                            task, all_subtasks
-                        )
+                            task, all_subtasks)
                    yield new_tasks
+
            return streaming_with_dependencies()
        else:
            subtasks = result
@ -202,29 +213,32 @@ class Workforce(BaseWorkforce):
        on_stream_batch=None,
        on_stream_text=None,
    ) -> List[Task]:
-        """
-        Override to support coordinator_context parameter.
-        Handle task decomposition and validation, then append to pending tasks.
+        """Override to support coordinator_context parameter.
+        Handle task decomposition and validation,
+        then append to pending tasks.

        Args:
            task: The task to be processed
-            reset: Should trigger workforce reset (Workforce must not be running)
-            coordinator_context: Optional context ONLY for coordinator during decomposition
-            on_stream_batch: Optional callback for streaming batches signature (List[Task], bool)
+            reset: Should trigger workforce reset
+                (Workforce must not be running)
+            coordinator_context: Optional context ONLY for
+                coordinator during decomposition
+            on_stream_batch: Optional callback for streaming
+                batches signature (List[Task], bool)
            on_stream_text: Optional callback for raw streaming text chunks

        Returns:
            List[Task]: The decomposed subtasks or the original task
        """
-        logger.debug(f"[DECOMPOSE] handle_decompose_append_task called, task_id={task.id}, reset={reset}")
+        logger.debug(f"[DECOMPOSE] handle_decompose_append_task called, "
+                     f"task_id={task.id}, reset={reset}")

        if not validate_task_content(task.content, task.id):
            task.state = TaskState.FAILED
            task.result = "Task failed: Invalid or empty content provided"
-            logger.warning(
-                f"[DECOMPOSE] Task {task.id} rejected: Invalid or empty content. "
-                f"Content preview: '{task.content}'"
-            )
+            logger.warning(f"[DECOMPOSE] Task {task.id} rejected: "
+                           f"Invalid or empty content. "
+                           f"Content preview: '{task.content}'")
            return [task]

        if reset and self._state != WorkforceState.RUNNING:
@ -235,12 +249,15 @@ class Workforce(BaseWorkforce):

        if coordinator_context:
            original_content = task.content
-            task_with_context = coordinator_context + "\n=== CURRENT TASK ===\n" + original_content
+            task_with_context = (coordinator_context +
+                                 "\n=== CURRENT TASK ===\n" + original_content)
            task.content = task_with_context
-            subtasks_result = self._decompose_task(task, stream_callback=on_stream_text)
+            subtasks_result = self._decompose_task(
+                task, stream_callback=on_stream_text)
            task.content = original_content
        else:
-            subtasks_result = self._decompose_task(task, stream_callback=on_stream_text)
+            subtasks_result = self._decompose_task(
+                task, stream_callback=on_stream_text)

        if isinstance(subtasks_result, Generator):
            subtasks = []
@ -252,7 +269,8 @@ class Workforce(BaseWorkforce):
                    except Exception as e:
                        logger.warning(f"Streaming callback failed: {e}")

-            # After consuming the generator, check task.subtasks for final result as fallback
+            # After consuming the generator, check task.subtasks
+            # for final result as fallback
            if not subtasks and task.subtasks:
                subtasks = task.subtasks
        else:
@ -260,9 +278,24 @@ class Workforce(BaseWorkforce):

        if subtasks:
            self._pending_tasks.extendleft(reversed(subtasks))
+            # Log task created events
+            metrics_callbacks = [
+                cb for cb in self._callbacks
+                if isinstance(cb, WorkforceMetrics)
+            ]
+            if metrics_callbacks:
+                for subtask in subtasks:
+                    event = TaskCreatedEvent(
+                        task_id=subtask.id,
+                        description=subtask.content,
+                        parent_task_id=task.id if task else None,
+                        task_type=None,
+                    )
+                    metrics_callbacks[0].log_task_created(event)

        if not subtasks:
-            logger.warning(f"[DECOMPOSE] No subtasks returned, creating fallback task")
+            logger.warning(
+                "[DECOMPOSE] No subtasks returned, creating fallback task")
            fallback_task = Task(
                content=task.content,
                id=f"{task.id}.1",
@ -271,27 +304,73 @@ class Workforce(BaseWorkforce):
            task.subtasks = [fallback_task]
            subtasks = [fallback_task]

+            # Log fallback task created event
+            metrics_callbacks = [
+                cb for cb in self._callbacks
+                if isinstance(cb, WorkforceMetrics)
+            ]
+            if metrics_callbacks:
+                event = TaskCreatedEvent(
+                    task_id=fallback_task.id,
+                    description=fallback_task.content,
+                    parent_task_id=task.id if task else None,
+                    task_type=None,
+                )
+                metrics_callbacks[0].log_task_created(event)
+
        if on_stream_batch:
            try:
                on_stream_batch(subtasks, True)
            except Exception as e:
                logger.warning(f"Final streaming callback failed: {e}")

-        logger.debug(f"[DECOMPOSE] handle_decompose_append_task completed, returned {len(subtasks)} subtasks")
+        logger.debug(f"[DECOMPOSE] handle_decompose_append_task completed, "
+                     f"returned {len(subtasks)} subtasks")
        return subtasks

    def _get_agent_id_from_node_id(self, node_id: str) -> str | None:
-        """Map worker node_id to the actual agent_id for frontend communication.
+        """Map worker node_id to the actual agent_id for
+        frontend communication.

-        The CAMEL base class uses node_id for task assignment, but the frontend
-        uses agent_id to identify agents. This method provides the mapping.
+        The CAMEL base class uses node_id for task assignment,
+        but the frontend uses agent_id to identify agents.
+        This method provides the mapping.
        """
        for child in self._children:
            if hasattr(child, 'node_id') and child.node_id == node_id:
-                if hasattr(child, 'worker') and hasattr(child.worker, 'agent_id'):
+                if hasattr(child, 'worker') and hasattr(
+                        child.worker, 'agent_id'):
                    return child.worker.agent_id
        return None

+    def _extract_model_type(self, agent: ChatAgent) -> Optional[str]:
+        """Extract model type from agent's model_backend.
+
+        With a multi-model backend only the first model's type is reported.
+
+        Args:
+            agent: The chat agent to extract model type from
+
+        Returns:
+            Model type string (its ``.value`` if present), or None if not found
+        """
+        if not hasattr(agent, 'model_backend') or not agent.model_backend:
+            return None
+
+        model_obj = agent.model_backend
+
+        # ModelManager case (has `models`): only the first model is used
+        if hasattr(model_obj, 'models') and model_obj.models:
+            first_model = model_obj.models[0] if model_obj.models else None
+            if first_model:
+                mt = getattr(first_model, 'model_type', None)
+                return str(
+                    mt.value if hasattr(mt, 'value') else mt) if mt else None
+
+        # Single model case; also reached when the first managed model is falsy
+        mt = getattr(model_obj, 'model_type', None)
+        return str(mt.value if hasattr(mt, 'value') else mt) if mt else None
+
    async def _find_assignee(self, tasks: List[Task]) -> TaskAssignResult:
        # Task assignment phase: send "waiting for execution" notification
        # to the frontend, and send "start execution" notification when the
@ -300,8 +379,10 @@ class Workforce(BaseWorkforce):

        task_lock = get_task_lock(self.api_task_id)
        for item in assigned.assignments:
-            # DEBUG ▶ Task has been assigned to which worker and its dependencies
-            logger.debug(f"[WF] ASSIGN {item.task_id} -> {item.assignee_id} deps={item.dependencies}")
+            # DEBUG ▶ Task has been assigned to which worker
+            # and its dependencies
+            logger.debug(f"[WF] ASSIGN {item.task_id} -> {item.assignee_id} "
+                         f"deps={item.dependencies}")
            # The main task itself does not need notification
            if self._task and item.task_id == self._task.id:
                continue
@ -309,33 +390,39 @@ class Workforce(BaseWorkforce):
            task_obj = get_camel_task(item.task_id, tasks)
            if task_obj is None:
                logger.warning(
-                    f"[WF] WARN: Task {item.task_id} not found in tasks list during ASSIGN phase. This may indicate a task tree inconsistency."
-                )
+                    f"[WF] WARN: Task {item.task_id} not found in "
+                    f"tasks list during ASSIGN phase. This may indicate "
+                    f"a task tree inconsistency.")
                content = ""
            else:
                content = task_obj.content

-            # Skip sending notification if this is a retry/replan for an already assigned task
-            # This prevents the frontend from showing "Reassigned" when a task is being retried
-            # with the same or different worker due to failure recovery
+            # Skip sending notification if this is a retry/replan for
+            # an already assigned task
+            # This prevents the frontend from showing "Reassigned"
+            # when a task is being retried with the same or different
+            # worker due to failure recovery
            if task_obj and task_obj.assigned_worker_id:
                logger.debug(
                    f"[WF] ASSIGN Skip notification for task {item.task_id}: "
-                    f"already has assigned_worker_id={task_obj.assigned_worker_id}, "
-                    f"new assignee={item.assignee_id} (retry/replan scenario)"
-                )
+                    f"already has assigned_worker_id="
+                    f"{task_obj.assigned_worker_id}, "
+                    f"new assignee={item.assignee_id} (retry/replan scenario)")
                continue

            # Map node_id to agent_id for frontend communication
-            # The CAMEL base class returns node_id as assignee_id, but the frontend
-            # uses agent_id to identify agents
+            # The CAMEL base class returns node_id as assignee_id,
+            # but the frontend uses agent_id to identify agents
            agent_id = self._get_agent_id_from_node_id(item.assignee_id)
            if agent_id is None:
+                workers = [
+                    c.node_id for c in self._children if hasattr(c, 'node_id')
+                ]
                logger.error(
-                    f"[WF] ERROR: Could not find agent_id for node_id={item.assignee_id}. "
-                    f"Task {item.task_id} will not be properly tracked on frontend. "
-                    f"Available workers: {[c.node_id for c in self._children if hasattr(c, 'node_id')]}"
-                )
+                    f"[WF] ERROR: Could not find agent_id for "
+                    f"node_id={item.assignee_id}. Task {item.task_id} "
+                    f"will not be properly tracked on frontend. "
+                    f"Available workers: {workers}")
                continue  # Skip sending notification for unmapped worker

            # Asynchronously send waiting notification
@ -350,28 +437,47 @@ class Workforce(BaseWorkforce):
                            "state": "waiting",  # Mark as waiting state
                            "failure_count": 0,
                        },
-                    )
-                )
-            )
+                    )))
            # Track the task for cleanup
            task_lock.add_background_task(task)
+
+            metrics_callbacks = [
+                cb for cb in self._callbacks
+                if isinstance(cb, WorkforceMetrics)
+            ]
+            if metrics_callbacks:
+                event = TaskAssignedEvent(
+                    task_id=item.task_id,
+                    worker_id=agent_id,
+                    dependencies=item.dependencies,
+                )
+                metrics_callbacks[0].log_task_assigned(event)
        return assigned

    async def _post_task(self, task: Task, assignee_id: str) -> None:
        # DEBUG ▶ Dependencies are met, the task really starts to execute
        logger.debug(f"[WF] POST  {task.id} -> {assignee_id}")
-        """Override the _post_task method to notify the frontend when the task really starts to execute"""
-        # When the dependency check is passed and the task is about to be published to the execution queue, send a notification to the frontend
+        """Override the _post_task method to notify the frontend
+        when the task really starts to execute
+        """
+        # When the dependency check is passed and the task is
+        # about to be published to the execution queue, send a
+        # notification to the frontend
        task_lock = get_task_lock(self.api_task_id)
-        if self._task and task.id != self._task.id:  # Skip the main task itself
+        if self._task and task.id != self._task.id:
+            # Skip the main task itself
            # Map node_id to agent_id for frontend communication
            agent_id = self._get_agent_id_from_node_id(assignee_id)
+            workers = [
+                c.node_id for c in self._children if hasattr(c, 'node_id')
+            ]
            if agent_id is None:
-                logger.error(
-                    f"[WF] ERROR: Could not find agent_id for node_id={assignee_id}. "
-                    f"Task {task.id} will not be properly tracked on frontend. "
-                    f"Available workers: {[c.node_id for c in self._children if hasattr(c, 'node_id')]}"
-                )
+                logger.error(f"[WF] ERROR: Could not find agent_id "
+                             f"for node_id={assignee_id}. "
+                             f"Task {task.id} will not be properly "
+                             f"tracked on frontend. "
+                             f"Available workers: "
+                             f"{workers}")
            else:
                await task_lock.put_queue(
                    ActionAssignTaskData(
@ -383,9 +489,9 @@ class Workforce(BaseWorkforce):
                            "state": "running",  # running state
                            "failure_count": task.failure_count,
                        },
-                    )
-                )
-        # Call the parent class method to continue the normal task publishing process
+                    ))
+        # Call the parent class method to continue the
+        # normal task publishing process
        await super()._post_task(task, assignee_id)

    def add_single_agent_worker(
@ -396,7 +502,9 @@ class Workforce(BaseWorkforce):
        enable_workflow_memory: bool = False,
    ) -> BaseWorkforce:
        if self._state == WorkforceState.RUNNING:
-            raise RuntimeError("Cannot add workers while workforce is running. Pause the workforce first.")
+            raise RuntimeError(
+                "Cannot add workers while workforce is running. "
+                "Pause the workforce first.")

        # Validate worker agent compatibility
        self._validate_agent_compatibility(worker, "Worker agent")
@ -422,14 +530,32 @@ class Workforce(BaseWorkforce):
        self._start_child_node_when_paused(worker_node.start())

        # Use proper CAMEL pattern for metrics logging
-        metrics_callbacks = [cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)]
+        metrics_callbacks = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
        if metrics_callbacks:
+            # Collect agent metadata for telemetry
+            agent_class_name = getattr(worker, 'agent_name',
+                                       worker.__class__.__name__)
+            model_type = self._extract_model_type(worker)
+
+            # Log worker created event
            event = WorkerCreatedEvent(
                worker_id=worker_node.node_id,
                worker_type="SingleAgentWorker",
                role=worker_node.description,
            )
-            metrics_callbacks[0].log_worker_created(event)
+
+            # Call log_worker_created for all callbacks
+            for cb in self._callbacks:
+                if isinstance(cb, WorkforceMetricsCallback):
+                    cb.log_worker_created(
+                        event,
+                        agent_class=agent_class_name,
+                        model_type=model_type,
+                    )
+                else:
+                    cb.log_worker_created(event)

        return self

@ -442,8 +568,11 @@ class Workforce(BaseWorkforce):
        is_main_task = self._task and task.id == self._task.id
        task_type = "MAIN TASK" if is_main_task else "SUB-TASK"
        logger.info(f"[TASK-RESULT] {task_type} COMPLETED: {task.id}")
-        logger.info(f"[TASK-RESULT] Content: {task.content[:200]}..." if len(task.content) > 200 else f"[TASK-RESULT] Content: {task.content}")
-        logger.info(f"[TASK-RESULT] Result: {task.result[:500]}..." if task.result and len(str(task.result)) > 500 else f"[TASK-RESULT] Result: {task.result}")
+        logger.info(f"[TASK-RESULT] Content: {task.content[:200]}..." if len(
+            task.content) > 200 else f"[TASK-RESULT] Content: {task.content}")
+        logger.info(f"[TASK-RESULT] Result: {task.result[:500]}..."
+                    if task.result and len(str(task.result)) > 500 else
+                    f"[TASK-RESULT] Result: {task.result}")

        task_data = {
            "task_id": task.id,
@ -452,12 +581,21 @@ class Workforce(BaseWorkforce):
            "result": task.result or "",
            "failure_count": task.failure_count,
        }
-        
-        await task_lock.put_queue(
-            ActionTaskStateData(
-                data=task_data
+
+        await task_lock.put_queue(ActionTaskStateData(data=task_data))
+
+        # Log task completion to metrics
+        metrics_callbacks = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
+        if metrics_callbacks:
+            # worker_id is required and cannot be None
+            worker_id = getattr(task, 'assigned_worker_id', None) or 'unknown'
+            event = TaskCompletedEvent(
+                task_id=task.id,
+                worker_id=worker_id,
            )
-        )
+            metrics_callbacks[0].log_task_completed(event)

        # IMPORTANT: Sync this subtask's result back to parent.subtasks BEFORE calling super()
        # This fixes the issue where parent.subtasks[i].result is None because CAMEL's
@ -482,17 +620,21 @@ class Workforce(BaseWorkforce):

        result = await super()._handle_failed_task(task)

-        # Only send completion report to frontend when all retries are exhausted
+        # Only send completion report to frontend when all
+        # retries are exhausted
        max_retries = self.failure_handling_config.max_retries
        if task.failure_count < max_retries:
            return result

        error_message = ""
        # Use proper CAMEL pattern for metrics logging
-        metrics_callbacks = [cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)]
+        metrics_callbacks = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
        if metrics_callbacks and hasattr(metrics_callbacks[0], "log_entries"):
            for entry in reversed(metrics_callbacks[0].log_entries):
-                if entry.get("event_type") == "task_failed" and entry.get("task_id") == task.id:
+                if entry.get("event_type") == "task_failed" and entry.get(
+                        "task_id") == task.id:
                    error_message = entry.get("error_message")
                    break

@ -505,9 +647,18 @@ class Workforce(BaseWorkforce):
                    "state": task.state,
                    "failure_count": task.failure_count,
                    "result": str(error_message),
-                }
-            )
-        )
+                }))
+
+        if metrics_callbacks:
+
+            event = TaskFailedEvent(task_id=task.id, )
+            # Add failure details if available
+            if hasattr(task, 'assigned_worker_id'):
+                event.worker_id = task.assigned_worker_id
+            event.error_message = error_message or str(task.result
+                                                       or "Unknown error")
+            event.failure_count = task.failure_count
+            metrics_callbacks[0].log_task_failed(event)

        return result

@ -532,77 +683,114 @@ class Workforce(BaseWorkforce):
                f"⏰ [WF-TIMEOUT] Task timeout in workforce {self.node_id}. "
                f"Timeout: {self.task_timeout_seconds}s, "
                f"Pending tasks: {len(self._pending_tasks)}, "
-                f"In-flight tasks: {self._in_flight_tasks}"
-            )
+                f"In-flight tasks: {self._in_flight_tasks}")

-            # Try to notify frontend, but don't let notification failure mask the timeout
+            # Try to notify frontend, but don't let
+            # notification failure mask the timeout
            try:
                task_lock = get_task_lock(self.api_task_id)
                timeout_minutes = self.task_timeout_seconds // 60
                await task_lock.put_queue(
                    ActionTimeoutData(
                        data={
-                            "message": f"Task execution timeout: No response received for {timeout_minutes} minutes",
-                            "in_flight_tasks": self._in_flight_tasks,
-                            "pending_tasks": len(self._pending_tasks),
-                            "timeout_seconds": self.task_timeout_seconds,
-                        }
-                    )
-                )
+                            "message":
+                            (f"Task execution timeout: No response received "
+                             f"for {timeout_minutes} minutes"),
+                            "in_flight_tasks":
+                            self._in_flight_tasks,
+                            "pending_tasks":
+                            len(self._pending_tasks),
+                            "timeout_seconds":
+                            self.task_timeout_seconds,
+                        }))
            except Exception as notify_err:
-                logger.error(f"Failed to send timeout notification: {notify_err}")
+                logger.error(
+                    f"Failed to send timeout notification: {notify_err}")
            raise
        except Exception as e:
-            logger.error(
-                f"Error getting returned task {e} in workforce {self.node_id}. "
-                f"Current pending tasks: {len(self._pending_tasks)}, "
-                f"In-flight tasks: {self._in_flight_tasks}"
-            )
+            logger.error(f"Error getting returned task {e} in "
+                         f"workforce {self.node_id}. "
+                         f"Current pending tasks: {len(self._pending_tasks)}, "
+                         f"In-flight tasks: {self._in_flight_tasks}")
            raise

    def stop(self) -> None:
        logger.info("=" * 80)
-        logger.info(f"⏹️  [WF-LIFECYCLE] stop() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
-        logger.info(f"[WF-LIFECYCLE] Current state before stop: {self._state.name}, _running: {self._running}")
+        logger.info("⏹️  [WF-LIFECYCLE] stop() CALLED",
+                    extra={
+                        "api_task_id": self.api_task_id,
+                        "workforce_id": id(self)
+                    })
+        logger.info(f"[WF-LIFECYCLE] Current state before stop: "
+                    f"{self._state.name}, _running: {self._running}")
        logger.info("=" * 80)
        super().stop()
-        logger.info(f"[WF-LIFECYCLE] super().stop() completed, new state: {self._state.name}")
+        logger.info(f"[WF-LIFECYCLE] super().stop() completed, "
+                    f"new state: {self._state.name}")
        task_lock = get_task_lock(self.api_task_id)
        task = asyncio.create_task(task_lock.put_queue(ActionEndData()))
        task_lock.add_background_task(task)
-        logger.info(f"[WF-LIFECYCLE] ✅ ActionEndData queued")
+        logger.info("[WF-LIFECYCLE] ✅ ActionEndData queued")

    def stop_gracefully(self) -> None:
        logger.info("=" * 80)
-        logger.info(f"🛑 [WF-LIFECYCLE] stop_gracefully() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
-        logger.info(f"[WF-LIFECYCLE] Current state before stop_gracefully: {self._state.name}, _running: {self._running}")
+        logger.info("🛑 [WF-LIFECYCLE] stop_gracefully() CALLED",
+                    extra={
+                        "api_task_id": self.api_task_id,
+                        "workforce_id": id(self)
+                    })
+        logger.info(f"[WF-LIFECYCLE] Current state before stop_gracefully: "
+                    f"{self._state.name}, _running: {self._running}")
        logger.info("=" * 80)
        super().stop_gracefully()
-        logger.info(f"[WF-LIFECYCLE] ✅ super().stop_gracefully() completed, new state: {self._state.name}, _running: {self._running}")
+        logger.info(
+            f"[WF-LIFECYCLE] ✅ super().stop_gracefully() completed, "
+            f"new state: {self._state.name}, _running: {self._running}")

    def skip_gracefully(self) -> None:
        logger.info("=" * 80)
-        logger.info(f"⏭️  [WF-LIFECYCLE] skip_gracefully() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
-        logger.info(f"[WF-LIFECYCLE] Current state before skip_gracefully: {self._state.name}, _running: {self._running}")
+        logger.info("⏭️  [WF-LIFECYCLE] skip_gracefully() CALLED",
+                    extra={
+                        "api_task_id": self.api_task_id,
+                        "workforce_id": id(self)
+                    })
+        logger.info(f"[WF-LIFECYCLE] Current state before skip_gracefully: "
+                    f"{self._state.name}, _running: {self._running}")
        logger.info("=" * 80)
        super().skip_gracefully()
-        logger.info(f"[WF-LIFECYCLE] ✅ super().skip_gracefully() completed, new state: {self._state.name}, _running: {self._running}")
+        logger.info(
+            f"[WF-LIFECYCLE] ✅ super().skip_gracefully() completed, "
+            f"new state: {self._state.name}, _running: {self._running}")

    def pause(self) -> None:
        logger.info("=" * 80)
-        logger.info(f"⏸️  [WF-LIFECYCLE] pause() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
-        logger.info(f"[WF-LIFECYCLE] Current state before pause: {self._state.name}, _running: {self._running}")
+        logger.info("⏸️  [WF-LIFECYCLE] pause() CALLED",
+                    extra={
+                        "api_task_id": self.api_task_id,
+                        "workforce_id": id(self)
+                    })
+        logger.info(f"[WF-LIFECYCLE] Current state before pause: "
+                    f"{self._state.name}, _running: {self._running}")
        logger.info("=" * 80)
        super().pause()
-        logger.info(f"[WF-LIFECYCLE] ✅ super().pause() completed, new state: {self._state.name}, _running: {self._running}")
+        logger.info(
+            f"[WF-LIFECYCLE] ✅ super().pause() completed, "
+            f"new state: {self._state.name}, _running: {self._running}")

    def resume(self) -> None:
        logger.info("=" * 80)
-        logger.info(f"▶️  [WF-LIFECYCLE] resume() CALLED", extra={"api_task_id": self.api_task_id, "workforce_id": id(self)})
-        logger.info(f"[WF-LIFECYCLE] Current state before resume: {self._state.name}, _running: {self._running}")
+        logger.info("▶️  [WF-LIFECYCLE] resume() CALLED",
+                    extra={
+                        "api_task_id": self.api_task_id,
+                        "workforce_id": id(self)
+                    })
+        logger.info(f"[WF-LIFECYCLE] Current state before resume: "
+                    f"{self._state.name}, _running: {self._running}")
        logger.info("=" * 80)
        super().resume()
-        logger.info(f"[WF-LIFECYCLE] ✅ super().resume() completed, new state: {self._state.name}, _running: {self._running}")
+        logger.info(
+            f"[WF-LIFECYCLE] ✅ super().resume() completed, "
+            f"new state: {self._state.name}, _running: {self._running}")

    async def cleanup(self) -> None:
        r"""Clean up resources when workforce is done"""
--- a/backend/main.py
+++ b/backend/main.py
@ -24,23 +24,27 @@ _project_root = pathlib.Path(__file__).parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))

-# 1) Load env and init traceroot BEFORE importing modules that get a logger
-from utils import traceroot_wrapper as traceroot
+import logging
+
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+
+# Disable verbose CAMEL logs
+logging.getLogger("camel").setLevel(logging.WARNING)
+logging.getLogger("camel.base_model").setLevel(logging.WARNING)
+logging.getLogger("camel.agents").setLevel(logging.WARNING)
+logging.getLogger("camel.societies").setLevel(logging.WARNING)
+
 from app import api
-
-# Only initialize traceroot if enabled
-if traceroot.is_enabled():
-    from traceroot.integrations.fastapi import connect_fastapi
-    connect_fastapi(api)
-
-# 2) Now safe to import modules that use traceroot.get_logger() at import-time
 from app.component.environment import env
 from app.router import register_routers

-
 os.environ["PYTHONIOENCODING"] = "utf-8"

-app_logger = traceroot.get_logger("main")
+app_logger = logging.getLogger("main")

 # Log application startup
 app_logger.info("Starting Eigent Multi-Agent System API")
@ -82,9 +86,19 @@ async def write_pid_file():
    app_logger.info(f"PID file written: {os.getpid()}")


-# Create task to write PID
-pid_task = asyncio.create_task(write_pid_file())
-app_logger.info("PID write task created")
+# PID task will be created on startup
+pid_task = None
+
+@api.on_event("startup")
+async def startup_event():
+    global pid_task
+    pid_task = asyncio.create_task(write_pid_file())
+    app_logger.info("PID write task created")
+
+    # Initialize telemetry tracer provider
+    from app.utils.telemetry.workforce_metrics import initialize_tracer_provider
+    initialize_tracer_provider()
+    app_logger.info("Telemetry tracer provider initialized")

 # Graceful shutdown handler
 shutdown_event = asyncio.Event()
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@ -17,10 +17,12 @@ dependencies = [
    "inflection>=0.5.1",
    "aiofiles>=24.1.0",
    "openai>=1.99.3,<2",
-    "traceroot>=0.0.7",
    "nodejs-wheel>=22.18.0",
    "numpy>=1.23.0,<2.0.0",
    "debugpy>=1.8.17",
+    "opentelemetry-api>=1.34.1",
+    "opentelemetry-sdk>=1.34.1",
+    "opentelemetry-exporter-otlp-proto-http>=1.34.1",
 ]


--- a/backend/tests/unit/component/test_environment_security.py
+++ b/backend/tests/unit/component/test_environment_security.py
@ -0,0 +1,170 @@
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+from app.component.environment import env_base_dir, sanitize_env_path
+
+
+def test_none_input_returns_none():
+    """Test that None input returns None."""
+    assert sanitize_env_path(None) is None
+
+
+def test_empty_string_returns_none():
+    """Test that empty string returns None."""
+    assert sanitize_env_path("") is None
+
+
+def test_valid_relative_path():
+    """Test that valid relative path within base dir is accepted."""
+    result = sanitize_env_path("project1.env")
+    assert result is not None
+    assert result.startswith(env_base_dir)
+    assert result.endswith("project1.env")
+
+
+def test_valid_absolute_path_within_base_dir():
+    """Test that absolute path within base directory is accepted."""
+    valid_path = os.path.join(env_base_dir, "valid.env")
+    result = sanitize_env_path(valid_path)
+    assert result == os.path.abspath(valid_path)
+
+
+def test_path_traversal_attack_rejected():
+    """Test that path traversal attempts cannot escape the base directory."""
+    malicious_paths = [
+        "../../../etc/passwd",
+        "../../.ssh/id_rsa.env",
+        "../outside.env",
+        "subdir/../../outside.env",
+    ]
+    for path in malicious_paths:
+        result = sanitize_env_path(path)
+        # A falsy result means the path was rejected; any other result
+        # must still start with env_base_dir
+        if result:
+            assert result.startswith(env_base_dir), \
+                f"Path traversal not blocked: {path} -> {result}"
+
+
+def test_absolute_path_outside_base_dir_rejected():
+    """Test that absolute paths outside base directory are rejected."""
+    malicious_paths = [
+        "/etc/passwd",
+        "/tmp/evil.env",
+        "/root/.env",
+        str(Path.home() / "evil.env"),
+    ]
+    for path in malicious_paths:
+        result = sanitize_env_path(path)
+        assert result is None, \
+            f"Absolute path outside base dir not rejected: {path}"
+
+
+def test_non_env_extension_rejected():
+    """Test that files without .env extension are rejected."""
+    invalid_paths = [
+        "config.txt",
+        "settings.json",
+        "environment",
+        ".bashrc",
+        "script.py",
+    ]
+    for path in invalid_paths:
+        result = sanitize_env_path(path)
+        assert result is None, \
+            f"Non-.env file not rejected: {path}"
+
+
+def test_nested_valid_path():
+    """Test that valid nested paths within base dir are accepted."""
+    result = sanitize_env_path("projects/project1/config.env")
+    assert result is not None
+    assert result.startswith(env_base_dir)
+    assert result.endswith("config.env")
+
+
+def test_symlink_escape_attempt():
+    """Test that a symlink inside base_dir cannot be used to escape it."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create a (dangling) symlink pointing outside base_dir
+        link_path = os.path.join(env_base_dir, "evil_link.env")
+        target_path = os.path.join(tmpdir, "outside.env")
+
+        # This test only makes sense if we can create symlinks
+        try:
+            if os.path.lexists(link_path):
+                os.unlink(link_path)
+            os.symlink(target_path, link_path)
+
+            # Ask for the link by its name inside base_dir
+            result = sanitize_env_path("evil_link.env")
+
+            # Either rejected or stays within base_dir after resolution
+            if result:
+                resolved = Path(result).resolve()
+                base_resolved = Path(env_base_dir).resolve()
+                # Check if resolved path is under base directory
+                try:
+                    resolved.relative_to(base_resolved)
+                    # If this succeeds, symlink stayed in base (acceptable)
+                except ValueError:
+                    # Symlink escaped - should have been rejected
+                    pytest.fail(f"Symlink escape not prevented: {result}")
+        except (OSError, NotImplementedError):
+            # Symlinks not supported on this system, skip test
+            pytest.skip("Symlinks not supported")
+        finally:
+            # Cleanup; lexists() since exists() is False for a dangling link
+            if os.path.lexists(link_path):
+                os.unlink(link_path)
+
+
+def test_dot_env_in_filename():
+    """Test that .env can appear in the middle of filename."""
+    result = sanitize_env_path("project.env.backup.env")
+    assert result is not None
+    assert result.endswith(".env")
+
+
+def test_case_sensitivity():
+    """Test handling of different case extensions."""
+    # Only .env (lowercase) should be accepted
+    assert sanitize_env_path("config.ENV") is None
+    assert sanitize_env_path("config.Env") is None
+    assert sanitize_env_path("config.env") is not None
+
+
+def test_special_characters_in_path():
+    """Test handling of special characters in valid paths."""
+    # These should be accepted if they end with .env and stay in base_dir
+    valid_special_chars = [
+        "my-project.env",
+        "project_name.env",
+        "project.2024.env",
+    ]
+    for path in valid_special_chars:
+        result = sanitize_env_path(path)
+        assert result is not None, (f"Valid path with special "
+                                    f"chars rejected: {path}")
+        assert result.startswith(env_base_dir)
+
+
+def test_whitespace_handling():
+    """Test handling of whitespace in paths."""
+    # Whitespace should be preserved in valid paths
+    result = sanitize_env_path("my project.env")
+    assert result is not None
+    assert "my project.env" in result
+
+
+def test_current_directory_traversal():
+    """Test that ./ and current directory references are handled."""
+    result = sanitize_env_path("./project.env")
+    assert result is not None
+    assert result.startswith(env_base_dir)
+
+    result = sanitize_env_path("././project.env")
+    assert result is not None
+    assert result.startswith(env_base_dir)
--- a/backend/tests/unit/utils/telemetry/test_workforce_metrics.py
+++ b/backend/tests/unit/utils/telemetry/test_workforce_metrics.py
@ -0,0 +1,297 @@
+"""Tests for workforce metrics telemetry."""
+from datetime import datetime
+from unittest.mock import MagicMock, Mock, patch
+
+import app.utils.telemetry.workforce_metrics as wm_module
+import pytest
+from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback
+from camel.societies.workforce.events import (LogEvent, TaskAssignedEvent,
+                                              TaskCompletedEvent,
+                                              TaskCreatedEvent,
+                                              TaskFailedEvent,
+                                              TaskStartedEvent,
+                                              WorkerCreatedEvent)
+
+
+@pytest.fixture(autouse=True)
+def reset_global_tracer_provider():
+    """Clear the module-level tracer provider after every test (autouse)."""
+    yield
+    # Teardown: runs after the test body and sets the module global to None
+    wm_module._GLOBAL_TRACER_PROVIDER = None
+
+
+@pytest.fixture
+def mock_env_vars():
+    """Patch os.environ with placeholder Langfuse settings for one test."""
+    # patch.dict undoes the overrides once the with-block exits
+    langfuse_env = {
+        "LANGFUSE_PUBLIC_KEY": "test_public_key",
+        "LANGFUSE_SECRET_KEY": "test_secret_key",
+        "LANGFUSE_BASE_URL": "https://test.langfuse.com",
+    }
+    env_patch = patch.dict("os.environ", langfuse_env)
+
+    with env_patch:
+        yield
+
+
+@pytest.fixture
+def metrics_callback(mock_env_vars):
+    """Yield a WorkforceMetricsCallback with a mocked tracer and root span."""
+    exporter_target = "app.utils.telemetry.workforce_metrics.OTLPSpanExporter"
+    with patch(exporter_target):
+        # Initialize the provider before building the callback
+        wm_module.initialize_tracer_provider()
+
+        cb = WorkforceMetricsCallback(project_id="test_project",
+                                      task_id="test_task")
+        # Swap in mocks so tests can inspect the span calls
+        cb.tracer, cb.root_span = Mock(), Mock()
+        yield cb
+
+
+def test_log_worker_created(metrics_callback):
+    """Test that log_worker_created sets span attributes and a status."""
+    event = WorkerCreatedEvent(worker_id="worker_1", worker_type="test_worker",
+                               role="test_role")
+
+    mock_span = Mock()
+    # __exit__ returns False so errors raised inside the span still propagate
+    span_cm = Mock(__enter__=Mock(return_value=mock_span),
+                   __exit__=Mock(return_value=False))
+    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)
+
+    metrics_callback.log_worker_created(event, agent_class="TestAgent",
+                                        model_type="gpt-4")
+
+    # Verify span attributes were set
+    assert mock_span.set_attribute.called
+    assert mock_span.set_status.called
+
+
+def test_log_task_created(metrics_callback):
+    """Test that log_task_created sets span attributes and a status."""
+    event = TaskCreatedEvent(task_id="task_1",
+                             description="Test task",
+                             parent_task_id="parent_1",
+                             task_type="test_type")
+
+    mock_span = Mock()
+    # __exit__ returns False so errors raised inside the span still propagate
+    span_cm = Mock(__enter__=Mock(return_value=mock_span),
+                   __exit__=Mock(return_value=False))
+    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)
+
+    metrics_callback.log_task_created(event)
+
+    # Verify span attributes were set
+    assert mock_span.set_attribute.called
+    assert mock_span.set_status.called
+
+
+def test_log_task_assigned(metrics_callback):
+    """Test that log_task_assigned sets span attributes and a status."""
+    event = TaskAssignedEvent(task_id="task_1",
+                              worker_id="worker_1",
+                              queue_time_seconds=1.5,
+                              dependencies=["dep_1", "dep_2"])
+
+    mock_span = Mock()
+    # __exit__ returns False so errors raised inside the span still propagate
+    span_cm = Mock(__enter__=Mock(return_value=mock_span),
+                   __exit__=Mock(return_value=False))
+    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)
+
+    metrics_callback.log_task_assigned(event)
+
+    # Verify span attributes were set
+    assert mock_span.set_attribute.called
+    assert mock_span.set_status.called
+
+
+def test_log_task_started(metrics_callback):
+    """Test that log_task_started opens a span and tracks it by task id."""
+    started_span = Mock()
+    metrics_callback.tracer.start_span = Mock(return_value=started_span)
+    event = TaskStartedEvent(task_id="task_1", worker_id="worker_1")
+
+    metrics_callback.log_task_started(event)
+
+    # The span is registered under the task id and received attributes
+    tracked = metrics_callback.task_spans
+    assert "task_1" in tracked
+    assert started_span.set_attribute.called
+
+
+def test_log_task_completed(metrics_callback):
+    """Test that log_task_completed ends and untracks the task's span."""
+    # Arrange: register a span as if the task had already been started
+    task_id = "task_1"
+    open_span = Mock()
+    metrics_callback.task_spans[task_id] = open_span
+
+    # Completion event carrying timing and token usage
+    usage = {"input_tokens": 100,
+             "output_tokens": 50}
+    event = TaskCompletedEvent(
+        task_id=task_id,
+        worker_id="worker_1",
+        parent_task_id="parent_1",
+        processing_time_seconds=2.5,
+        timestamp=datetime.now(),
+        token_usage=usage,
+    )
+
+    metrics_callback.log_task_completed(event)
+
+    # Assert: span removed from tracking, annotated, given a status, ended
+    assert task_id not in metrics_callback.task_spans
+    assert open_span.set_attribute.called
+    assert open_span.set_status.called
+    assert open_span.end.called
+
+
+def test_log_task_failed(metrics_callback):
+    """Test that log_task_failed ends and untracks the task's span."""
+    # Arrange: register a span as if the task had already been started
+    task_id = "task_1"
+    open_span = Mock()
+    metrics_callback.task_spans[task_id] = open_span
+
+    # Failure event for that same task
+    event = TaskFailedEvent(task_id=task_id,
+                            worker_id="worker_1",
+                            parent_task_id="parent_1",
+                            error_message="Test error")
+
+    metrics_callback.log_task_failed(event)
+
+    # Assert: span removed from tracking, annotated, given a status, ended
+    tracked_spans = metrics_callback.task_spans
+    assert task_id not in tracked_spans
+    assert open_span.set_attribute.called
+    assert open_span.set_status.called
+    assert open_span.end.called
+
+
+def test_log_message_error(metrics_callback):
+    """Test that an error-level LogEvent sets attributes on a span."""
+    event = LogEvent(level="error", message="Test error message",
+                     metadata={"key": "value"})
+
+    mock_span = Mock()
+    # __exit__ returns False so errors raised inside the span still propagate
+    span_cm = Mock(__enter__=Mock(return_value=mock_span),
+                   __exit__=Mock(return_value=False))
+    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)
+
+    metrics_callback.log_message(event)
+
+    assert mock_span.set_attribute.called
+
+
+def test_log_message_quality_score(metrics_callback):
+    """Test that log_message extracts a quality score from an info message."""
+    # Info-level message that embeds the task id and the score in its text
+    scored_message = "Task task_1 completed successfully (quality score: 85)."
+    event = LogEvent(level="info", message=scored_message)
+
+    metrics_callback.log_message(event)
+
+    # The parsed score is stored under the task id
+    scores = metrics_callback.task_quality_scores
+    assert "task_1" in scores
+    assert scores["task_1"] == 85
+
+
+def test_log_all_tasks_completed(metrics_callback):
+    """Test that log_all_tasks_completed records a span and ends the root."""
+    event = MagicMock(timestamp=datetime.now(), total_tasks=5)
+
+    mock_span = Mock()
+    # __exit__ returns False so errors raised inside the span still propagate
+    span_cm = Mock(__enter__=Mock(return_value=mock_span),
+                   __exit__=Mock(return_value=False))
+    metrics_callback.tracer.start_as_current_span = Mock(return_value=span_cm)
+
+    metrics_callback.log_all_tasks_completed(event)
+
+    # Verify span was created and root span was ended
+    assert mock_span.set_attribute.called
+    assert mock_span.set_status.called
+    assert metrics_callback.root_span.end.called
+
+
+def test_batch_span_processor_configuration_prevents_oom(mock_env_vars):
+    """Test that BatchSpanProcessor receives explicit queue/batch limits."""
+    exporter_path = "app.utils.telemetry.workforce_metrics.OTLPSpanExporter"
+    processor_path = "app.utils.telemetry.workforce_metrics.BatchSpanProcessor"
+    with patch(exporter_path) as exporter_cls, \
+            patch(processor_path) as processor_cls:
+        wm_module.initialize_tracer_provider()
+        # One processor is built, wrapping the patched exporter instance
+        processor_cls.assert_called_once()
+        positional, keyword = processor_cls.call_args
+        assert positional[0] == exporter_cls.return_value
+
+        # Keyword limits passed to the processor
+        expected_limits = {
+            "max_queue_size": 4096,
+            "export_timeout_millis": 30000,
+            "schedule_delay_millis": 3000,
+            "max_export_batch_size": 1024,
+        }
+        for option, limit in expected_limits.items():
+            assert keyword[option] == limit
+
+
+def test_missing_langfuse_env_vars_disables_tracing():
+    """Test that a callback is disabled when no Langfuse env vars exist."""
+    exporter_path = "app.utils.telemetry.workforce_metrics.OTLPSpanExporter"
+    processor_path = "app.utils.telemetry.workforce_metrics.BatchSpanProcessor"
+    # clear=True empties os.environ for the duration of the with-block
+    empty_env = patch.dict("os.environ", {}, clear=True)
+
+    with empty_env, patch(exporter_path) as exporter_cls, \
+            patch(processor_path) as processor_cls:
+        # Both steps run without any credentials available
+        wm_module.initialize_tracer_provider()
+        callback = WorkforceMetricsCallback(project_id="test_project",
+                                            task_id="test_task")
+
+        # Tracing is reported as off
+        assert callback.enabled is False
+
+        # Neither the exporter nor the span processor was constructed
+        exporter_cls.assert_not_called()
+        processor_cls.assert_not_called()
+
+        # Logging through a disabled callback must not raise
+        worker_event = WorkerCreatedEvent(worker_id="worker_1",
+                                          worker_type="test_worker",
+                                          role="test_role")
+        callback.log_worker_created(worker_event)
+
+
+def test_multiple_callbacks_share_tracer_provider(mock_env_vars):
+    """Test that two callbacks trigger only one BatchSpanProcessor build."""
+    processor_path = "app.utils.telemetry.workforce_metrics.BatchSpanProcessor"
+
+    with patch(processor_path) as processor_cls:
+        # Single up-front initialization
+        wm_module.initialize_tracer_provider()
+
+        # Two callbacks for different projects and tasks
+        first = WorkforceMetricsCallback(project_id="project1",
+                                         task_id="task1")
+        second = WorkforceMetricsCallback(project_id="project2",
+                                          task_id="task2")
+
+        # The processor class was called exactly once
+        assert processor_cls.call_count == 1
+
+        # Tracing stays enabled on both callbacks
+        enabled_flags = (first.enabled, second.enabled)
+        assert enabled_flags[0] is True
+        assert enabled_flags[1] is True
--- a/backend/tests/unit/utils/test_workforce.py
+++ b/backend/tests/unit/utils/test_workforce.py
@ -383,15 +383,11 @@ class TestWorkforce:
            description="Test workforce"
        )
        
-        with patch('app.service.task.delete_task_lock', side_effect=Exception("Delete failed")), \
-             patch('traceroot.get_logger') as mock_get_logger:
-            
+        with patch('app.service.task.delete_task_lock', side_effect=Exception("Delete failed")):
+
            # Should not raise exception
            await workforce.cleanup()

-            # Should log the error
-            mock_get_logger.assert_called_once()
-

@pytest.mark.integration
 class TestWorkforceIntegration:
@ -636,15 +632,10 @@ class TestWorkforceErrorCases:
            description="Nonexistent lock test workforce"
        )
        
-        with patch('app.service.task.delete_task_lock', side_effect=Exception("Task lock not found")), \
-             patch('traceroot.get_logger') as mock_get_logger:
-            
+        with patch('app.service.task.delete_task_lock', side_effect=Exception("Task lock not found")):
            # Should handle missing task lock gracefully
            await workforce.cleanup()

-            # Should log the error
-            mock_get_logger.assert_called_once()
-
    def test_workforce_inheritance(self):
        """Test that Workforce properly inherits from BaseWorkforce."""
        from camel.societies.workforce.workforce import Workforce as BaseWorkforce
--- a/backend/uv.lock
+++ b/backend/uv.lock
--- a/server/app/component/database.py
+++ b/server/app/component/database.py
@ -14,9 +14,9 @@

 from sqlmodel import Session, create_engine
 from app.component.environment import env, env_or_fail
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("database")
+logger = logging.getLogger("database")

 logger.info("Initializing database engine", extra={
    "database_url_prefix": env_or_fail("database_url")[:20] + "...",
--- a/server/app/component/environment.py
+++ b/server/app/component/environment.py
@ -19,9 +19,9 @@ from fastapi import APIRouter, FastAPI
 from dotenv import load_dotenv
 import importlib
 from typing import Any, overload
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("environment")
+logger = logging.getLogger("environment")

 logger.info("Loading environment variables from .env file")
 load_dotenv()
--- a/server/app/controller/chat/history_controller.py
+++ b/server/app/controller/chat/history_controller.py
@ -21,17 +21,16 @@ from fastapi_babel import _
 from sqlmodel import Session, select, desc, case
 from app.component.auth import Auth, auth_must
 from app.component.database import session
-from utils import traceroot_wrapper as traceroot
+import logging
 from typing import Optional, Dict, List
 from collections import defaultdict

-logger = traceroot.get_logger("server_chat_history")
+logger = logging.getLogger("server_chat_history")

 router = APIRouter(prefix="/chat", tags=["Chat History"])


@router.post("/history", name="save chat history", response_model=ChatHistoryOut)
-@traceroot.trace()
 def create_chat_history(data: ChatHistoryIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Save new chat history."""
    user_id = auth.user.id
@ -51,7 +50,6 @@ def create_chat_history(data: ChatHistoryIn, session: Session = Depends(session)


@router.get("/histories", name="get chat history")
-@traceroot.trace()
 def list_chat_history(session: Session = Depends(session), auth: Auth = Depends(auth_must)) -> Page[ChatHistoryOut]:
    """List chat histories for current user."""
    user_id = auth.user.id
@ -75,7 +73,6 @@ def list_chat_history(session: Session = Depends(session), auth: Auth = Depends(


@router.get("/histories/grouped", name="get grouped chat history")
-@traceroot.trace()
 def list_grouped_chat_history(
    include_tasks: Optional[bool] = Query(True, description="Whether to include individual tasks in groups"),
    session: Session = Depends(session), 
@ -177,7 +174,6 @@ def list_grouped_chat_history(


@router.delete("/history/{history_id}", name="delete chat history")
-@traceroot.trace()
 def delete_chat_history(history_id: str, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Delete chat history."""
    user_id = auth.user.id
@ -203,7 +199,6 @@ def delete_chat_history(history_id: str, session: Session = Depends(session), au


@router.put("/history/{history_id}", name="update chat history", response_model=ChatHistoryOut)
-@traceroot.trace()
 def update_chat_history(
    history_id: int, data: ChatHistoryUpdate, session: Session = Depends(session), auth: Auth = Depends(auth_must)
 ):
@ -232,7 +227,6 @@ def update_chat_history(


@router.put("/project/{project_id}/name", name="update project name")
-@traceroot.trace()
 def update_project_name(
    project_id: str,
    new_name: str,
--- a/server/app/controller/chat/share_controller.py
+++ b/server/app/controller/chat/share_controller.py
@ -22,15 +22,14 @@ from starlette.responses import StreamingResponse
 from app.model.chat.chat_share import ChatHistoryShareOut, ChatShare, ChatShareIn
 from app.model.chat.chat_step import ChatStep
 from app.model.chat.chat_history import ChatHistory
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_chat_share")
+logger = logging.getLogger("server_chat_share")

 router = APIRouter(prefix="/chat", tags=["Chat Share"])


@router.get("/share/info/{token}", name="Get shared chat info", response_model=ChatHistoryShareOut)
-@traceroot.trace()
 def get_share_info(token: str, session: Session = Depends(session)):
    """
    Get shared chat history info by token, excluding sensitive data.
@ -56,7 +55,6 @@ def get_share_info(token: str, session: Session = Depends(session)):


@router.get("/share/playback/{token}", name="Playback shared chat via SSE")
-@traceroot.trace()
 async def share_playback(token: str, session: Session = Depends(session), delay_time: float = 0):
    """
    Playbacks the chat history via a sharing token (SSE).
@ -109,7 +107,6 @@ async def share_playback(token: str, session: Session = Depends(session), delay_


@router.post("/share", name="Generate sharable link for a task(1 day expiration)")
-@traceroot.trace()
 def create_share_link(data: ChatShareIn):
    """Generate sharing token with 1-day expiration for task."""
    try:
--- a/server/app/controller/chat/snapshot_controller.py
+++ b/server/app/controller/chat/snapshot_controller.py
@ -19,15 +19,14 @@ from sqlmodel import Session, select
 from app.component.database import session
 from app.component.auth import Auth, auth_must
 from fastapi_babel import _
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_chat_snapshot")
+logger = logging.getLogger("server_chat_snapshot")

 router = APIRouter(prefix="/chat", tags=["Chat Snapshot Management"])


@router.get("/snapshots", name="list chat snapshots", response_model=List[ChatSnapshot])
-@traceroot.trace()
 async def list_chat_snapshots(
    api_task_id: Optional[str] = None,
    camel_task_id: Optional[str] = None,
@ -49,7 +48,6 @@ async def list_chat_snapshots(


@router.get("/snapshots/{snapshot_id}", name="get chat snapshot", response_model=ChatSnapshot)
-@traceroot.trace()
 async def get_chat_snapshot(snapshot_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Get specific chat snapshot."""
    user_id = auth.user.id
@ -64,7 +62,6 @@ async def get_chat_snapshot(snapshot_id: int, session: Session = Depends(session


@router.post("/snapshots", name="create chat snapshot", response_model=ChatSnapshot)
-@traceroot.trace()
 async def create_chat_snapshot(
    snapshot: ChatSnapshotIn, auth: Auth = Depends(auth_must), session: Session = Depends(session)
 ):
@ -92,7 +89,6 @@ async def create_chat_snapshot(


@router.put("/snapshots/{snapshot_id}", name="update chat snapshot", response_model=ChatSnapshot)
-@traceroot.trace()
 async def update_chat_snapshot(
    snapshot_id: int,
    snapshot_update: ChatSnapshot,
@ -127,7 +123,6 @@ async def update_chat_snapshot(


@router.delete("/snapshots/{snapshot_id}", name="delete chat snapshot")
-@traceroot.trace()
 async def delete_chat_snapshot(snapshot_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Delete chat snapshot."""
    user_id = auth.user.id
--- a/server/app/controller/chat/step_controller.py
+++ b/server/app/controller/chat/step_controller.py
@ -23,15 +23,14 @@ from app.component.database import session
 from app.component.auth import Auth, auth_must
 from fastapi_babel import _
 from app.model.chat.chat_step import ChatStep, ChatStepOut, ChatStepIn
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_chat_step")
+logger = logging.getLogger("server_chat_step")

 router = APIRouter(prefix="/chat", tags=["Chat Step Management"])


@router.get("/steps", name="list chat steps", response_model=List[ChatStepOut])
-@traceroot.trace()
 async def list_chat_steps(
    task_id: str, step: Optional[str] = None, session: Session = Depends(session), auth: Auth = Depends(auth_must)
 ):
@ -49,7 +48,6 @@ async def list_chat_steps(


@router.get("/steps/playback/{task_id}", name="Playback Chat Step via SSE")
-@traceroot.trace()
 async def share_playback(
    task_id: str, delay_time: float = 0, session: Session = Depends(session), auth: Auth = Depends(auth_must)
 ):
@ -97,7 +95,6 @@ async def share_playback(


@router.get("/steps/{step_id}", name="get chat step", response_model=ChatStepOut)
-@traceroot.trace()
 async def get_chat_step(step_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Get specific chat step."""
    user_id = auth.user.id
@ -112,7 +109,6 @@ async def get_chat_step(step_id: int, session: Session = Depends(session), auth:


@router.post("/steps", name="create chat step")
-@traceroot.trace()
 async def create_chat_step(step: ChatStepIn, session: Session = Depends(session)):
    """Create new chat step. TODO: Implement request source validation."""
    try:
@ -134,7 +130,6 @@ async def create_chat_step(step: ChatStepIn, session: Session = Depends(session)


@router.put("/steps/{step_id}", name="update chat step", response_model=ChatStepOut)
-@traceroot.trace()
 async def update_chat_step(
    step_id: int, chat_step_update: ChatStep, session: Session = Depends(session), auth: Auth = Depends(auth_must)
 ):
@ -162,7 +157,6 @@ async def update_chat_step(


@router.delete("/steps/{step_id}", name="delete chat step")
-@traceroot.trace()
 async def delete_chat_step(step_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Delete chat step."""
    user_id = auth.user.id
--- a/server/app/controller/config/config_controller.py
+++ b/server/app/controller/config/config_controller.py
@ -19,15 +19,14 @@ from app.component.database import session
 from app.component.auth import Auth, auth_must
 from fastapi_babel import _
 from app.model.config.config import Config, ConfigCreate, ConfigUpdate, ConfigInfo, ConfigOut
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_config_controller")
+logger = logging.getLogger("server_config_controller")

 router = APIRouter(tags=["Config Management"])


@router.get("/configs", name="list configs", response_model=list[ConfigOut])
-@traceroot.trace()
 async def list_configs(
    config_group: Optional[str] = None, session: Session = Depends(session), auth: Auth = Depends(auth_must)
 ):
@ -44,7 +43,6 @@ async def list_configs(


@router.get("/configs/{config_id}", name="get config", response_model=ConfigOut)
-@traceroot.trace()
 async def get_config(
    config_id: int,
    session: Session = Depends(session),
@ -66,7 +64,6 @@ async def get_config(


@router.post("/configs", name="create config", response_model=ConfigOut)
-@traceroot.trace()
 async def create_config(config: ConfigCreate, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Create new configuration."""
    user_id = auth.user.id
@ -103,7 +100,6 @@ async def create_config(config: ConfigCreate, session: Session = Depends(session


@router.put("/configs/{config_id}", name="update config", response_model=ConfigOut)
-@traceroot.trace()
 async def update_config(
    config_id: int, config_update: ConfigUpdate, session: Session = Depends(session), auth: Auth = Depends(auth_must)
 ):
@ -149,7 +145,6 @@ async def update_config(


@router.delete("/configs/{config_id}", name="delete config")
-@traceroot.trace()
 async def delete_config(config_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Delete configuration."""
    user_id = auth.user.id
@ -171,7 +166,6 @@ async def delete_config(config_id: int, session: Session = Depends(session), aut


@router.get("/config/info", name="get config info")
-@traceroot.trace()
 async def get_config_info(
    show_all: bool = Query(False, description="Show all config info, including those with empty env_vars"),
 ):
--- a/server/app/controller/mcp/mcp_controller.py
+++ b/server/app/controller/mcp/mcp_controller.py
@ -27,9 +27,9 @@ from app.model.mcp.mcp_env import McpEnv, Status as McpEnvStatus
 from app.model.mcp.mcp_user import McpImportType, McpUser, Status
 from camel.toolkits.mcp_toolkit import MCPToolkit
 from app.component.environment import env
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_mcp_controller")
+logger = logging.getLogger("server_mcp_controller")

 from app.component.validator.McpServer import (
    McpRemoteServer,
@ -81,7 +81,6 @@ async def pre_instantiate_mcp_toolkit(config_dict: dict) -> bool:


@router.get("/mcps", name="mcp list")
-@traceroot.trace()
 async def gets(
    keyword: str | None = None,
    category_id: int | None = None,
@ -121,7 +120,6 @@ async def gets(


@router.get("/mcp", name="mcp detail", response_model=McpOut)
-@traceroot.trace()
 async def get(id: int, session: Session = Depends(session)):
    """Get MCP server details."""
    try:
@ -135,7 +133,6 @@ async def get(id: int, session: Session = Depends(session)):


@router.post("/mcp/install", name="mcp install")
-@traceroot.trace()
 async def install(mcp_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Install MCP server for user."""
    user_id = auth.user.id
@ -191,7 +188,6 @@ async def install(mcp_id: int, session: Session = Depends(session), auth: Auth =


@router.post("/mcp/import/{mcp_type}", name="mcp import")
-@traceroot.trace()
 async def import_mcp(
    mcp_type: McpImportType, mcp_data: dict, session: Session = Depends(session), auth: Auth = Depends(auth_must)
 ):
--- a/server/app/controller/mcp/proxy_controller.py
+++ b/server/app/controller/mcp/proxy_controller.py
@ -19,9 +19,9 @@ from app.component.environment import env_not_empty
 from app.model.mcp.proxy import ExaSearch
 from typing import Any, cast
 import requests
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_proxy_controller")
+logger = logging.getLogger("server_proxy_controller")

 from app.model.user.key import Key

@ -30,7 +30,6 @@ router = APIRouter(prefix="/proxy", tags=["Mcp Servers"])


@router.post("/exa")
-@traceroot.trace()
 def exa_search(search: ExaSearch, key: Key = Depends(key_must)):
    """Search using Exa API."""
    EXA_API_KEY = env_not_empty("EXA_API_KEY")
@ -100,7 +99,6 @@ def exa_search(search: ExaSearch, key: Key = Depends(key_must)):


@router.get("/google")
-@traceroot.trace()
 def google_search(query: str, search_type: str = "web", key: Key = Depends(key_must)):
    """Search using Google Custom Search API."""
    # https://developers.google.com/custom-search/v1/overview
--- a/server/app/controller/mcp/user_controller.py
+++ b/server/app/controller/mcp/user_controller.py
@ -23,9 +23,9 @@ from app.model.mcp.mcp_user import McpUser, McpUserIn, McpUserOut, McpUserUpdate
 from app.model.mcp.mcp import Mcp
 from camel.toolkits.mcp_toolkit import MCPToolkit
 from app.component.environment import env
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_mcp_user_controller")
+logger = logging.getLogger("server_mcp_user_controller")

 router = APIRouter(tags=["McpUser Management"])

@ -70,7 +70,6 @@ async def pre_instantiate_mcp_toolkit(config_dict: dict) -> bool:


@router.get("/mcp/users", name="list mcp users", response_model=List[McpUserOut])
-@traceroot.trace()
 async def list_mcp_users(
    mcp_id: Optional[int] = None,
    session: Session = Depends(session),
@ -89,7 +88,6 @@ async def list_mcp_users(


@router.get("/mcp/users/{mcp_user_id}", name="get mcp user", response_model=McpUserOut)
-@traceroot.trace()
 async def get_mcp_user(mcp_user_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Get MCP user details."""
    query = select(McpUser).where(McpUser.id == mcp_user_id)
@ -102,7 +100,6 @@ async def get_mcp_user(mcp_user_id: int, session: Session = Depends(session), au


@router.post("/mcp/users", name="create mcp user", response_model=McpUserOut)
-@traceroot.trace()
 async def create_mcp_user(mcp_user: McpUserIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Create MCP user installation."""
    user_id = auth.user.id
@ -145,7 +142,6 @@ async def create_mcp_user(mcp_user: McpUserIn, session: Session = Depends(sessio


@router.put("/mcp/users/{id}", name="update mcp user")
-@traceroot.trace()
 async def update_mcp_user(
    id: int,
    update_item: McpUserUpdate,
@ -175,7 +171,6 @@ async def update_mcp_user(


@router.delete("/mcp/users/{mcp_user_id}", name="delete mcp user")
-@traceroot.trace()
 async def delete_mcp_user(mcp_user_id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Delete MCP user installation."""
    user_id = auth.user.id
--- a/server/app/controller/oauth/oauth_controller.py
+++ b/server/app/controller/oauth/oauth_controller.py
@ -17,15 +17,14 @@ from fastapi.responses import RedirectResponse, JSONResponse, HTMLResponse
 from app.component.environment import env
 from app.component.oauth_adapter import OauthCallbackPayload, get_oauth_adapter
 from typing import Optional
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_oauth_controller")
+logger = logging.getLogger("server_oauth_controller")

 router = APIRouter(prefix="/oauth", tags=["Oauth Servers"])


@router.get("/{app}/login", name="OAuth Login Redirect")
-@traceroot.trace()
 def oauth_login(app: str, request: Request, state: Optional[str] = None):
    """Redirect user to OAuth provider's authorization endpoint."""
    try:
@ -50,7 +49,6 @@ def oauth_login(app: str, request: Request, state: Optional[str] = None):


@router.get("/{app}/callback", name="OAuth Callback")
-@traceroot.trace()
 def oauth_callback(app: str, request: Request, code: Optional[str] = None, state: Optional[str] = None):
    """Handle OAuth provider callback and redirect to client app."""
    if not code:
@ -78,7 +76,6 @@ def oauth_callback(app: str, request: Request, code: Optional[str] = None, state


@router.post("/{app}/token", name="OAuth Fetch Token")
-@traceroot.trace()
 def fetch_token(app: str, request: Request, data: OauthCallbackPayload):
    """Exchange authorization code for access token."""
    try:
--- a/server/app/controller/provider/provider_controller.py
+++ b/server/app/controller/provider/provider_controller.py
@ -24,15 +24,14 @@ from sqlalchemy.exc import SQLAlchemyError
 from app.component.database import session
 from app.component.auth import Auth, auth_must
 from app.model.provider.provider import Provider, ProviderIn, ProviderOut, ProviderPreferIn
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_provider_controller")
+logger = logging.getLogger("server_provider_controller")

 router = APIRouter(tags=["Provider Management"])


@router.get("/providers", name="list providers", response_model=Page[ProviderOut])
-@traceroot.trace()
 async def gets(
    keyword: str | None = None,
    prefer: Optional[bool] = Query(None, description="Filter by prefer status"),
@ -52,7 +51,6 @@ async def gets(


@router.get("/provider", name="get provider detail", response_model=ProviderOut)
-@traceroot.trace()
 async def get(id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Get provider details."""
    user_id = auth.user.id
@ -66,7 +64,6 @@ async def get(id: int, session: Session = Depends(session), auth: Auth = Depends


@router.post("/provider", name="create provider", response_model=ProviderOut)
-@traceroot.trace()
 async def post(data: ProviderIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Create a new provider."""
    user_id = auth.user.id
@ -81,7 +78,6 @@ async def post(data: ProviderIn, session: Session = Depends(session), auth: Auth


@router.put("/provider/{id}", name="update provider", response_model=ProviderOut)
-@traceroot.trace()
 async def put(id: int, data: ProviderIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Update provider details."""
    user_id = auth.user.id
@ -109,7 +105,6 @@ async def put(id: int, data: ProviderIn, session: Session = Depends(session), au


@router.delete("/provider/{id}", name="delete provider")
-@traceroot.trace()
 async def delete(id: int, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Delete a provider."""
    user_id = auth.user.id
@ -130,7 +125,6 @@ async def delete(id: int, session: Session = Depends(session), auth: Auth = Depe


@router.post("/provider/prefer", name="set provider prefer")
-@traceroot.trace()
 async def set_prefer(data: ProviderPreferIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Set preferred provider for user."""
    user_id = auth.user.id
--- a/server/app/controller/user/login_controller.py
+++ b/server/app/controller/user/login_controller.py
@ -29,16 +29,15 @@ from app.model.user.user import (
    RegisterIn,
 )
 from app.component.environment import env
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_login_controller")
+logger = logging.getLogger("server_login_controller")


 router = APIRouter(tags=["Login/Registration"])


@router.post("/login", name="login by email or password")
-@traceroot.trace()
 async def by_password(
    data: LoginByPasswordIn, session: Session = Depends(session)
 ) -> LoginResponse:
@ -63,7 +62,6 @@ async def by_password(


@router.post("/dev_login", name="OAuth2 password flow login (for Swagger UI)")
-@traceroot.trace()
 async def dev_login(
    username: str = Form(...),  # OAuth2 uses 'username' but we accept email
    password: str = Form(...),
@ -96,7 +94,6 @@ async def dev_login(


@router.post("/login-by_stack", name="login by stack")
-@traceroot.trace()
 async def by_stack_auth(
    token: str,
    type: str = "signup",
@ -169,7 +166,6 @@ async def by_stack_auth(


@router.post("/register", name="register by email/password")
-@traceroot.trace()
 async def register(data: RegisterIn, session: Session = Depends(session)):
    email = data.email

--- a/server/app/controller/user/user_controller.py
+++ b/server/app/controller/user/user_controller.py
@ -25,15 +25,14 @@ from app.model.mcp.mcp_user import McpUser
 from app.model.config.config import Config
 from app.model.chat.chat_snpshot import ChatSnapshot
 from app.model.user.user_credits_record import UserCreditsRecord
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_user_controller")
+logger = logging.getLogger("server_user_controller")

 router = APIRouter(tags=["User"])


@router.get("/user", name="user info", response_model=UserOut)
-@traceroot.trace()
 def get(auth: Auth = Depends(auth_must), session: Session = Depends(session)):
    """Get current user information and refresh credits."""
    user: User = auth.user
@ -43,7 +42,6 @@ def get(auth: Auth = Depends(auth_must), session: Session = Depends(session)):


@router.put("/user", name="update user info", response_model=UserOut)
-@traceroot.trace()
 def put(data: UserIn, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Update user basic information."""
    model = auth.user
@ -54,7 +52,6 @@ def put(data: UserIn, session: Session = Depends(session), auth: Auth = Depends(


@router.put("/user/profile", name="update user profile", response_model=UserProfile)
-@traceroot.trace()
 def put_profile(data: UserProfile, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Update user profile details."""
    model = auth.user
@ -67,7 +64,6 @@ def put_profile(data: UserProfile, session: Session = Depends(session), auth: Au


@router.get("/user/privacy", name="get user privacy")
-@traceroot.trace()
 def get_privacy(session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Get user privacy settings."""
    user_id = auth.user.id
@ -83,7 +79,6 @@ def get_privacy(session: Session = Depends(session), auth: Auth = Depends(auth_m


@router.put("/user/privacy", name="update user privacy")
-@traceroot.trace()
 def put_privacy(data: UserPrivacySettings, session: Session = Depends(session), auth: Auth = Depends(auth_must)):
    """Update user privacy settings."""
    user_id = auth.user.id
@ -104,7 +99,6 @@ def put_privacy(data: UserPrivacySettings, session: Session = Depends(session),


@router.get("/user/current_credits", name="get user current credits")
-@traceroot.trace()
 def get_user_credits(auth: Auth = Depends(auth_must), session: Session = Depends(session)):
    """Get user's current credit balance."""
    user = auth.user
@ -121,7 +115,6 @@ def get_user_credits(auth: Auth = Depends(auth_must), session: Session = Depends


@router.get("/user/stat", name="get user stat", response_model=UserStatOut)
-@traceroot.trace()
 def get_user_stat(auth: Auth = Depends(auth_must), session: Session = Depends(session)):
    """Get current user's operation statistics."""
    user_id = auth.user.id
@ -152,7 +145,6 @@ def get_user_stat(auth: Auth = Depends(auth_must), session: Session = Depends(se


@router.post("/user/stat", name="record user stat")
-@traceroot.trace()
 def record_user_stat(
    data: UserStatActionIn,
    auth: Auth = Depends(auth_must),
--- a/server/app/controller/user/user_password_controller.py
+++ b/server/app/controller/user/user_password_controller.py
@ -22,15 +22,14 @@ from app.component.encrypt import password_hash, password_verify
 from app.exception.exception import UserException
 from app.model.user.user import UpdatePassword, UserOut
 from fastapi_babel import _
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("server_password_controller")
+logger = logging.getLogger("server_password_controller")

 router = APIRouter(tags=["User"])


@router.put("/user/update-password", name="update password", response_model=UserOut)
-@traceroot.trace()
 def update_password(data: UpdatePassword, auth: Auth = Depends(auth_must), session: Session = Depends(session)):
    """Update user password after verifying current password."""
    user_id = auth.user.id
--- a/server/app/model/abstract/model.py
+++ b/server/app/model/abstract/model.py
@ -24,9 +24,9 @@ from fastapi_babel import _
 from app.exception.exception import UserException
 from app.component.database import engine
 from convert_case import snake_case
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("abstract_model")
+logger = logging.getLogger("abstract_model")


 class AbstractModel(SQLModel):
--- a/server/app/model/provider/provider.py
+++ b/server/app/model/provider/provider.py
@ -15,7 +15,7 @@
 from enum import IntEnum
 from typing import Optional
 from pydantic import BaseModel
-from sqlalchemy import Boolean, Column, SmallInteger, String
+from sqlalchemy import Boolean, Column, SmallInteger
 from sqlalchemy.orm import Mapped
 from sqlmodel import Field, JSON
 from sqlalchemy_utils import ChoiceType
--- a/server/app/model/user/user_credits_record.py
+++ b/server/app/model/user/user_credits_record.py
@ -22,9 +22,9 @@ from app.model.abstract.model import AbstractModel, DefaultTimes
 from datetime import date, datetime, timedelta
 from app.model.user.key import ModelType
 from app.component.database import session_make
-from utils import traceroot_wrapper as traceroot
+import logging

-logger = traceroot.get_logger("user_credits_record")
+logger = logging.getLogger("user_credits_record")


 class CreditsChannel(IntEnum):
--- a/server/main.py
+++ b/server/main.py
@ -21,7 +21,7 @@ _project_root = pathlib.Path(__file__).parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))

-from utils import traceroot_wrapper as traceroot
+import logging
 from app import api
 from app.component.environment import auto_include_routers, env
 from fastapi.staticfiles import StaticFiles
@ -32,13 +32,7 @@ import app.middleware  # noqa: F401
 # Import exception handlers to register them
 import app.exception.handler  # noqa: F401

-# Only initialize traceroot if enabled
-if traceroot.is_enabled():
-    from traceroot.integrations.fastapi import connect_fastapi
-
-    connect_fastapi(api)
-
-logger = traceroot.get_logger("server_main")
+logger = logging.getLogger("server_main")

 prefix = env("url_prefix", "")
 auto_include_routers(api, prefix, "app/controller")
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@ -34,7 +34,6 @@ dependencies = [
    "cryptography>=45.0.4",
    "sqids>=0.5.2",
    "exa-py>=1.14.16",
-    "traceroot>=0.0.7",
 ]

 [tool.ruff]
--- a/server/uv.lock
+++ b/server/uv.lock
--- a/utils/__init__.py
+++ b/utils/__init__.py
@ -1,17 +0,0 @@
-# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
-
-from . import traceroot_wrapper
-
-__all__ = ['traceroot_wrapper']
--- a/utils/traceroot_wrapper.py
+++ b/utils/traceroot_wrapper.py
@ -1,99 +0,0 @@
-# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
-
-from pathlib import Path
-from typing import Callable
-import logging
-from dotenv import load_dotenv
-
-# Try to import traceroot, but handle gracefully if not available
-try:
-    import traceroot
-    TRACEROOT_AVAILABLE = True
-except ImportError:
-    TRACEROOT_AVAILABLE = False
-    traceroot = None
-
-# Auto-detect module name based on caller's path
-def _get_module_name():
-    """Automatically detect if this is being called from backend or server."""
-    import inspect
-    frame = inspect.currentframe()
-    try:
-        # Go up the stack to find the caller
-        caller_frame = frame.f_back.f_back if frame and frame.f_back else None
-        if caller_frame:
-            caller_file = caller_frame.f_globals.get('__file__', '')
-            if 'backend' in caller_file:
-                return 'backend'
-            elif 'server' in caller_file:
-                return 'server'
-    finally:
-        del frame
-    return 'unknown'
-
-env_path = Path(__file__).resolve().parents[1] / '.env'
-
-load_dotenv(env_path)
-
-if TRACEROOT_AVAILABLE and traceroot.init():
-    from traceroot.logger import get_logger as _get_traceroot_logger
-
-    trace = traceroot.trace
-
-    def get_logger(name: str = __name__):
-        """Get TraceRoot logger instance."""
-        return _get_traceroot_logger(name)
-
-    def is_enabled() -> bool:
-        """Check if TraceRoot is enabled."""
-        return True
-
-    # Log successful initialization
-    module_name = _get_module_name()
-    _init_logger = _get_traceroot_logger("traceroot_wrapper")
-    _init_logger.info("TraceRoot initialized successfully", extra={"backend": "traceroot", "service_module": module_name})
-else:
-    # No-op implementations when TraceRoot is not configured
-    def trace(*args, **kwargs):
-        """No-op trace decorator."""
-        def decorator(func: Callable) -> Callable:
-            return func
-        return decorator
-
-    def get_logger(name: str = __name__):
-        """Get standard Python logger when TraceRoot is disabled."""
-        logger = logging.getLogger(name)
-        if not logger.handlers:
-            # Configure basic logging if no handlers exist
-            handler = logging.StreamHandler()
-            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-            handler.setFormatter(formatter)
-            logger.addHandler(handler)
-            logger.setLevel(logging.INFO)
-        return logger
-
-    def is_enabled() -> bool:
-        """Check if TraceRoot is enabled."""
-        return False
-
-    # Log fallback mode
-    _fallback_logger = logging.getLogger("traceroot_wrapper")
-    if TRACEROOT_AVAILABLE:
-        _fallback_logger.warning("TraceRoot available but not initialized - using Python logging as fallback")
-    else:
-        _fallback_logger.warning("TraceRoot not available - using Python logging as fallback")
-
-
-__all__ = ['trace', 'get_logger', 'is_enabled']