diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 42e80ad5..0cdfe3e6 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -12,7 +12,7 @@ body: id: version attributes: label: What version of eigent are you using? - placeholder: E.g., 0.0.82 + placeholder: E.g., 0.0.84 validations: required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c533dfbf..53ec5ae9 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,8 +1,25 @@ +### Related Issue + + + + +Closes # + ### Description - + + +### Testing Evidence (REQUIRED) + + + + + +- [ ] I have included human-verified testing evidence in this PR. +- [ ] This PR includes frontend/UI changes, and I attached screenshot(s) or screen recording(s). +- [ ] No frontend/UI changes in this PR. ### What is the purpose of this pull request? @@ -10,3 +27,7 @@ - [ ] New Feature - [ ] Documentation update - [ ] Other + +### Contribution Guidelines Acknowledgement + +- [ ] I have read and agree to the [Eigent Contribution Guideline](https://github.com/eigent-ai/eigent/blob/main/CONTRIBUTING.md#eigent-contribution-guideline) diff --git a/.github/workflows/build-view.yml b/.github/workflows/build-view.yml index 23b56b91..714520c7 100644 --- a/.github/workflows/build-view.yml +++ b/.github/workflows/build-view.yml @@ -84,16 +84,26 @@ jobs: sudo apt-get update sudo apt-get install -y libfuse2 + # Install LLVM 20 for macOS Intel - llvmlite 0.46.0 only supports LLVM 20 (not 21) + - name: Install LLVM 20 (macOS Intel) + if: runner.os == 'macOS' && matrix.arch == 'x64' + run: | + brew install llvm@20 + echo "LLVM_DIR=$(brew --prefix llvm@20)/lib/cmake/llvm" >> $GITHUB_ENV + echo "CMAKE_PREFIX_PATH=$(brew --prefix llvm@20)/lib/cmake/llvm" >> $GITHUB_ENV + # Step for macOS builds with signing - name: Build Release Files (macOS with signing) if: runner.os == 'macOS' timeout-minutes: 90 run: | - # Increase file descriptor limit to prevent EMFILE errors during signing - # This is needed because electron-builder signs all files recursively, - # and Python venvs contain thousands of files - ulimit -n 65536 || ulimit -n 10240 - echo "File descriptor limit set to: $(ulimit -n)" + # Set file descriptor limit to system maximum (hard limit) to prevent EMFILE during signing + HARD=$(ulimit -Hn 2>/dev/null) + if [ -n "$HARD" ] && [ "$HARD" != "unlimited" ]; then + ulimit -n "$HARD" 2>/dev/null || true + fi + ulimit -n 65536 2>/dev/null || ulimit -n 10240 2>/dev/null || true + echo "File descriptor limit: $(ulimit -n) (hard: $(ulimit -Hn 2>/dev/null || echo 'N/A'))" npm run build -- --arch ${{ matrix.arch }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index eb1c069a..4256d53c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,6 +29,9 @@ jobs: - os: macos-latest arch: arm64 artifact_name: macos-arm64 + - os: macos-15-intel + arch: x64 + artifact_name: macos-intel - os: windows-latest arch: x64 artifact_name: windows-latest @@ -93,16 +96,26 @@ jobs: sudo apt-get update sudo apt-get install -y libfuse2 + # Install LLVM 20 for macOS Intel - llvmlite 0.46.0 only supports LLVM 20 (not 21) + - name: Install LLVM 20 (macOS Intel) + if: runner.os == 'macOS' && matrix.arch == 'x64' + run: | + brew install llvm@20 + echo "LLVM_DIR=$(brew --prefix llvm@20)/lib/cmake/llvm" >> $GITHUB_ENV + echo "CMAKE_PREFIX_PATH=$(brew --prefix llvm@20)/lib/cmake/llvm" >> $GITHUB_ENV + # Step for macOS builds with signing - name: Build Release Files (macOS with signing) if: runner.os == 'macOS' timeout-minutes: 90 run: | - # Increase file descriptor limit to prevent EMFILE errors during signing - # This is needed because electron-builder signs all files recursively, - # and Python venvs contain thousands of files - ulimit -n 65536 || ulimit -n 10240 - echo "File descriptor limit set to: $(ulimit -n)" + # Set file descriptor limit to system maximum (hard limit) to prevent EMFILE during signing + HARD=$(ulimit -Hn 2>/dev/null) + if [ -n "$HARD" ] && [ "$HARD" != "unlimited" ]; then + ulimit -n "$HARD" 2>/dev/null || true + fi + ulimit -n 65536 2>/dev/null || ulimit -n 10240 2>/dev/null || true + echo "File descriptor limit: $(ulimit -n) (hard: $(ulimit -Hn 2>/dev/null || echo 'N/A'))" npm run build -- --arch ${{ matrix.arch }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -205,7 +218,7 @@ jobs: steps: - name: Create directories run: | - mkdir -p release/mac-arm64 release/win-x64 release/linux-x64 + mkdir -p release/mac-arm64 release/mac-intel release/win-x64 release/linux-x64 - name: Download mac-arm64 artifact uses: actions/download-artifact@v7 @@ -213,6 +226,12 @@ jobs: name: release-macos-arm64-arm64 path: temp-mac-arm64 + - name: Download mac-intel artifact + uses: actions/download-artifact@v7 + with: + name: release-macos-intel-x64 + path: temp-mac-intel + - name: Download win-x64 artifact uses: actions/download-artifact@v7 with: @@ -236,6 +255,13 @@ jobs: find temp-mac-arm64 \( -name "*.dmg" -o -name "*.dmg.blockmap" -o -name "*.zip" -o -name "*.zip.blockmap" -o -name "latest*.yml" \) -exec mv {} release/mac-arm64/ \; || true fi + # mac-intel - move dmg, zip, blockmap, and yml files + if [ -d "temp-mac-intel/release" ]; then + find temp-mac-intel/release \( -name "*.dmg" -o -name "*.dmg.blockmap" -o -name "*.zip" -o -name "*.zip.blockmap" -o -name "latest*.yml" \) -exec mv {} release/mac-intel/ \; || true + else + find temp-mac-intel \( -name "*.dmg" -o -name "*.dmg.blockmap" -o -name "*.zip" -o -name "*.zip.blockmap" -o -name "latest*.yml" \) -exec mv {} release/mac-intel/ \; || true + fi + # win-x64 - move exe, blockmap, and yml files if [ -d "temp-win-x64/release" ]; then find temp-win-x64/release \( -name "*.exe" -o -name "*.exe.blockmap" -o -name "latest*.yml" \) -exec mv {} release/win-x64/ \; || true @@ -251,17 +277,74 @@ jobs: fi # Create GitHub Release + - name: Prepare GitHub Release assets + if: startsWith(github.ref, 'refs/tags/') + shell: bash + run: | + # GitHub release assets must have unique filenames. + # Both mac folders contain latest-mac.yml, so stage assets with + # channel-specific manifest names for macOS and keep one compatibility file. + rm -rf gh-release-assets + mkdir -p gh-release-assets + + copy_file() { + local src_file="$1" + local dst_name="$2" + [ -f "$src_file" ] || return 0 + + if [ -e "gh-release-assets/$dst_name" ]; then + echo "Duplicate release asset name detected: $dst_name" + echo " existing: gh-release-assets/$dst_name" + echo " incoming: $src_file" + exit 1 + fi + + cp -f "$src_file" "gh-release-assets/$dst_name" + } + + copy_assets() { + local src_dir="$1" + local skip_name="${2:-}" + [ -d "$src_dir" ] || return 0 + + while IFS= read -r -d '' file; do + local name + name="$(basename "$file")" + + if [ -n "$skip_name" ] && [ "$name" = "$skip_name" ]; then + continue + fi + + copy_file "$file" "$name" + done < <(find "$src_dir" -maxdepth 1 -type f -print0) + } + + # Stage all normal artifacts (exclude duplicate mac manifest names first). + copy_assets "release/mac-arm64" "latest-mac.yml" + copy_assets "release/mac-intel" "latest-mac.yml" + copy_assets "release/win-x64" + copy_assets "release/linux-x64" + + # macOS updater channels configured in electron/main/update.ts: + # arm64 -> latest-arm64-mac.yml, x64 -> latest-x64-mac.yml + copy_file "release/mac-arm64/latest-mac.yml" "latest-arm64-mac.yml" + copy_file "release/mac-intel/latest-mac.yml" "latest-x64-mac.yml" + + # Compatibility manifest for clients still using default latest-mac.yml. + copy_file "release/mac-intel/latest-mac.yml" "latest-mac.yml" + + echo "Prepared GitHub release assets:" + ls -1 gh-release-assets + - name: Create GitHub Release if: startsWith(github.ref, 'refs/tags/') uses: softprops/action-gh-release@v2 with: token: ${{ secrets.GITHUB_TOKEN }} files: | - release/mac-arm64/* - release/win-x64/* - release/linux-x64/* + gh-release-assets/* - # Extract version from tag (e.g., v0.0.82 -> 0.0.82) + # Extract version from tag (e.g., v0.0.84 -> 0.0.84) - name: Extract version if: startsWith(github.ref, 'refs/tags/') id: version diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 9281f5bb..c342812b 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -30,33 +30,6 @@ jobs: run: uv sync --group dev - name: Run pre-commit - run: | - uv run pre-commit run --files \ - $(find \ - app/agent \ - app/controller \ - app/exception \ - app/middleware \ - app/model \ - app/service \ - tests/app \ - -type f ! -path '*__pycache__*') \ - app/__init__.py \ - app/router.py \ - app/component/__init__.py \ - app/component/pydantic/__init__.py \ - app/utils/listen/__init__.py \ - app/utils/server/__init__.py \ - app/utils/toolkit/__init__.py \ - app/utils/toolkit/google_calendar_toolkit.py \ - app/utils/toolkit/google_gmail_mcp_toolkit.py \ - app/utils/toolkit/linkedin_toolkit.py \ - app/utils/toolkit/reddit_toolkit.py \ - app/utils/toolkit/slack_toolkit.py \ - app/utils/toolkit/twitter_toolkit.py \ - app/utils/toolkit/whatsapp_toolkit.py \ - app/utils/workforce.py \ - app/utils/single_agent_worker.py \ - tests/conftest.py + run: uv run pre-commit run --files $(git ls-files .) env: SKIP: no-commit-to-branch diff --git a/.markdownlintignore b/.markdownlintignore index c2be28e4..524e7e62 100644 --- a/.markdownlintignore +++ b/.markdownlintignore @@ -7,3 +7,4 @@ README_PT-BR.md server/README_CN.md server/README_EN.md docs/troubleshooting/bug.md +backend/benchmark/answer/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9140db80..a0b8f4e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,20 +1,54 @@ # 🐫 Welcome to Eigent! 🐫 Thank you for your interest in contributing to the Eigent project! 🎉 -We're excited to have your support. As an open-source product build on +We're excited to have your support. As an open-source product built on CAMEL in a rapidly evolving and open-ended field, we wholeheartedly welcome contributions of all kinds. Whether you want to introduce new -features, enhance the infrastructure, improve documentation, asking +features, enhance the infrastructure, improve documentation, raise issues, or fix bugs, we appreciate your enthusiasm and efforts. 🙌 You are welcome to join our [discord](https://discord.com/invite/CNcNpquyDc) for more efficient communication. 💬 +--- + +## Eigent Contribution Guideline + +Eigent is a multi-agent system designed to deliver a high-quality open source Cowork experience for users. We welcome developers who genuinely use Eigent to solve real-world problems to engage with us and build together. + +**Our goals are:** + +1. Pursue quality over quantity — in both code and features design within the Eigent repository. +2. Welcome any developer or user who truly uses Eigent, or shares our mission and vision, to discuss product and technology with us and bring the multi-agent open source Cowork system to more real users. + +### Why This Policy Exists + +As AI coding capabilities grow, an increasing number of AI coding bots or vibe code are introducing significant noise and risk to open-source repositories: + +1. **Code quality risks.** AI-generated code may contain subtle bugs or hallucinations. An excessive volume of LLM-generated code is presumed to be polluted code and dramatically increases heavy and meaningless maintenance costs. +2. **Community culture.** For Eigent's community, we uphold the core value of human collaboration and oppose low-effort, low-signal spamming. + +### Contribution Requirements + +We are taking the following precautionary steps to maintain the integrity of this open-source repository: + +1. **PRs must reference a prior discussion.** Every PR must link to a previously discussed and accepted issue, Discord thread, or equivalent. Drive-by PRs with no associated accepted issue will be closed. +2. **No unreviewed LLM-generated submissions.** We will close PRs directly that are primarily generated by LLMs or chatbots and submitted without meaningful human review especially "vibe-coded" submissions. +3. **Human-verified testing is required.** Do not submit code that is "theoretically correct but untested." Every PR must include proof of testing (e.g., screenshots, screen recordings, test output logs). Very important! +4. **AI-assisted drafts are acceptable for issues, discussions, and prototypes**, but they must be reviewed and edited by a human to reduce verbosity and noise. + +### Enforcement: Grounds for Immediate Ban + +The following abusive behaviors will result in an immediate ban (PR submission privileges revoked): + +1. **Inauthentic contribution activity.** Using AI tools to artificially inflate open-source contribution metrics for personal or commercial gain. +2. **Bulk, low-quality, irrelevant, or misleading AI-generated content.** + +--- + ## Join Our Community 🌍 ### Developer Meeting Time & Link 💻 -- English speakers: Mondays at 8 PM PDT. Join via Discord: - [Meeting Link](https://meet.google.com/sez-aomy-ebm?authuser=0&hs=122&ijlm=1753634732982) - Chinese Speakers: Mondays at 9 PM UTC+8. Join via TecentMeeting: [Meeting Link](https://meeting.tencent.com/dm/057wap1eeCSY) @@ -63,8 +97,9 @@ contribution you're making: - Add a demo script in the `examples` directory. We're a small team focused on building great things. If you have -something in mind that you'd like to add or modify, opening a pull -request is the ideal way to catch our attention. 🚀 +something in mind that you'd like to add or modify, please first open +an issue or start a discussion to align with the team before submitting +a pull request. 🚀 ### Contributing to Code Reviews 🔍 @@ -86,28 +121,25 @@ our coding standards. - If changes are necessary, the reviewer should leave constructive feedback. - The contributor addresses feedback and updates the PR. - The reviewer re-reviews the updated code. -- Once the code is approved by at least two reviewer, it can be merged into the main branch. +- Once the code is approved by at least two reviewers, it can be merged into the main branch. - Merging should be done by a maintainer or an authorized contributor. #### Code Review Checklist - Functionality - - Correctness: Does the code perform the intended task? Are edge cases handled? - Testing: Is there sufficient test coverage? Do all tests pass? - Security: Are there any security vulnerabilities introduced by the change? - Performance: Does the code introduce any performance regressions? - Code Quality - - Readability: Is the code easy to read and understand? Is it well-commented where necessary? - Maintainability: Is the code structured in a way that makes future changes easy? - Style: Does the code follow the project’s style guidelines? - Currently we use Ruff for format check and take [Google Python Style Guide](%22https://google.github.io/styleguide/pyguide.html%22) as reference. + Currently we use Ruff for format check and take [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) as reference. - Documentation: Are public methods, classes, and any complex logic well-documented? - Design - - Consistency: Does the code follow established design patterns and project architecture? - Modularity: Are the changes modular and self-contained? Does the code avoid unnecessary duplication? - Dependencies: Are dependencies minimized and used appropriately? @@ -173,7 +205,7 @@ response generation. Defaults to :obj:`OpenAIModel` with #### Naming Principle: Avoid Abbreviations in Naming -- Abbreviations can lead to ambiguity, especially since variable names and code in CAMEL are directly used by agents. +- Abbreviations can lead to ambiguity, especially since variable names and code in Eigent are directly used by agents. - Use clear, descriptive names that convey meaning without requiring additional explanation. This improves both human readability and the agent's ability to interpret the code. Examples: @@ -181,7 +213,7 @@ Examples: - Bad: msg_win_sz - Good: message_window_size -By adhering to this principle, we ensure that CAMEL remains accessible and unambiguous for both developers and AI agents. +By adhering to this principle, we ensure that Eigent remains accessible and unambiguous for both developers and AI agents. ### Board Item Create Workflow 🛠️ @@ -234,7 +266,7 @@ npm install npm run dev # In a separate terminal, start the backend server -cd eigent/server +cd server docker compose up -d # Stream the logs if you needed docker compose logs -f @@ -245,7 +277,7 @@ To run the application locally in developer mode: 1. Configure `.env.development`: - Set `VITE_USE_LOCAL_PROXY=true` - Set `VITE_PROXY_URL=http://localhost:3001` -1. Go to the settings to specify your model key and model type. +2. Go to the settings to specify your model key and model type. ## Common Actions 🔄 diff --git a/backend/.pre-commit-config.yaml b/backend/.pre-commit-config.yaml index b67f11a6..46b88942 100644 --- a/backend/.pre-commit-config.yaml +++ b/backend/.pre-commit-config.yaml @@ -38,8 +38,10 @@ repos: - id: ruff name: Ruff lint (auto-fix) args: [--fix] + exclude: 'benchmark/answer/' - id: ruff-format name: Ruff format + exclude: 'benchmark/answer/' # Security scanning - repo: https://github.com/PyCQA/bandit @@ -56,6 +58,7 @@ repos: hooks: - id: mdformat name: Format Markdown + exclude: 'benchmark/answer/' additional_dependencies: - mdformat-gfm - mdformat_frontmatter diff --git a/backend/app/agent/listen_chat_agent.py b/backend/app/agent/listen_chat_agent.py index 137c7580..6bdb61ca 100644 --- a/backend/app/agent/listen_chat_agent.py +++ b/backend/app/agent/listen_chat_agent.py @@ -608,16 +608,8 @@ class ListenChatAgent(ChatAgent): with set_process_task(self.process_task_id): # Try different invocation paths in order of preference if hasattr(tool, "func") and hasattr(tool.func, "async_call"): - # Case: FunctionTool wrapping an MCP tool - # Check if wrapped tool is sync to avoid run_in_executor - if hasattr(tool, "is_async") and not tool.is_async: - # Sync tool: call directly to preserve ContextVar - result = tool(**args) - if asyncio.iscoroutine(result): - result = await result - else: - # Async tool: use async_call - result = await tool.func.async_call(**args) + # MCP FunctionTool: always use async_call (sync wrapper can timeout) + result = await tool.func.async_call(**args) elif hasattr(tool, "async_call") and callable(tool.async_call): # Case: tool itself has async_call diff --git a/backend/app/agent/toolkit/hybrid_browser_toolkit.py b/backend/app/agent/toolkit/hybrid_browser_toolkit.py index e0e80038..dd9b8d71 100644 --- a/backend/app/agent/toolkit/hybrid_browser_toolkit.py +++ b/backend/app/agent/toolkit/hybrid_browser_toolkit.py @@ -599,6 +599,17 @@ class HybridBrowserToolkit(BaseHybridBrowserToolkit, AbstractToolkit): # Use typing_extensions.TypedDict for Pydantic <3.12 compatibility. return await super().browser_sheet_input(cells=cells) + def get_tools(self): + tools = super().get_tools() + for tool in tools: + if not getattr(tool.func, "__listen_toolkit__", False): + cls_method = getattr(type(self), tool.func.__name__, None) + if cls_method and getattr( + cls_method, "__listen_toolkit__", False + ): + tool.func.__listen_toolkit__ = True + return tools + @classmethod def toolkit_name(cls) -> str: return "Browser Toolkit" diff --git a/backend/app/agent/toolkit/terminal_toolkit.py b/backend/app/agent/toolkit/terminal_toolkit.py index ce35dfcd..e7451dd4 100644 --- a/backend/app/agent/toolkit/terminal_toolkit.py +++ b/backend/app/agent/toolkit/terminal_toolkit.py @@ -41,7 +41,7 @@ logger = logging.getLogger("terminal_toolkit") # App version - should match electron app version # TODO: Consider getting this from a shared config -APP_VERSION = "0.0.82" +APP_VERSION = "0.0.84" def get_terminal_base_venv_path() -> str: diff --git a/backend/app/model/chat.py b/backend/app/model/chat.py index dac358df..58d0e8bf 100644 --- a/backend/app/model/chat.py +++ b/backend/app/model/chat.py @@ -45,7 +45,7 @@ class QuestionAnalysisResult(BaseModel): McpServers = dict[Literal["mcpServers"], dict[str, dict]] PLATFORM_MAPPING = { - "Z.ai": "openai-compatible-model", + "z.ai": "openai-compatible-model", "ModelArk": "openai-compatible-model", } diff --git a/backend/app/utils/telemetry/workforce_metrics.py b/backend/app/utils/telemetry/workforce_metrics.py index 15b099e2..55035778 100644 --- a/backend/app/utils/telemetry/workforce_metrics.py +++ b/backend/app/utils/telemetry/workforce_metrics.py @@ -168,20 +168,18 @@ def initialize_tracer_provider() -> None: _GLOBAL_TRACER_PROVIDER = provider -def get_tracer_provider() -> TracerProvider: +def get_tracer_provider() -> TracerProvider | None: """Get the global TracerProvider instance. Returns: - TracerProvider: The global tracer provider - - Raises: - RuntimeError: If called before initialization + TracerProvider if initialized, None otherwise """ if _GLOBAL_TRACER_PROVIDER is None: - raise RuntimeError( + logger.warning( "TracerProvider not initialized. " "Call initialize_tracer_provider() during app startup." ) + return None return _GLOBAL_TRACER_PROVIDER @@ -258,22 +256,28 @@ class WorkforceMetricsCallback(WorkforceMetrics): # Get the global shared tracer provider # This ensures only one BatchSpanProcessor is running provider = get_tracer_provider() - - # Get tracer from the shared provider - # Use CAMEL version for instrumentation versioning - self.tracer = provider.get_tracer( - TRACER_NAME_WORKFORCE, camel.__version__ - ) - self.root_span = self.tracer.start_span( - f"{SPAN_WORKFORCE_EXECUTION}:{task_id}" - ) - # Langfuse-specific attributes - self.root_span.set_attribute(ATTR_LANGFUSE_SESSION_ID, project_id) - tags = json.dumps(DEFAULT_LANGFUSE_TAGS.copy()) - self.root_span.set_attribute(ATTR_LANGFUSE_TAGS, tags) - # Custom attributes - self.root_span.set_attribute(ATTR_PROJECT_ID, project_id) - self.root_span.set_attribute(ATTR_TASK_ID, task_id) + if provider is None: + # TracerProvider not initialized (e.g., app startup not + # completed or running in test environment) + self.enabled = False + else: + # Get tracer from the shared provider + # Use CAMEL version for instrumentation versioning + self.tracer = provider.get_tracer( + TRACER_NAME_WORKFORCE, camel.__version__ + ) + self.root_span = self.tracer.start_span( + f"{SPAN_WORKFORCE_EXECUTION}:{task_id}" + ) + # Langfuse-specific attributes + self.root_span.set_attribute( + ATTR_LANGFUSE_SESSION_ID, project_id + ) + tags = json.dumps(DEFAULT_LANGFUSE_TAGS.copy()) + self.root_span.set_attribute(ATTR_LANGFUSE_TAGS, tags) + # Custom attributes + self.root_span.set_attribute(ATTR_PROJECT_ID, project_id) + self.root_span.set_attribute(ATTR_TASK_ID, task_id) # Track active spans for task execution self.task_spans = {} diff --git a/backend/benchmark/.env.example b/backend/benchmark/.env.example new file mode 100644 index 00000000..45fd5ee8 --- /dev/null +++ b/backend/benchmark/.env.example @@ -0,0 +1,4 @@ +BENCHMARK_MODEL_PLATFORM="openai" +BENCHMARK_MODEL_TYPE="gpt-5.2" +BENCHMARK_API_KEY="" +BENCHMARK_API_URL="https://api.openai.com/v1" diff --git a/backend/benchmark/README.md b/backend/benchmark/README.md index 29caa560..71d4a6eb 100644 --- a/backend/benchmark/README.md +++ b/backend/benchmark/README.md @@ -76,7 +76,29 @@ The `metadata` field (optional) provides information about the benchmark: - `description`: Brief explanation of what skills or capabilities the benchmark tests - `tags`: Array of keywords for filtering and organization -`model_platform` and `model_type` default to `"openai"` and `"gpt-4o"`. `api_key` defaults to `$OPENAI_API_KEY`. Set `api_url` for custom endpoints. +The `model_kwargs` field is optional. Defaults come from `BENCHMARK_*` environment variables (see below), falling back to `openai` / `gpt-5.2` / `$OPENAI_API_KEY`. Per-benchmark JSON values override the environment defaults. + +### Custom model providers + +You can override the model for all benchmarks via environment variables (see `.env.example`): + +```bash +export BENCHMARK_MODEL_PLATFORM="openai-compatible-model" +export BENCHMARK_MODEL_TYPE="" +export BENCHMARK_API_KEY="" +export BENCHMARK_API_URL="" +``` + +| Variable | Default | Description | +| -------------------------- | --------------------------- | --------------------------------------------------------------------------- | +| `BENCHMARK_MODEL_PLATFORM` | `openai` | Provider name. Use `openai-compatible-model` for any OpenAI-compatible API. | +| `BENCHMARK_MODEL_TYPE` | `gpt-5.2` | Model identifier passed to the provider. | +| `BENCHMARK_API_KEY` | `$OPENAI_API_KEY` | API key for the provider. | +| `BENCHMARK_API_URL` | `https://api.openai.com/v1` | Base URL for the provider's API. | + +> **Important:** If the model is served through an OpenAI-compatible API (e.g. DeepSeek, MiniMax, Ollama, vLLM, LiteLLM, or any other non-OpenAI provider), set `BENCHMARK_MODEL_PLATFORM` to `openai-compatible-model` — **not** `openai`. The `openai` platform value is reserved for the official OpenAI API only. + +To override a single benchmark, add `model_kwargs` to its JSON config — these take priority over environment variables. 2. Create `benchmark/checker/.py` with a `check(working_directory: str) -> bool` function. diff --git a/backend/benchmark/__init__.py b/backend/benchmark/__init__.py index 3a4d90c0..fa7455a0 100644 --- a/backend/benchmark/__init__.py +++ b/backend/benchmark/__init__.py @@ -11,4 +11,3 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= - diff --git a/backend/benchmark/answer/0/hello_world.py b/backend/benchmark/answer/0/hello_world.py new file mode 100644 index 00000000..2ece742a --- /dev/null +++ b/backend/benchmark/answer/0/hello_world.py @@ -0,0 +1,25 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= + +@lambda _: _() +class _: + def __format__(_, __): + _.__class__._ = property(lambda _: print(__)) + return "" + + +def __() -> f"{_:Hello, WORLD!}": ... + + +_._ diff --git a/backend/benchmark/answer/1/python313_features.md b/backend/benchmark/answer/1/python313_features.md new file mode 100644 index 00000000..e138a906 --- /dev/null +++ b/backend/benchmark/answer/1/python313_features.md @@ -0,0 +1,7 @@ +# warnings + +PEP 702: The new `warnings.deprecated()` decorator provides a way to communicate deprecations to a static type checker and to warn on usage of deprecated classes and functions. A `DeprecationWarning` may also be emitted when a decorated function or class is used at runtime. (Contributed by Jelle Zijlstra in `gh-104003`.) + +# multiprocessing + +The default number of worker threads and processes is now selected using `os.process_cpu_count()` instead of `os.cpu_count()`. (Contributed by Victor Stinner in `gh-109649`.) diff --git a/backend/benchmark/answer/2/yc_w25_b2b_ai.csv b/backend/benchmark/answer/2/yc_w25_b2b_ai.csv new file mode 100644 index 00000000..ed817330 --- /dev/null +++ b/backend/benchmark/answer/2/yc_w25_b2b_ai.csv @@ -0,0 +1,77 @@ +company_name,product_description,ai_category +fira,Agentic AI platform for investment firms,ai-fintech +assistant-ui,Open-source React.js library for AI chat,ai-developer-tools +artifact,Collaborative AI-native IDE for hardware engineers,ai-developer-tools +axal,AI observability for modular codebase architecture,ai-developer-tools +trainloop,Reasoning fine-tuning platform for AI models,ai-infrastructure +tally,AI agents for accounting firms automating repetitive tasks,ai-agents +sammy labs,AI that maps every click path in software for user onboarding,ai-customer-support +mercura,AI quoting for distributors and manufacturers,ai-sales +cedar,In-product AI copilot for any app,ai-productivity +browser use,Open-source web agents automating browser workflows,ai-agents +tamlabs,AI-native document editor for Microsoft Word,ai-productivity +copycat,Next-gen RPA powered by browser agents,ai-agents +wildcard,Make APIs work for AI agents,ai-infrastructure +mastra,JavaScript framework for building AI agents,ai-developer-tools +afterquery,High-quality datasets and benchmarks for AI model training,ai-data +fuse ai,AI agents to replace Salesforce,ai-sales +peppr,Self-improving knowledge base synthesizing company data,ai-productivity +sennu ai,AI agents automating the tech consulting market,ai-agents +mesh,AI finance co-worker providing real-time insights,ai-fintech +outlit,AI agents for enterprise deal creation,ai-sales +tire swing,AI for healthcare compliance,ai-healthcare +calltree ai,Enterprise-grade AI support reps for call centers,ai-customer-support +operand,B2B knowledge management platform with AI search,ai-data +gulp information services,Real-time self-improvement infrastructure for AI agents,ai-infrastructure +zeroentropy,High accuracy search API over unstructured data,ai-infrastructure +cardamon,AI compliance co-pilot for regulated financial businesses,ai-fintech +tergle,AI agents for audit workflows,ai-fintech +carecycle,Voice AI teams for Medicare agencies,ai-customer-support +sift dev,AI-powered fraud decisioning for digital businesses,ai-security +maive,AI-native manufacturing execution system for factory operations,ai-other +weave,AI to measure and analyze engineering work,ai-analytics +caseflood,AI inbound sales team for law firms,ai-legal +tejas ai,Risk decisioning platform for banks powered by AI,ai-fintech +vora ai,AI recruiter for hiring managers,ai-hr +a0.dev,AI-powered mobile app builder,ai-coding +general agency company,AI coworkers that can learn and act like humans,ai-agents +a1base,Twilio for AI agents,ai-infrastructure +verbiflow,AI-powered CRM that finds leads and closes deals,ai-sales +contrario,Fully autonomous AI recruiting agency,ai-hr +ovlo,Conversational AI for e-commerce sales,ai-sales +truffle ai,AWS for AI agents,ai-infrastructure +superglue,Self-healing integration agent for enterprise workflows,ai-infrastructure +conntour,AI to monitor thousands of security cameras,ai-security +promptless,AI teammate that auto-updates customer-facing docs,ai-productivity +stamp,AI-native email client for professionals,ai-productivity +guse,Prompt-to-automation platform for business workflows,ai-agents +subimage,AI-powered infrastructure mapping and security platform,ai-security +casixty,Reddit marketing agent for technical audiences,ai-marketing +leaping ai,Self-improving voice AI agents for call center automation,ai-customer-support +vetnio,AI copilot automating admin work for veterinary pros,ai-healthcare +trace,Voice AI customer support for financial services,ai-customer-support +quantstruct,AI documentation engineer for product docs,ai-developer-tools +onlook,AI-powered visual editor for designers,ai-developer-tools +pig,API for automating Windows apps with AI,ai-developer-tools +vantel,AI software for commercial insurance brokers,ai-fintech +agentin ai,AI agents automating enterprise software processes,ai-agents +solidroad,AI agents for sales and support team training,ai-customer-support +trata,AI-powered research desk for hedge funds,ai-analytics +sophris,AI engineer for electronic design automation,ai-developer-tools +mundo ai,High quality multilingual training data for AI models,ai-data +athenahq,AI-powered brand discovery optimization for ChatGPT,ai-marketing +lopus ai,AI agents for revenue intelligence,ai-sales +harbera,AI healthcare provider credentialing software,ai-healthcare +augento,Improving AI agents through reinforcement learning,ai-infrastructure +macadamia,AI mechanical engineer that detects and fixes design errors,ai-other +asteroid,Browser agents for regulated industries,ai-agents +gale,AI-powered immigration law firm,ai-legal +olive,Build internal tools with natural language and AI,ai-developer-tools +cuckoo labs,Real-time AI translator for sales and marketing teams,ai-marketing +mosaic,AI agents for video editing workflows,ai-agents +oki,Track company progress with AI analytics,ai-analytics +amby health,AI copilot for ambulance agencies,ai-healthcare +g lnk,AI collaboration platform for healthcare organizations,ai-healthcare +artificial societies,AI simulation of target audiences for marketing predictions,ai-marketing +overstand labs,AI insights from customer communications across channels,ai-analytics +lucidic ai,Analytics and simulation tools for AI agents,ai-analytics diff --git a/backend/benchmark/checker/0.py b/backend/benchmark/checker/0.py index 1f4225f5..d584b969 100644 --- a/backend/benchmark/checker/0.py +++ b/backend/benchmark/checker/0.py @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= -"""Checker for benchmark 0: hello_world.py should print 'Hello, World!'""" +"""Checker for benchmark 0: hello_world.py should print 'Hello, WORLD!'""" import subprocess import sys @@ -33,11 +33,11 @@ def check(working_directory: str) -> bool: ) output = result.stdout.strip() - if output == "Hello, World!": + if output == "Hello, WORLD!": print("PASS") return True else: - print(f"FAIL: expected 'Hello, World!', got '{output}'") + print(f"FAIL: expected 'Hello, WORLD!', got '{output}'") return False diff --git a/backend/benchmark/checker/1.py b/backend/benchmark/checker/1.py new file mode 100644 index 00000000..5afeecd1 --- /dev/null +++ b/backend/benchmark/checker/1.py @@ -0,0 +1,61 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +"""Checker for benchmark 1: python313_features.md with warnings and +multiprocessing sections.""" + +import re +import sys +from pathlib import Path + + +def check(working_directory: str) -> bool: + md_file = Path(working_directory) / "python313_features.md" + + if not md_file.exists(): + print(f"FAIL: {md_file} does not exist") + return False + + content = md_file.read_text() + + if len(content.strip()) < 50: + print("FAIL: file content is too short") + return False + + # Check for at least 2 heading sections (# warnings, # multiprocessing) + h1_sections = re.findall(r"^# .+", content, re.MULTILINE) + if len(h1_sections) < 2: + print( + f"FAIL: expected at least 2 # sections, found {len(h1_sections)}" + ) + return False + + lower = content.lower() + if "warnings" not in lower: + print("FAIL: missing warnings section") + return False + + if "multiprocessing" not in lower: + print("FAIL: missing multiprocessing section") + return False + + print("PASS") + return True + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + success = check(sys.argv[1]) + sys.exit(0 if success else 1) diff --git a/backend/benchmark/checker/2.py b/backend/benchmark/checker/2.py new file mode 100644 index 00000000..dc43abf5 --- /dev/null +++ b/backend/benchmark/checker/2.py @@ -0,0 +1,92 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +"""Checker for benchmark 2: yc_w25_b2b_ai.csv with B2B AI companies.""" + +import csv +import sys +from pathlib import Path + +VALID_CATEGORIES = { + "ai-agents", + "ai-infrastructure", + "ai-developer-tools", + "ai-analytics", + "ai-security", + "ai-healthcare", + "ai-sales", + "ai-productivity", + "ai-customer-support", + "ai-coding", + "ai-data", + "ai-fintech", + "ai-legal", + "ai-hr", + "ai-marketing", + "ai-other", +} + +REQUIRED_COLUMNS = {"company_name", "product_description", "ai_category"} + + +def check(working_directory: str) -> bool: + csv_file = Path(working_directory) / "yc_w25_b2b_ai.csv" + + if not csv_file.exists(): + print(f"FAIL: {csv_file} does not exist") + return False + + with open(csv_file, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + headers = set(reader.fieldnames or []) + + missing = REQUIRED_COLUMNS - headers + if missing: + print(f"FAIL: missing columns: {missing}") + return False + + rows = list(reader) + + if len(rows) < 5: + print(f"FAIL: expected at least 5 companies, got {len(rows)}") + return False + + for i, row in enumerate(rows): + name = row.get("company_name", "") + if name != name.lower(): + print(f"FAIL: row {i}: company_name '{name}' is not lowercase") + return False + + desc = row.get("product_description", "") + if len(desc) > 100: + print( + f"FAIL: row {i}: product_description exceeds 100 chars " + f"({len(desc)})" + ) + return False + + cat = row.get("ai_category", "") + if cat not in VALID_CATEGORIES: + print(f"FAIL: row {i}: invalid ai_category '{cat}'") + return False + + print("PASS") + return True + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + success = check(sys.argv[1]) + sys.exit(0 if success else 1) diff --git a/backend/benchmark/dataset/0.json b/backend/benchmark/dataset/0.json index e1074cf3..25dec2e1 100644 --- a/backend/benchmark/dataset/0.json +++ b/backend/benchmark/dataset/0.json @@ -1,18 +1,20 @@ { "metadata": { "difficulty": "easy", - "description": "Google a specific blog post on mathspp.com about obfuscated Python, read and understand the code tricks, then faithfully reproduce the exact program as hello_world.py.", - "tags": ["browser", "coding", "python", "target-searching"] + "description": "1) search tool usage and choosing the appropriate website from results, 2) interpreting advanced obfuscated Python code patterns (requires deep coding comprehension), 3) strict instruction following with implicit output modification instead of directly copying code from the website.", + "tags": [ + "instruction-following", + "browser", + "coding", + "python", + "target-searching" + ] }, "data": { "name": "0", - "question": "Google search 'The most obscure Hello, world! program', choose the link from the website mathspp, read the page, and write a Python script named 'hello_world.py' that faithfully reproduces the exact obfuscated Hello World program shown on that page. Do not simplify or rewrite it — copy the same structure, tricks, and naming conventions used by the author. The script must print 'Hello, World!' when run.", + "question": "Find 'obscure hello world program' from mathspp, read the page, and write a Python script named 'hello_world.py' that faithfully reproduces the obfuscated Hello World program shown on that page. Do not simplify or rewrite it, just use the same structure, tricks, and naming conventions used by the author. Notice that the script MUST print 'Hello, WORLD!' when run.", "env": {} }, - "model_kwargs": { - "model_platform": "openai", - "model_type": "gpt-5.2" - }, "tests": { "checker": ["benchmark/checker/0.py"], "grader": ["benchmark/grader/0.py"] diff --git a/backend/benchmark/dataset/1.json b/backend/benchmark/dataset/1.json new file mode 100644 index 00000000..01dec03a --- /dev/null +++ b/backend/benchmark/dataset/1.json @@ -0,0 +1,22 @@ +{ + "metadata": { + "difficulty": "easy", + "description": "1) agent autonomously triggers search/browser to retrieve real data instead of hallucinating, 2) browser use with scrolling to locate specific modules, 3) instruction following for file creation with specific name and format.", + "tags": [ + "browser", + "research", + "markdown", + "instruction-following", + "code-related" + ] + }, + "data": { + "name": "1", + "question": "Find what's new in Python 3.13 for the `warnings` and `multiprocessing` modules. Create a markdown file named 'python313_features.md' with each module name as a heading (#) and the exact text description from the official documentation as the content below each heading. Only make sure any code or script references are wrapped in backticks.", + "env": {} + }, + "tests": { + "checker": ["benchmark/checker/1.py"], + "grader": ["benchmark/grader/1.py"] + } +} diff --git a/backend/benchmark/dataset/2.json b/backend/benchmark/dataset/2.json new file mode 100644 index 00000000..0d99e688 --- /dev/null +++ b/backend/benchmark/dataset/2.json @@ -0,0 +1,16 @@ +{ + "metadata": { + "difficulty": "medium", + "description": "1) benchmark browser use capability with in-depth browser operations, 2) document generation with strict format constraints on the CSV generation, 3) implicit classification for each company's category.", + "tags": ["browser", "research", "data-extraction", "csv", "multi-step"] + }, + "data": { + "name": "2", + "question": "Identify all B2B companies in the Y Combinator Winter 2025 batch whose product is related to AI. After you obtain the full company list, independently investigate each company's product information in detail and consolidate all findings into a clean, well-structured CSV file named 'yc_w25_b2b_ai.csv' with columns: company_name (in lowercase), product_description (100 chars max), ai_category (use a consistent set of values including 'ai-agents', 'ai-infrastructure', 'ai-developer-tools', 'ai-analytics', 'ai-security', 'ai-healthcare', 'ai-sales', 'ai-productivity', 'ai-customer-support', 'ai-coding', 'ai-data', 'ai-fintech', 'ai-legal', 'ai-hr', 'ai-marketing', and 'ai-other').", + "env": {} + }, + "tests": { + "checker": ["benchmark/checker/2.py"], + "grader": ["benchmark/grader/2.py"] + } +} diff --git a/backend/benchmark/environment.py b/backend/benchmark/environment.py index 8bdaa860..6c61ca63 100644 --- a/backend/benchmark/environment.py +++ b/backend/benchmark/environment.py @@ -16,11 +16,16 @@ import json import os from pathlib import Path -from dotenv import dotenv_values +from dotenv import dotenv_values, load_dotenv from pydantic import BaseModel from app.model.chat import Chat, McpServers +# Load benchmark env files (.env takes priority over .env.development) +_BENCHMARK_DIR = Path(__file__).resolve().parent +load_dotenv(_BENCHMARK_DIR / ".env") +load_dotenv(_BENCHMARK_DIR / ".env.development") + class Env(BaseModel): # TODO: add more environment variables @@ -37,10 +42,12 @@ class Tests(BaseModel): class ModelKwargs(BaseModel): - model_platform: str = "openai" - model_type: str = "gpt-4o" - api_key: str | None = None - api_url: str | None = None + model_platform: str = os.environ.get("BENCHMARK_MODEL_PLATFORM", "openai") + model_type: str = os.environ.get("BENCHMARK_MODEL_TYPE", "gpt-5.2") + api_key: str | None = os.environ.get("BENCHMARK_API_KEY") + api_url: str = os.environ.get( + "BENCHMARK_API_URL", "https://api.openai.com/v1" + ) class Metadata(BaseModel): @@ -64,7 +71,11 @@ class BenchmarkData(BaseModel): server_env.update(env_vars) server_cfg["env"] = server_env - api_key = model_kwargs.api_key or os.environ["OPENAI_API_KEY"] + api_key = ( + model_kwargs.api_key + or os.environ.get("BENCHMARK_API_KEY") + or os.environ["OPENAI_API_KEY"] + ) self._chat = Chat( task_id=f"benchmark_{self.name}", diff --git a/backend/benchmark/grader/0.py b/backend/benchmark/grader/0.py index 8c74c7df..a707990a 100644 --- a/backend/benchmark/grader/0.py +++ b/backend/benchmark/grader/0.py @@ -16,6 +16,7 @@ import ast import json import sys from pathlib import Path +from urllib.parse import urlparse BROWSER_LOG_DIR = Path(__file__).resolve().parents[2] / "browser_log" @@ -63,63 +64,103 @@ def grade(working_directory: str) -> tuple[int, int]: # 1. Visited mathspp.com blog page visited = _visited_urls() if any( - "mathspp.com/blog/the-most-obscure-hello-world" in u for u in visited + (p := urlparse(u)).hostname is not None + and ( + p.hostname == "mathspp.com" or p.hostname.endswith(".mathspp.com") + ) + and "/blog/the-most-obscure-hello-world" in p.path + for u in visited ): completed += 1 + else: + print( + "MISS [1]: did not visit " + "mathspp.com/blog/the-most-obscure-hello-world" + ) script = Path(working_directory) / "hello_world.py" if not script.exists(): + print("MISS [2-7]: hello_world.py does not exist") return completed, total source = script.read_text() tree = ast.parse(source) - # 1. Uses a decorator that immediately instantiates a class + # 2. Uses a decorator that immediately instantiates a class + found = False for node in ast.walk(tree): if isinstance(node, ast.ClassDef) and node.decorator_list: + found = True completed += 1 break + if not found: + print("MISS [2]: no decorated class definition found") - # 2. Overloads __format__ + # 3. Overloads __format__ + found = False for node in ast.walk(tree): if isinstance(node, ast.FunctionDef) and node.name == "__format__": + found = True completed += 1 break + if not found: + print("MISS [3]: no __format__ method found") - # 3. Uses property injection on the class + # 4. Uses property injection on the class if "property" in source: completed += 1 + else: + print("MISS [4]: no 'property' usage found in source") - # 4. __format__ returns an empty string + # 5. __format__ returns an empty string + found = False for node in ast.walk(tree): if isinstance(node, ast.FunctionDef) and node.name == "__format__": for child in ast.walk(node): - if isinstance(child, ast.Return - ) and isinstance(child.value, ast.Constant): + if isinstance(child, ast.Return) and isinstance( + child.value, ast.Constant + ): if child.value.value == "": + found = True completed += 1 break break + if not found: + print('MISS [5]: __format__ does not return an empty string ""') - # 5. Uses function annotation to trigger f-string evaluation + # 6. Uses function annotation to trigger f-string evaluation + found = False for node in ast.walk(tree): if isinstance(node, ast.FunctionDef) and node.returns is not None: if isinstance(node.returns, ast.JoinedStr): + found = True completed += 1 break + if not found: + print( + "MISS [6]: no function annotation with f-string (JoinedStr) found" + ) - # 6. Uses _ as both class name and instance variable + # 7. Uses _ as both class name and instance variable has_class_underscore = False has_attr_underscore = False for node in ast.walk(tree): if isinstance(node, ast.ClassDef) and node.name == "_": has_class_underscore = True - if isinstance(node, - ast.Attribute) and isinstance(node.value, ast.Name): + if isinstance(node, ast.Attribute) and isinstance( + node.value, ast.Name + ): if node.value.id == "_" and node.attr == "_": has_attr_underscore = True if has_class_underscore and has_attr_underscore: completed += 1 + else: + parts = [] + if not has_class_underscore: + parts.append("no class named '_'") + if not has_attr_underscore: + parts.append("no _._ attribute access") + print(f"MISS [7]: {', '.join(parts)}") return completed, total diff --git a/backend/benchmark/grader/1.py b/backend/benchmark/grader/1.py new file mode 100644 index 00000000..4e63b002 --- /dev/null +++ b/backend/benchmark/grader/1.py @@ -0,0 +1,139 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +"""Grader for benchmark 1: evaluate python313_features.md milestones.""" + +import json +import re +import sys +from pathlib import Path +from urllib.parse import urlparse + +BROWSER_LOG_DIR = Path(__file__).resolve().parents[2] / "browser_log" + + +def _visited_urls() -> set[str]: + """Extract all URLs seen in browser logs.""" + urls: set[str] = set() + if not BROWSER_LOG_DIR.exists(): + return urls + for log_file in BROWSER_LOG_DIR.glob("hybrid_browser_toolkit_ws_*.log"): + decoder = json.JSONDecoder() + raw = log_file.read_text() + pos = 0 + while pos < len(raw): + stripped = raw[pos:].lstrip() + if not stripped: + break + pos = len(raw) - len(stripped) + try: + obj, end = decoder.raw_decode(raw, pos) + pos = end + if not isinstance(obj, dict): + continue + action = obj.get("action", "") + if action == "visit_page": + args = obj.get("inputs", {}).get("args", []) + if args: + urls.add(args[0]) + except (json.JSONDecodeError, ValueError): + pos += 1 + return urls + + +def grade(working_directory: str) -> tuple[int, int]: + total = 7 + completed = 0 + + md_file = Path(working_directory) / "python313_features.md" + + # 1. Visited the Python 3.13 What's New page + visited = _visited_urls() + if any( + (p := urlparse(u)).hostname is not None + and ( + p.hostname == "docs.python.org" + or p.hostname.endswith(".docs.python.org") + ) + and "3.13" in p.path + for u in visited + ): + completed += 1 + else: + print("MISS [1]: did not visit docs.python.org/3.13 What's New page") + + if not md_file.exists(): + print("MISS [2-7]: python313_features.md does not exist") + return completed, total + + content = md_file.read_text() + lower = content.lower() + + # 2. Has a # warnings heading + if re.search(r"^# warnings\b", content, re.MULTILINE | re.IGNORECASE): + completed += 1 + else: + print("MISS [2]: no '# warnings' heading found") + + # 3. Has a # multiprocessing heading + if re.search( + r"^# multiprocessing\b", content, re.MULTILINE | re.IGNORECASE + ): + completed += 1 + else: + print("MISS [3]: no '# multiprocessing' heading found") + + # 4. Mentions warnings.deprecated() with backticks + if "`warnings.deprecated()`" in content or ( + "warnings.deprecated" in lower and "`" in content + ): + completed += 1 + else: + print( + "MISS [4]: missing `warnings.deprecated()` " + "(expected backtick-wrapped reference)" + ) + + # 5. Mentions PEP 702 + if "pep 702" in lower: + completed += 1 + else: + print("MISS [5]: no mention of PEP 702") + + # 6. Mentions os.process_cpu_count() with backticks + if "`os.process_cpu_count()`" in content or ( + "os.process_cpu_count" in lower and "`" in content + ): + completed += 1 + else: + print( + "MISS [6]: missing `os.process_cpu_count()` " + "(expected backtick-wrapped reference)" + ) + + # 7. Mentions os.cpu_count() (the old default being replaced) + if "os.cpu_count" in lower: + completed += 1 + else: + print("MISS [7]: no mention of os.cpu_count()") + + return completed, total + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + completed, total = grade(sys.argv[1]) + print(f"{completed}/{total}") + sys.exit(0 if completed == total else 1) diff --git a/backend/benchmark/grader/2.py b/backend/benchmark/grader/2.py new file mode 100644 index 00000000..3132b8f4 --- /dev/null +++ b/backend/benchmark/grader/2.py @@ -0,0 +1,261 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +"""Grader for benchmark 2: evaluate yc_w25_b2b_ai.csv milestones.""" + +import csv +import json +import sys +from collections import Counter +from pathlib import Path +from urllib.parse import urlparse + +BROWSER_LOG_DIR = Path(__file__).resolve().parents[2] / "browser_log" +ANSWER_CSV = ( + Path(__file__).resolve().parents[1] / "answer" / "2" / "yc_w25_b2b_ai.csv" +) + +VALID_CATEGORIES = { + "ai-agents", + "ai-infrastructure", + "ai-developer-tools", + "ai-analytics", + "ai-security", + "ai-healthcare", + "ai-sales", + "ai-productivity", + "ai-customer-support", + "ai-coding", + "ai-data", + "ai-fintech", + "ai-legal", + "ai-hr", + "ai-marketing", + "ai-other", +} + +REQUIRED_COLUMNS = {"company_name", "product_description", "ai_category"} + + +def _visited_urls() -> set[str]: + """Extract all URLs seen in browser logs.""" + urls: set[str] = set() + if not BROWSER_LOG_DIR.exists(): + return urls + for log_file in BROWSER_LOG_DIR.glob("hybrid_browser_toolkit_ws_*.log"): + decoder = json.JSONDecoder() + raw = log_file.read_text() + pos = 0 + while pos < len(raw): + stripped = raw[pos:].lstrip() + if not stripped: + break + pos = len(raw) - len(stripped) + try: + obj, end = decoder.raw_decode(raw, pos) + pos = end + if not isinstance(obj, dict): + continue + action = obj.get("action", "") + if action == "visit_page": + args = obj.get("inputs", {}).get("args", []) + if args: + urls.add(args[0]) + except (json.JSONDecodeError, ValueError): + pos += 1 + return urls + + +def _load_answer() -> tuple[int, Counter]: + """Load expected company count and category distribution from answer CSV.""" + cat_counts: Counter = Counter() + count = 0 + if not ANSWER_CSV.exists(): + return 0, cat_counts + with open(ANSWER_CSV, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + count += 1 + cat = row.get("ai_category", "") + if cat: + cat_counts[cat] += 1 + return count, cat_counts + + +def _category_overlap(expected: Counter, actual: Counter) -> float: + """Compute distribution overlap between expected and actual categories. + + Normalizes both to proportions, then sums min(expected_pct, actual_pct) + for each category. Returns a value between 0.0 and 1.0. + """ + exp_total = sum(expected.values()) + act_total = sum(actual.values()) + if exp_total == 0 or act_total == 0: + return 0.0 + all_cats = set(expected.keys()) | set(actual.keys()) + overlap = 0.0 + for cat in all_cats: + exp_pct = expected.get(cat, 0) / exp_total + act_pct = actual.get(cat, 0) / act_total + overlap += min(exp_pct, act_pct) + return overlap + + +def grade(working_directory: str) -> tuple[int, int]: + total = 10 + completed = 0 + + csv_file = Path(working_directory) / "yc_w25_b2b_ai.csv" + + # 1. Visited YC W25 companies page + visited = _visited_urls() + if any( + (p := urlparse(u)).hostname is not None + and ( + p.hostname == "ycombinator.com" + or p.hostname.endswith(".ycombinator.com") + ) + and "W25" in u + for u in visited + ): + completed += 1 + else: + print("MISS [1]: did not visit ycombinator.com W25 companies page") + + # 2. CSV file exists + if not csv_file.exists(): + print(f"MISS [2-10]: {csv_file.name} does not exist") + return completed, total + completed += 1 + + try: + with open(csv_file, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + headers = set(reader.fieldnames or []) + rows = list(reader) + except Exception as e: + print(f"MISS [3-10]: failed to parse CSV: {e}") + return completed, total + + # 3. Has correct columns + if REQUIRED_COLUMNS.issubset(headers): + completed += 1 + else: + missing = REQUIRED_COLUMNS - headers + print(f"MISS [3]: missing columns: {missing}") + + # 4. All company_name values are lowercase + non_lower = [ + row.get("company_name", "") + for row in rows + if row.get("company_name", "") != row.get("company_name", "").lower() + ] + if rows and not non_lower: + completed += 1 + else: + print( + f"MISS [4]: {len(non_lower)} company_name(s) not lowercase, " + f"e.g. {non_lower[:3]}" + ) + + # 5. All product_description values are <= 100 chars + too_long = [ + (i, len(row.get("product_description", ""))) + for i, row in enumerate(rows) + if len(row.get("product_description", "")) > 100 + ] + if rows and not too_long: + completed += 1 + else: + print( + f"MISS [5]: {len(too_long)} description(s) exceed 100 chars, " + f"e.g. row {too_long[0][0]} has {too_long[0][1]} chars" + if too_long + else "MISS [5]: no rows found" + ) + + # 6. All ai_category values are valid enums + invalid_cats = [ + (i, row.get("ai_category", "")) + for i, row in enumerate(rows) + if row.get("ai_category", "") not in VALID_CATEGORIES + ] + if rows and not invalid_cats: + completed += 1 + else: + print( + f"MISS [6]: {len(invalid_cats)} invalid category value(s), " + f"e.g. row {invalid_cats[0][0]}: '{invalid_cats[0][1]}'" + if invalid_cats + else "MISS [6]: no rows found" + ) + + # Load answer for approximate matching + expected_count, expected_cats = _load_answer() + actual_count = len(rows) + + # 7-8. Company count within 50% → +1, within 25% → +1 more + if expected_count > 0 and actual_count > 0: + ratio = actual_count / expected_count + if 0.5 <= ratio <= 1.5: + completed += 1 + if 0.75 <= ratio <= 1.25: + completed += 1 + else: + print( + f"MISS [8]: count {actual_count} is within 50% but not " + f"25% of expected {expected_count} (ratio={ratio:.2f})" + ) + else: + print( + f"MISS [7-8]: count {actual_count} is not within 50% of " + f"expected {expected_count} (ratio={ratio:.2f})" + ) + else: + print( + f"MISS [7-8]: expected_count={expected_count}, " + f"actual_count={actual_count}" + ) + + # 9-10. Category distribution overlap >= 50% → +1, >= 75% → +1 more + actual_cats: Counter = Counter() + for row in rows: + cat = row.get("ai_category", "") + if cat: + actual_cats[cat] += 1 + overlap = _category_overlap(expected_cats, actual_cats) + if overlap >= 0.50: + completed += 1 + if overlap >= 0.75: + completed += 1 + else: + print( + f"MISS [10]: category overlap {overlap:.2%} >= 50% but < 75%" + ) + else: + print( + f"MISS [9-10]: category overlap {overlap:.2%} < 50%. " + f"Expected dist: {dict(expected_cats)}, " + f"actual dist: {dict(actual_cats)}" + ) + + return completed, total + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + completed, total = grade(sys.argv[1]) + print(f"{completed}/{total}") + sys.exit(0 if completed == total else 1) diff --git a/backend/benchmark/main.py b/backend/benchmark/main.py index 8151a8d7..21cf77e5 100644 --- a/backend/benchmark/main.py +++ b/backend/benchmark/main.py @@ -15,21 +15,21 @@ import asyncio import csv import importlib.util +import shutil import sys from datetime import datetime from pathlib import Path from benchmark.client import BenchmarkClient -from benchmark.environment import BenchmarkConfig +from benchmark.environment import BenchmarkConfig, ModelKwargs DATASET_DIR = Path(__file__).parent / "dataset" RESULTS_DIR = Path(__file__).parent +BROWSER_LOG_DIR = Path(__file__).parent.parent / "browser_log" async def run_benchmark( - client: BenchmarkClient, - benchmark_path: Path, - verbose: bool = False + client: BenchmarkClient, benchmark_path: Path, verbose: bool = False ) -> dict: """Load a benchmark config and run it. @@ -43,15 +43,28 @@ async def run_benchmark( dict: Results including benchmark name, model, checker and grader outcomes. """ + # Clear browser logs so previous benchmark visits don't leak into this run + if BROWSER_LOG_DIR.exists(): + for log_file in BROWSER_LOG_DIR.iterdir(): + if log_file.is_file(): + log_file.unlink() + config = BenchmarkConfig.from_json(benchmark_path) data = config.data model_kwargs = config.model_kwargs model = f"{model_kwargs.model_platform}/{model_kwargs.model_type}" + + # Clear previous working directory so results are from a fresh run + working_dir_path = Path(data.get_working_directory(model_kwargs)) + if working_dir_path.exists(): + shutil.rmtree(working_dir_path) + working_dir_path.mkdir(parents=True, exist_ok=True) + print(f"--- Benchmark: {data.name} ---") print(f"Question: {data.question}") print(f"Model: {model}") - print(f"Working directory: {data.get_working_directory(model_kwargs)}") + print(f"Working directory: {working_dir_path}") print(f"Checkers: {config.tests.checker}") print(f"Graders: {config.tests.grader}") @@ -133,6 +146,13 @@ async def main() -> None: print(f"No benchmark configs found in {DATASET_DIR}") return + defaults = ModelKwargs() + print("=== Benchmark Model Configuration ===") + print(f" Platform: {defaults.model_platform}") + print(f" Model: {defaults.model_type}") + print(f" API URL: {defaults.api_url}") + print() + all_results = [] async with BenchmarkClient() as client: for path in paths: diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 8a0c5ccb..2cbbbf34 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.11,<3.12" dependencies = [ "pip>=23.0", - "camel-ai[eigent]==0.2.85a0", + "camel-ai[eigent]==0.2.90a1", "fastapi>=0.115.12", "fastapi-babel>=1.0.0", "uvicorn[standard]>=0.34.2", @@ -38,6 +38,7 @@ dev = [ [tool.ruff] line-length = 79 target-version = "py311" +exclude = ["benchmark/answer"] [tool.ruff.lint] select = [ @@ -70,7 +71,7 @@ quote-style = "double" indent-style = "space" [tool.bandit] -exclude_dirs = ["tests", ".venv", "venv"] +exclude_dirs = ["tests", ".venv", "venv", "benchmark/answer"] skips = [ "B101", # assert_used - OK in non-production code "B105", # hardcoded_password_string - false positive on env var names diff --git a/backend/tests/app/agent/factory/test_browser.py b/backend/tests/app/agent/factory/test_browser.py index e52a5a46..ea564302 100644 --- a/backend/tests/app/agent/factory/test_browser.py +++ b/backend/tests/app/agent/factory/test_browser.py @@ -35,6 +35,9 @@ def test_browser_agent_creation(sample_chat_data): _mod = "app.agent.factory.browser" with ( patch(f"{_mod}.agent_model") as mock_agent_model, + patch( + f"{_mod}.get_working_directory", return_value="/tmp/test_workdir" + ), patch("asyncio.create_task"), patch(f"{_mod}.HumanToolkit") as mock_human_toolkit, patch(f"{_mod}.HybridBrowserToolkit") as mock_browser_toolkit, diff --git a/backend/tests/app/agent/factory/test_developer.py b/backend/tests/app/agent/factory/test_developer.py index 0a4e55ce..597061ce 100644 --- a/backend/tests/app/agent/factory/test_developer.py +++ b/backend/tests/app/agent/factory/test_developer.py @@ -36,6 +36,9 @@ async def test_developer_agent_creation(sample_chat_data): _mod = "app.agent.factory.developer" with ( patch(f"{_mod}.agent_model") as mock_agent_model, + patch( + f"{_mod}.get_working_directory", return_value="/tmp/test_workdir" + ), patch("asyncio.create_task"), patch(f"{_mod}.HumanToolkit") as mock_human_toolkit, patch(f"{_mod}.NoteTakingToolkit") as mock_note_toolkit, @@ -82,6 +85,9 @@ async def test_developer_agent_with_multiple_toolkits(sample_chat_data): _mod = "app.agent.factory.developer" with ( patch(f"{_mod}.agent_model") as mock_agent_model, + patch( + f"{_mod}.get_working_directory", return_value="/tmp/test_workdir" + ), patch("asyncio.create_task"), patch(f"{_mod}.HumanToolkit") as mock_human_toolkit, patch(f"{_mod}.NoteTakingToolkit") as mock_note_toolkit, diff --git a/backend/tests/app/agent/factory/test_document.py b/backend/tests/app/agent/factory/test_document.py index 034a1696..0fa32b99 100644 --- a/backend/tests/app/agent/factory/test_document.py +++ b/backend/tests/app/agent/factory/test_document.py @@ -36,6 +36,9 @@ async def test_document_agent_creation(sample_chat_data): _mod = "app.agent.factory.document" with ( patch(f"{_mod}.agent_model") as mock_agent_model, + patch( + f"{_mod}.get_working_directory", return_value="/tmp/test_workdir" + ), patch("asyncio.create_task"), patch(f"{_mod}.HumanToolkit") as mock_human_toolkit, patch(f"{_mod}.FileToolkit") as mock_file_toolkit, diff --git a/backend/tests/app/agent/factory/test_multi_modal.py b/backend/tests/app/agent/factory/test_multi_modal.py index 308def0a..8e3d145c 100644 --- a/backend/tests/app/agent/factory/test_multi_modal.py +++ b/backend/tests/app/agent/factory/test_multi_modal.py @@ -35,6 +35,9 @@ def test_multi_modal_agent_creation(sample_chat_data): _mod = "app.agent.factory.multi_modal" with ( patch(f"{_mod}.agent_model") as mock_agent_model, + patch( + f"{_mod}.get_working_directory", return_value="/tmp/test_workdir" + ), patch("asyncio.create_task"), patch(f"{_mod}.HumanToolkit") as mock_human_toolkit, patch(f"{_mod}.VideoDownloaderToolkit") as mock_video_toolkit, diff --git a/backend/tests/app/agent/factory/test_social_media.py b/backend/tests/app/agent/factory/test_social_media.py index 56b3ee42..6eb9b98b 100644 --- a/backend/tests/app/agent/factory/test_social_media.py +++ b/backend/tests/app/agent/factory/test_social_media.py @@ -36,6 +36,9 @@ async def test_social_media_agent_creation(sample_chat_data): mod = "app.agent.factory.social_media" with ( patch(f"{mod}.agent_model") as mock_agent_model, + patch( + f"{mod}.get_working_directory", return_value="/tmp/test_workdir" + ), patch("asyncio.create_task"), patch(f"{mod}.WhatsAppToolkit") as mock_whatsapp_toolkit, patch(f"{mod}.TwitterToolkit") as mock_twitter_toolkit, diff --git a/backend/tests/unit/component/test_environment_security.py b/backend/tests/app/component/test_environment.py similarity index 100% rename from backend/tests/unit/component/test_environment_security.py rename to backend/tests/app/component/test_environment.py diff --git a/backend/tests/unit/controller/test_chat_controller.py b/backend/tests/app/controller/test_chat_controller.py similarity index 92% rename from backend/tests/unit/controller/test_chat_controller.py rename to backend/tests/app/controller/test_chat_controller.py index 0cf3f3c7..5366b6dc 100644 --- a/backend/tests/unit/controller/test_chat_controller.py +++ b/backend/tests/app/controller/test_chat_controller.py @@ -13,7 +13,7 @@ # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= import os -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest from fastapi import Response @@ -50,13 +50,14 @@ class TestChatController: with ( patch( - "app.controller.chat_controller.create_task_lock", + "app.controller.chat_controller.get_or_create_task_lock", return_value=mock_task_lock, ), patch( "app.controller.chat_controller.step_solve" ) as mock_step_solve, patch("app.controller.chat_controller.load_dotenv"), + patch("app.controller.chat_controller.set_current_task_id"), patch("pathlib.Path.mkdir"), patch("pathlib.Path.home", return_value=MagicMock()), ): @@ -71,9 +72,6 @@ class TestChatController: assert isinstance(response, StreamingResponse) assert response.media_type == "text/event-stream" - mock_step_solve.assert_called_once_with( - chat_data, mock_request, mock_task_lock - ) @pytest.mark.asyncio async def test_post_chat_sets_environment_variables( @@ -84,13 +82,14 @@ class TestChatController: with ( patch( - "app.controller.chat_controller.create_task_lock", + "app.controller.chat_controller.get_or_create_task_lock", return_value=mock_task_lock, ), patch( "app.controller.chat_controller.step_solve" ) as mock_step_solve, patch("app.controller.chat_controller.load_dotenv"), + patch("app.controller.chat_controller.set_current_task_id"), patch("pathlib.Path.mkdir"), patch("pathlib.Path.home", return_value=MagicMock()), patch.dict(os.environ, {}, clear=True), @@ -133,18 +132,24 @@ class TestChatController: # put_queue is invoked when creating the coroutine passed to asyncio.run mock_task_lock.put_queue.assert_called_once() - def test_improve_chat_task_done_error(self, mock_task_lock): - """Test improvement fails when task is done.""" + def test_improve_chat_task_done_resets_to_confirming(self, mock_task_lock): + """Test improvement when task is done resets status to confirming.""" task_id = "test_task_123" supplement_data = SupplementChat(question="Improve this code") mock_task_lock.status = Status.done - with patch( - "app.controller.chat_controller.get_task_lock", - return_value=mock_task_lock, + with ( + patch( + "app.controller.chat_controller.get_task_lock", + return_value=mock_task_lock, + ), + patch("asyncio.run") as mock_run, ): - with pytest.raises(UserException): - improve(task_id, supplement_data) + response = improve(task_id, supplement_data) + + assert mock_task_lock.status == Status.confirming + assert isinstance(response, Response) + assert response.status_code == 201 def test_supplement_chat_success(self, mock_task_lock): """Test successful chat supplementation.""" @@ -244,16 +249,18 @@ class TestChatControllerIntegration: """Test chat endpoint through FastAPI test client.""" with ( patch( - "app.controller.chat_controller.create_task_lock" + "app.controller.chat_controller.get_or_create_task_lock" ) as mock_create_lock, patch( "app.controller.chat_controller.step_solve" ) as mock_step_solve, patch("app.controller.chat_controller.load_dotenv"), + patch("app.controller.chat_controller.set_current_task_id"), patch("pathlib.Path.mkdir"), patch("pathlib.Path.home", return_value=MagicMock()), ): mock_task_lock = MagicMock() + mock_task_lock.put_queue = AsyncMock() mock_create_lock.return_value = mock_task_lock async def mock_generator(): @@ -455,8 +462,12 @@ class TestChatControllerErrorCases: with ( patch( - "app.controller.chat_controller.create_task_lock" + "app.controller.chat_controller.get_or_create_task_lock" ) as mock_create_lock, + patch( + "app.controller.chat_controller.sanitize_env_path", + return_value="/tmp/fake.env", + ), patch( "app.controller.chat_controller.load_dotenv", side_effect=Exception("Env load failed"), diff --git a/backend/tests/unit/controller/test_model_controller.py b/backend/tests/app/controller/test_model_controller.py similarity index 81% rename from backend/tests/unit/controller/test_model_controller.py rename to backend/tests/app/controller/test_model_controller.py index 1d59d88d..4a17e3d9 100644 --- a/backend/tests/unit/controller/test_model_controller.py +++ b/backend/tests/app/controller/test_model_controller.py @@ -15,6 +15,7 @@ from unittest.mock import MagicMock, patch import pytest +from fastapi import HTTPException from fastapi.testclient import TestClient from app.controller.model_controller import ( @@ -73,11 +74,9 @@ class TestModelController: "app.controller.model_controller.create_agent", side_effect=Exception("Invalid model configuration"), ): - response = await validate_model(request_data) - assert isinstance(response, ValidateModelResponse) - assert response.is_valid is False - assert response.is_tool_calls is False - assert "Invalid model name" in response.message + with pytest.raises(HTTPException) as exc_info: + await validate_model(request_data) + assert exc_info.value.status_code == 400 @pytest.mark.asyncio async def test_validate_model_step_failure(self): @@ -93,12 +92,9 @@ class TestModelController: "app.controller.model_controller.create_agent", return_value=mock_agent, ): - response = await validate_model(request_data) - - assert isinstance(response, ValidateModelResponse) - assert response.is_valid is False - assert response.is_tool_calls is False - assert "API call failed" in response.message + with pytest.raises(HTTPException) as exc_info: + await validate_model(request_data) + assert exc_info.value.status_code == 400 @pytest.mark.asyncio async def test_validate_model_tool_calls_false(self): @@ -130,8 +126,10 @@ class TestModelController: @pytest.mark.asyncio async def test_validate_model_with_minimal_parameters(self): - """Test model validation with minimal parameters.""" - request_data = ValidateModelRequest() # Uses default values + """Test model validation with minimal parameters (no API key).""" + request_data = ( + ValidateModelRequest() + ) # Uses default values, api_key is None mock_agent = MagicMock() mock_response = MagicMock() @@ -144,12 +142,12 @@ class TestModelController: "app.controller.model_controller.create_agent", return_value=mock_agent, ): + # api_key is None by default, which passes the empty string check + # The agent step succeeds, so validation should pass response = await validate_model(request_data) assert isinstance(response, ValidateModelResponse) - assert response.is_valid is False - assert response.is_tool_calls is False - assert response.error_code is not None - assert response.error is not None + assert response.is_valid is True + assert response.is_tool_calls is True @pytest.mark.asyncio async def test_validate_model_no_response(self): @@ -222,13 +220,7 @@ class TestModelControllerIntegration: ): response = client.post("/model/validate", json=request_data) - assert ( - response.status_code == 200 - ) # Returns 200 with error in response body - response_data = response.json() - assert response_data["is_valid"] is False - assert response_data["is_tool_calls"] is False - assert "Invalid model name" in response_data["message"] + assert response.status_code == 400 @pytest.mark.model_backend @@ -267,10 +259,9 @@ class TestModelControllerErrorCases: "app.controller.model_controller.create_agent", side_effect=ValueError("Invalid configuration"), ): - response = await validate_model(request_data) - - assert response.is_valid is False - assert "Invalid configuration" in response.message + with pytest.raises(HTTPException) as exc_info: + await validate_model(request_data) + assert exc_info.value.status_code == 400 @pytest.mark.asyncio async def test_validate_model_with_network_error(self): @@ -288,10 +279,9 @@ class TestModelControllerErrorCases: "app.controller.model_controller.create_agent", return_value=mock_agent, ): - response = await validate_model(request_data) - - assert response.is_valid is False - assert "Network unreachable" in response.message + with pytest.raises(HTTPException) as exc_info: + await validate_model(request_data) + assert exc_info.value.status_code == 400 @pytest.mark.asyncio async def test_validate_model_with_malformed_tool_calls_response(self): @@ -346,36 +336,21 @@ class TestModelControllerErrorCases: api_key="", # Empty API key ) - response = await validate_model(request_data) - - assert response.is_valid is False - assert response.is_tool_calls is False - assert response.message == "Invalid key. Validation failed." - assert response.error_code == "invalid_api_key" - assert response.error is not None - assert response.error["message"] == "Invalid key. Validation failed." - assert response.error["type"] == "invalid_request_error" - assert response.error["code"] == "invalid_api_key" + with pytest.raises(HTTPException) as exc_info: + await validate_model(request_data) + assert exc_info.value.status_code == 400 + detail = exc_info.value.detail + assert detail["error_code"] == "invalid_api_key" @pytest.mark.asyncio async def test_validate_model_invalid_model_type(self): - """Test model validation with invalid model type.""" + """Test model validation with invalid model type raises HTTPException.""" request_data = ValidateModelRequest( model_platform="openai", model_type="INVALID_MODEL_TYPE", api_key="test_key", ) - response = await validate_model(request_data) - assert response.is_valid is False - assert response.is_tool_calls is False - assert response.message == "Invalid model name. Validation failed." - assert response.error_code is not None - assert "model_not_found" in response.error_code - assert response.error is not None - assert ( - response.error["message"] - == "Invalid model name. Validation failed." - ) - assert response.error["type"] == "invalid_request_error" - assert response.error["code"] == "model_not_found" + with pytest.raises(HTTPException) as exc_info: + await validate_model(request_data) + assert exc_info.value.status_code == 400 diff --git a/backend/tests/unit/controller/test_task_controller.py b/backend/tests/app/controller/test_task_controller.py similarity index 100% rename from backend/tests/unit/controller/test_task_controller.py rename to backend/tests/app/controller/test_task_controller.py diff --git a/backend/tests/unit/controller/test_tool_controller.py b/backend/tests/app/controller/test_tool_controller.py similarity index 74% rename from backend/tests/unit/controller/test_tool_controller.py rename to backend/tests/app/controller/test_tool_controller.py index b30acfdf..22b78e8e 100644 --- a/backend/tests/unit/controller/test_tool_controller.py +++ b/backend/tests/app/controller/test_tool_controller.py @@ -15,6 +15,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from fastapi import HTTPException from fastapi.testclient import TestClient from app.controller.tool_controller import install_tool @@ -37,14 +38,18 @@ class TestToolController: return_value=mock_toolkit, ): result = await install_tool(tool_name) - assert result == ["create_page", "update_page"] + assert result["success"] is True + assert result["tools"] == ["create_page", "update_page"] + assert result["count"] == 2 + assert result["toolkit_name"] == "NotionMCPToolkit" mock_toolkit.connect.assert_called_once() mock_toolkit.disconnect.assert_called_once() @pytest.mark.asyncio async def test_install_unknown_tool(self): - result = await install_tool("unknown_tool") - assert result == {"error": "Tool not found"} + with pytest.raises(HTTPException) as exc_info: + await install_tool("unknown_tool") + assert exc_info.value.status_code == 404 @pytest.mark.asyncio async def test_install_notion_tool_connection_failure(self): @@ -54,8 +59,11 @@ class TestToolController: "app.controller.tool_controller.NotionMCPToolkit", return_value=mock_toolkit, ): - with pytest.raises(Exception, match="Connection failed"): - await install_tool("notion") + result = await install_tool("notion") + assert result["success"] is True + assert result["tools"] == [] + assert result["count"] == 0 + assert "warning" in result @pytest.mark.asyncio async def test_install_notion_tool_get_tools_failure(self): @@ -67,8 +75,11 @@ class TestToolController: "app.controller.tool_controller.NotionMCPToolkit", return_value=mock_toolkit, ): - with pytest.raises(Exception, match="Failed to get tools"): - await install_tool("notion") + result = await install_tool("notion") + assert result["success"] is True + assert result["tools"] == [] + assert result["count"] == 0 + assert "warning" in result @pytest.mark.asyncio async def test_install_notion_tool_disconnect_failure(self): @@ -81,8 +92,11 @@ class TestToolController: "app.controller.tool_controller.NotionMCPToolkit", return_value=mock_toolkit, ): - with pytest.raises(Exception, match="Disconnect failed"): - await install_tool("notion") + result = await install_tool("notion") + assert result["success"] is True + assert result["tools"] == [] + assert result["count"] == 0 + assert "warning" in result @pytest.mark.asyncio async def test_install_notion_tool_empty_tools(self): @@ -93,7 +107,9 @@ class TestToolController: return_value=mock_toolkit, ): result = await install_tool("notion") - assert result == [] + assert result["success"] is True + assert result["tools"] == [] + assert result["count"] == 0 mock_toolkit.connect.assert_called_once() mock_toolkit.disconnect.assert_called_once() @@ -117,7 +133,9 @@ class TestToolController: return_value=mock_toolkit, ): result = await install_tool("notion") - assert result == names + assert result["success"] is True + assert result["tools"] == names + assert result["count"] == 4 mock_toolkit.connect.assert_called_once() mock_toolkit.disconnect.assert_called_once() @@ -145,7 +163,10 @@ class TestToolControllerIntegration: response = client.post(f"/install/tool/{tool_name}") assert response.status_code == 200 - assert response.json() == ["create_page", "update_page"] + data = response.json() + assert data["success"] is True + assert data["tools"] == ["create_page", "update_page"] + assert data["count"] == 2 def test_install_unknown_tool_endpoint_integration( self, client: TestClient @@ -155,8 +176,7 @@ class TestToolControllerIntegration: response = client.post(f"/install/tool/{tool_name}") - assert response.status_code == 200 - assert response.json() == {"error": "Tool not found"} + assert response.status_code == 404 def test_install_notion_tool_endpoint_with_connection_error( self, client: TestClient @@ -171,9 +191,12 @@ class TestToolControllerIntegration: "app.controller.tool_controller.NotionMCPToolkit", return_value=mock_toolkit, ): - # The exception should be raised by the endpoint since there's no error handling - with pytest.raises(Exception, match="Connection failed"): - client.post(f"/install/tool/{tool_name}") + response = client.post(f"/install/tool/{tool_name}") + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["tools"] == [] + assert "warning" in data @pytest.mark.model_backend @@ -211,8 +234,11 @@ class TestToolControllerErrorCases: "app.controller.tool_controller.NotionMCPToolkit", return_value=mock_toolkit, ): - with pytest.raises(AttributeError): - await install_tool("notion") + # Inner except catches the AttributeError and returns success with empty tools + result = await install_tool("notion") + assert result["success"] is True + assert result["tools"] == [] + assert "warning" in result @pytest.mark.asyncio async def test_install_tool_with_none_toolkit(self): @@ -220,23 +246,29 @@ class TestToolControllerErrorCases: "app.controller.tool_controller.NotionMCPToolkit", return_value=None, ): - with pytest.raises(AttributeError): - await install_tool("notion") + # Inner except catches AttributeError on None.connect() + result = await install_tool("notion") + assert result["success"] is True + assert result["tools"] == [] + assert "warning" in result @pytest.mark.asyncio async def test_install_tool_with_special_characters_in_name(self): - result = await install_tool("notion@#$%") - assert result == {"error": "Tool not found"} + with pytest.raises(HTTPException) as exc_info: + await install_tool("notion@#$%") + assert exc_info.value.status_code == 404 @pytest.mark.asyncio async def test_install_tool_with_empty_string_name(self): - result = await install_tool("") - assert result == {"error": "Tool not found"} + with pytest.raises(HTTPException) as exc_info: + await install_tool("") + assert exc_info.value.status_code == 404 @pytest.mark.asyncio async def test_install_tool_with_none_name(self): - result = await install_tool(None) - assert result == {"error": "Tool not found"} + with pytest.raises(HTTPException) as exc_info: + await install_tool(None) + assert exc_info.value.status_code == 404 @pytest.mark.asyncio async def test_install_notion_tool_partial_failure(self): @@ -252,5 +284,8 @@ class TestToolControllerErrorCases: "app.controller.tool_controller.NotionMCPToolkit", return_value=mock_toolkit, ): - with pytest.raises(AttributeError): - await install_tool("notion") + # Inner except catches the AttributeError from tools[2].func + result = await install_tool("notion") + assert result["success"] is True + assert result["tools"] == [] + assert "warning" in result diff --git a/backend/tests/unit/model/test_agent_model_config.py b/backend/tests/app/model/test_chat.py similarity index 100% rename from backend/tests/unit/model/test_agent_model_config.py rename to backend/tests/app/model/test_chat.py diff --git a/backend/tests/unit/service/test_chat_service.py b/backend/tests/app/service/test_chat_service.py similarity index 91% rename from backend/tests/unit/service/test_chat_service.py rename to backend/tests/app/service/test_chat_service.py index ee96b258..8e28ab4c 100644 --- a/backend/tests/unit/service/test_chat_service.py +++ b/backend/tests/app/service/test_chat_service.py @@ -71,7 +71,6 @@ class TestCollectPreviousTaskContext: assert "Previous Task Result:" in result assert "Successfully created script.py" in result assert "=== END OF PREVIOUS TASK CONTEXT ===" in result - assert "=== NEW TASK ===" in result def test_collect_previous_task_context_with_generated_files( self, temp_dir @@ -208,7 +207,6 @@ class TestCollectPreviousTaskContext: # Should still have the structural elements assert "=== CONTEXT FROM PREVIOUS TASK ===" in result assert "=== END OF PREVIOUS TASK CONTEXT ===" in result - assert "=== NEW TASK ===" in result # Should not have content sections for empty inputs assert "Previous Task:" not in result @@ -289,7 +287,6 @@ class TestBuildContextForWorkforce: # Create mock TaskLock task_lock = MagicMock(spec=TaskLock) task_lock.conversation_history = [ - {"role": "user", "content": "Create a Python script"}, { "role": "assistant", "content": "I will create a Python script for you", @@ -304,14 +301,10 @@ class TestBuildContextForWorkforce: result = build_context_for_workforce(task_lock, options) - # Should include conversation history + # Should include conversation history header assert "=== CONVERSATION HISTORY ===" in result - assert "user: Create a Python script" in result - assert "assistant: I will create a Python script for you" in result - - # Should include previous task context - assert "=== CONTEXT FROM PREVIOUS TASK ===" in result - assert "Script created successfully" in result + # build_conversation_context only processes assistant and task_result roles + assert "I will create a Python script for you" in result def test_build_context_for_workforce_empty_history(self, temp_dir): """Test build_context_for_workforce with empty conversation history.""" @@ -329,15 +322,17 @@ class TestBuildContextForWorkforce: assert result == "" def test_build_context_for_workforce_task_result_role(self, temp_dir): - """Test build_context_for_workforce handles 'task_result' role specially.""" + """Test build_context_for_workforce handles 'task_result' role.""" task_lock = MagicMock(spec=TaskLock) task_lock.conversation_history = [ - {"role": "user", "content": "First question"}, { "role": "task_result", "content": "Full task context from previous task", }, - {"role": "user", "content": "Second question"}, + { + "role": "assistant", + "content": "Task completed successfully", + }, ] task_lock.last_task_result = "Final result" task_lock.last_task_summary = "Task summary" @@ -347,22 +342,18 @@ class TestBuildContextForWorkforce: result = build_context_for_workforce(task_lock, options) - # Should simplify task_result display - assert "[Previous Task Completed]" in result - assert ( - "Full task context from previous task" not in result - ) # Should not show full content - assert "user: First question" in result - assert "user: Second question" in result + # build_conversation_context appends string task_result content directly + assert "Full task context from previous task" in result + assert "Task completed successfully" in result def test_build_context_for_workforce_with_last_task_result(self, temp_dir): - """Test build_context_for_workforce includes last task result context.""" - # Create some files in temp directory - (temp_dir / "output.txt").write_text("Task output") - + """Test build_context_for_workforce with assistant entries.""" task_lock = MagicMock(spec=TaskLock) task_lock.conversation_history = [ - {"role": "user", "content": "Test question"} + { + "role": "assistant", + "content": "Task completed with output.txt", + }, ] task_lock.last_task_result = "Task completed with output.txt" task_lock.last_task_summary = "File creation task" @@ -372,13 +363,9 @@ class TestBuildContextForWorkforce: result = build_context_for_workforce(task_lock, options) - # Should include conversation history and task context + # Should include conversation history assert "=== CONVERSATION HISTORY ===" in result - assert "user: Test question" in result - assert "=== CONTEXT FROM PREVIOUS TASK ===" in result assert "Task completed with output.txt" in result - assert "File creation task" in result - assert "output.txt" in result # Generated file should be listed @pytest.mark.unit @@ -586,17 +573,14 @@ class TestChatServiceAgentOperations: @pytest.mark.asyncio async def test_question_confirm_simple_query(self, mock_camel_agent): - """Test question_confirm with simple query that gets direct response.""" - mock_camel_agent.step.return_value.msgs[ - 0 - ].content = "Hello! How can I help you today?" + """Test question_confirm with simple query returns False.""" + mock_camel_agent.step.return_value.msgs[0].content = "no" mock_camel_agent.chat_history = [] result = await question_confirm(mock_camel_agent, "hello") - # Should return SSE formatted response for simple queries - assert "wait_confirm" in result - assert "Hello! How can I help you today?" in result + # Should return False for simple queries (no "yes" in response) + assert result is False @pytest.mark.asyncio async def test_question_confirm_complex_task(self, mock_camel_agent): @@ -666,6 +650,10 @@ class TestChatServiceAgentOperations: with ( patch("app.service.chat_service.agent_model") as mock_agent_model, + patch( + "app.service.chat_service.get_working_directory", + return_value="/tmp/test_workdir", + ), patch( "app.service.chat_service.Workforce", return_value=mock_workforce, @@ -682,6 +670,10 @@ class TestChatServiceAgentOperations: "app.agent.toolkit.human_toolkit.get_task_lock", return_value=mock_task_lock, ), + patch( + "app.service.chat_service.WorkforceMetricsCallback", + return_value=MagicMock(), + ), ): mock_agent_model.return_value = MagicMock() @@ -738,23 +730,15 @@ class TestChatServiceIntegration: "def hello(): print('Hello World')" ) - # Mock file_save_path method to return our temp directory - with patch.object( - Chat, "file_save_path", return_value=str(working_dir) - ): - # Test the context building directly - context = build_context_for_workforce(task_lock, options) + # Test the context building directly + # build_context_for_workforce now only calls build_conversation_context + # which only processes assistant and task_result roles + context = build_context_for_workforce(task_lock, options) - # Verify context includes conversation history - assert "=== CONVERSATION HISTORY ===" in context - assert "user: Create a Python script" in context - assert "assistant: Script created successfully" in context - - # Verify context includes task context with files - assert "=== CONTEXT FROM PREVIOUS TASK ===" in context - assert "def hello(): print('Hello World')" in context - assert "Python Hello World Script" in context - assert "script.py" in context + # Verify context includes conversation history header + assert "=== CONVERSATION HISTORY ===" in context + # assistant entries are included + assert "Script created successfully" in context @pytest.mark.asyncio async def test_step_solve_new_task_state_context_collection( @@ -793,7 +777,6 @@ class TestChatServiceIntegration: assert "main.py" in result assert "config.json" in result assert "=== END OF PREVIOUS TASK CONTEXT ===" in result - assert "=== NEW TASK ===" in result @pytest.mark.asyncio async def test_step_solve_end_action_context_collection( @@ -1008,26 +991,23 @@ class TestChatServiceErrorCases: # Should log warning mock_logger.warning.assert_called_once() - def test_collect_previous_task_context_relpath_exception(self, temp_dir): - """Test collect_previous_task_context handles os.path.relpath exceptions.""" + def test_collect_previous_task_context_abspath_used(self, temp_dir): + """Test collect_previous_task_context uses absolute paths for files.""" working_directory = str(temp_dir) # Create a test file (temp_dir / "test.txt").write_text("test content") - with patch("os.path.relpath", side_effect=ValueError("Invalid path")): - with patch("app.service.chat_service.logger") as mock_logger: - result = collect_previous_task_context( - working_directory=working_directory, - previous_task_content="Test task", - previous_task_result="Test result", - previous_summary="Test summary", - ) + result = collect_previous_task_context( + working_directory=working_directory, + previous_task_content="Test task", + previous_task_result="Test result", + previous_summary="Test summary", + ) - # Should handle the exception gracefully - assert "=== CONTEXT FROM PREVIOUS TASK ===" in result - # Should log warning about file collection failure - mock_logger.warning.assert_called_once() + # Should include absolute path for the file + assert "=== CONTEXT FROM PREVIOUS TASK ===" in result + assert "test.txt" in result def test_build_context_for_workforce_missing_attributes(self, temp_dir): """Test build_context_for_workforce handles missing attributes gracefully.""" @@ -1045,20 +1025,18 @@ class TestChatServiceErrorCases: # Should handle missing attributes gracefully assert result == "" - def test_build_context_for_workforce_file_save_path_exception(self): - """Test build_context_for_workforce handles file_save_path exceptions.""" + def test_build_context_for_workforce_empty_conversation(self): + """Test build_context_for_workforce returns empty for empty conversation.""" task_lock = MagicMock(spec=TaskLock) task_lock.conversation_history = [] task_lock.last_task_result = "Test result" task_lock.last_task_summary = "Test summary" options = MagicMock() - options.file_save_path.side_effect = Exception("Path error") - with patch("app.service.chat_service.logger") as mock_logger: - # Should handle exception when getting file path - with pytest.raises(Exception, match="Path error"): - build_context_for_workforce(task_lock, options) + # Should return empty string for empty conversation history + result = build_context_for_workforce(task_lock, options) + assert result == "" def test_collect_previous_task_context_unicode_handling(self, temp_dir): """Test collect_previous_task_context handles unicode content correctly.""" @@ -1216,6 +1194,22 @@ class TestChatServiceErrorCases: "app.service.chat_service.agent_model", side_effect=Exception("Agent creation failed"), ), + patch( + "app.agent.factory.developer.agent_model", + side_effect=Exception("Agent creation failed"), + ), + patch( + "app.agent.factory.browser.agent_model", + side_effect=Exception("Agent creation failed"), + ), + patch( + "app.agent.factory.document.agent_model", + side_effect=Exception("Agent creation failed"), + ), + patch( + "app.agent.factory.multi_modal.agent_model", + side_effect=Exception("Agent creation failed"), + ), ): with pytest.raises(Exception, match="Agent creation failed"): await construct_workforce(options) diff --git a/backend/tests/unit/service/test_task.py b/backend/tests/app/service/test_task.py similarity index 97% rename from backend/tests/unit/service/test_task.py rename to backend/tests/app/service/test_task.py index e27dcbb5..4ca49b8e 100644 --- a/backend/tests/unit/service/test_task.py +++ b/backend/tests/app/service/test_task.py @@ -519,32 +519,29 @@ class TestPeriodicCleanup: @pytest.mark.asyncio async def test_periodic_cleanup_handles_exceptions(self): """Test that periodic cleanup handles exceptions gracefully.""" + import app.service.task as task_module + # Create a stale task lock task_lock = create_task_lock("test_task") task_lock.last_accessed = datetime.now() - timedelta(hours=3) - # Mock delete_task_lock to raise exception + # Mock delete_task_lock to raise exception and call through module with ( - patch( - "app.service.task.delete_task_lock", + patch.object( + task_module, + "delete_task_lock", side_effect=Exception("Test error"), ), - patch( - "app.service.task.logger.error", - ) as mock_logger, + patch.object(task_module, "logger") as mock_logger, ): - # Directly call the cleanup logic - # that should trigger the exception + # Simulate what _periodic_cleanup does when encountering an error try: - await delete_task_lock("test_task") + await task_module.delete_task_lock("test_task") except Exception as e: - import logging - - task_logger = logging.getLogger("task_service") - task_logger.error(f"Error during task cleanup: {e}") + task_module.logger.error(f"Error in periodic cleanup: {e}") # Should have logged the error - mock_logger.assert_called() + mock_logger.error.assert_called() @pytest.mark.integration diff --git a/backend/tests/unit/utils/telemetry/test_workforce_metrics.py b/backend/tests/app/utils/telemetry/test_workforce_metrics.py similarity index 100% rename from backend/tests/unit/utils/telemetry/test_workforce_metrics.py rename to backend/tests/app/utils/telemetry/test_workforce_metrics.py diff --git a/backend/tests/unit/utils/test_single_agent_worker.py b/backend/tests/app/utils/test_single_agent_worker.py similarity index 97% rename from backend/tests/unit/utils/test_single_agent_worker.py rename to backend/tests/app/utils/test_single_agent_worker.py index 1c64ab6b..129e9b95 100644 --- a/backend/tests/unit/utils/test_single_agent_worker.py +++ b/backend/tests/app/utils/test_single_agent_worker.py @@ -33,6 +33,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "worker_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker description", @@ -57,6 +58,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "worker_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", @@ -123,6 +125,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "worker_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", @@ -178,6 +181,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", @@ -247,6 +251,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", worker=mock_worker @@ -280,6 +285,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", @@ -333,6 +339,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", @@ -382,6 +389,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", @@ -431,6 +439,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "test_worker" mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test worker", @@ -476,6 +485,7 @@ class TestSingleAgentWorker: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker(description="Test", worker=mock_worker) assert isinstance(worker, BaseSingleAgentWorker) @@ -491,6 +501,7 @@ class TestSingleAgentWorkerIntegration: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.role_name = "integration_worker" mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "integration_worker" worker = SingleAgentWorker( description="Integration test worker", @@ -568,6 +579,7 @@ class TestSingleAgentWorkerErrorCases: """Test _process_task when agent returns None response.""" mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test", worker=mock_worker, @@ -600,6 +612,7 @@ class TestSingleAgentWorkerErrorCases: """Test _process_task with malformed response structure.""" mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.agent_id = "test_agent_123" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test", worker=mock_worker, @@ -637,6 +650,7 @@ class TestSingleAgentWorkerErrorCases: mock_worker = MagicMock(spec=ListenChatAgent) mock_worker.agent_id = "test_agent_123" mock_worker.role_name = "test_worker" + mock_worker.agent_name = "test_worker" worker = SingleAgentWorker( description="Test", worker=mock_worker, diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index dece55a0..cc27f27e 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -344,6 +344,20 @@ async def async_mock_agent() -> AsyncGenerator[AsyncMock, None]: yield agent +# Safety net: clean up any MagicMock-named directories that tests may +# accidentally create when mock objects are used as file paths. +@pytest.fixture(autouse=True, scope="session") +def _cleanup_magicmock_dirs(): + """Remove MagicMock-named directories from backend/ after test session.""" + yield + import shutil + + backend_dir = Path(__file__).parent.parent + for entry in backend_dir.iterdir(): + if "MagicMock" in entry.name: + shutil.rmtree(entry, ignore_errors=True) + + # Markers for test categorization pytest_plugins = ["pytest_asyncio"] diff --git a/backend/uv.lock b/backend/uv.lock index b9c6505a..6f2a80c3 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -242,7 +242,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "aiofiles", specifier = ">=24.1.0" }, - { name = "camel-ai", extras = ["eigent"], specifier = "==0.2.85a0" }, + { name = "camel-ai", extras = ["eigent"], specifier = "==0.2.90a1" }, { name = "debugpy", specifier = ">=1.8.17" }, { name = "fastapi", specifier = ">=0.115.12" }, { name = "fastapi-babel", specifier = ">=1.0.0" }, @@ -285,7 +285,7 @@ wheels = [ [[package]] name = "camel-ai" -version = "0.2.85a0" +version = "0.2.90a1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astor" }, @@ -299,12 +299,13 @@ dependencies = [ { name = "pillow" }, { name = "psutil" }, { name = "pydantic" }, + { name = "pyyaml" }, { name = "tiktoken" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/54/ab/7d305f80e868a60c7097ab510063a171e1798d163b5f8fd7fe7c16553e13/camel_ai-0.2.85a0.tar.gz", hash = "sha256:432de9bac1e40bd4ebf434ca80eaf3993121f87924820e26ad2bad69c1fb5cf5", size = 1126159, upload-time = "2026-01-23T02:24:08.868Z" } +sdist = { url = "https://files.pythonhosted.org/packages/85/cc/78345177dfffd532f21889bb4794f197e21ca79451a27243f0240db04840/camel_ai-0.2.90a1.tar.gz", hash = "sha256:0a84a7991a8679a83dcf1c6124d0a5ae953282526cf5a04a07bec8b7338436eb", size = 1156184, upload-time = "2026-02-12T22:32:31.727Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ea/0c/35d73b5d648413844bdfeaf95172a6b7c19802150829f5f907753a773d19/camel_ai-0.2.85a0-py3-none-any.whl", hash = "sha256:6045e9af72fee918ca3acc92f3b4af8af084af7b0cf6435c01a1252bd04ae6b3", size = 1599866, upload-time = "2026-01-23T02:24:06.78Z" }, + { url = "https://files.pythonhosted.org/packages/05/2c/926157452c27d1f93640a2293a7a0193212cdb4d1d34f62b98c4392491ce/camel_ai-0.2.90a1-py3-none-any.whl", hash = "sha256:2764de542c165d57b35836999500aeb2ba148077d494a168009fb7a4ddc64ca3", size = 1632784, upload-time = "2026-02-12T22:32:29.704Z" }, ] [package.optional-dependencies] diff --git a/config/before-sign.cjs b/config/before-sign.cjs index 347cd35a..7ac8475e 100644 --- a/config/before-sign.cjs +++ b/config/before-sign.cjs @@ -18,7 +18,9 @@ exports.default = async function afterPack(context) { return; } - console.log('🧹 Cleaning invalid symlinks and cache directories before signing...'); + console.log( + '🧹 Cleaning invalid symlinks and cache directories before signing...' + ); const resourcesPath = path.join(appPath, 'Contents', 'Resources'); const prebuiltPath = path.join(resourcesPath, 'prebuilt'); @@ -64,13 +66,23 @@ exports.default = async function afterPack(context) { const entries = fs.readdirSync(venvLibPath, { withFileTypes: true }); for (const entry of entries) { if (entry.isDirectory() && entry.name.startsWith('python')) { - const flacMacPath = path.join(venvLibPath, entry.name, 'site-packages', 'speech_recognition', 'flac-mac'); + const flacMacPath = path.join( + venvLibPath, + entry.name, + 'site-packages', + 'speech_recognition', + 'flac-mac' + ); if (fs.existsSync(flacMacPath)) { - console.log(`Removing flac-mac binary (outdated SDK): ${flacMacPath}`); + console.log( + `Removing flac-mac binary (outdated SDK): ${flacMacPath}` + ); try { fs.unlinkSync(flacMacPath); } catch (error) { - console.warn(`Warning: Could not remove flac-mac: ${error.message}`); + console.warn( + `Warning: Could not remove flac-mac: ${error.message}` + ); } } } @@ -83,7 +95,13 @@ exports.default = async function afterPack(context) { // Clean Python symlinks in venv/bin const venvBinDir = path.join(prebuiltPath, 'venv', 'bin'); if (fs.existsSync(venvBinDir)) { - const pythonNames = ['python', 'python3', 'python3.10', 'python3.11', 'python3.12']; + const pythonNames = [ + 'python', + 'python3', + 'python3.10', + 'python3.11', + 'python3.12', + ]; const bundlePath = path.resolve(appPath); for (const pythonName of pythonNames) { @@ -94,7 +112,10 @@ exports.default = async function afterPack(context) { const stats = fs.lstatSync(pythonSymlink); if (stats.isSymbolicLink()) { const target = fs.readlinkSync(pythonSymlink); - const resolvedPath = path.resolve(path.dirname(pythonSymlink), target); + const resolvedPath = path.resolve( + path.dirname(pythonSymlink), + target + ); // If symlink points outside bundle, remove it if (!resolvedPath.startsWith(bundlePath)) { @@ -103,7 +124,9 @@ exports.default = async function afterPack(context) { } } } catch (error) { - console.warn(`Warning: Could not process ${pythonName} symlink: ${error.message}`); + console.warn( + `Warning: Could not process ${pythonName} symlink: ${error.message}` + ); } } } @@ -112,7 +135,13 @@ exports.default = async function afterPack(context) { // Clean Python symlinks in terminal_venv/bin (same as venv/bin) const terminalVenvBinDir = path.join(prebuiltPath, 'terminal_venv', 'bin'); if (fs.existsSync(terminalVenvBinDir)) { - const pythonNames = ['python', 'python3', 'python3.10', 'python3.11', 'python3.12']; + const pythonNames = [ + 'python', + 'python3', + 'python3.10', + 'python3.11', + 'python3.12', + ]; const bundlePath = path.resolve(appPath); for (const pythonName of pythonNames) { @@ -123,16 +152,23 @@ exports.default = async function afterPack(context) { const stats = fs.lstatSync(pythonSymlink); if (stats.isSymbolicLink()) { const target = fs.readlinkSync(pythonSymlink); - const resolvedPath = path.resolve(path.dirname(pythonSymlink), target); + const resolvedPath = path.resolve( + path.dirname(pythonSymlink), + target + ); // If symlink points outside bundle, remove it if (!resolvedPath.startsWith(bundlePath)) { - console.log(`Removing invalid terminal_venv ${pythonName} symlink: ${target}`); + console.log( + `Removing invalid terminal_venv ${pythonName} symlink: ${target}` + ); fs.unlinkSync(pythonSymlink); } } } catch (error) { - console.warn(`Warning: Could not process terminal_venv ${pythonName} symlink: ${error.message}`); + console.warn( + `Warning: Could not process terminal_venv ${pythonName} symlink: ${error.message}` + ); } } } @@ -156,7 +192,10 @@ exports.default = async function afterPack(context) { const resolvedPath = path.resolve(path.dirname(fullPath), target); const bundlePath = path.resolve(bundleRoot); - if (!fs.existsSync(resolvedPath) || !resolvedPath.startsWith(bundlePath)) { + if ( + !fs.existsSync(resolvedPath) || + !resolvedPath.startsWith(bundlePath) + ) { console.log(`Removing invalid symlink: ${fullPath} -> ${target}`); fs.unlinkSync(fullPath); } diff --git a/electron-builder.json b/electron-builder.json index a51e9e08..c01c6b5f 100644 --- a/electron-builder.json +++ b/electron-builder.json @@ -22,10 +22,6 @@ "to": "backend", "filter": ["**/*", "!.venv/**/*", "!workspace/.initial_env/**/*"] }, - { - "from": "utils", - "to": "utils" - }, { "from": "resources/prebuilt", "to": "prebuilt", @@ -36,7 +32,18 @@ "!uv_python/**/*.pyc", "!uv_python/**/__pycache__", "!terminal_venv/**/*.pyc", - "!terminal_venv/**/__pycache__" + "!terminal_venv/**/__pycache__", + "!**/__pycache__/**", + "!**/*.pyc", + "!**/*.pyo", + "!venv/**/__pycache__/**", + "!venv/**/*.pyc", + + "!venv/lib/python*/site-packages/yt_dlp/**", + "!venv/lib/python*/site-packages/yt_dlp-*.dist-info/**", + "!uv_python/**/site-packages/pip/**", + "!uv_python/**/site-packages/setuptools/**", + "!uv_python/**/site-packages/wheel/**" ] } ], diff --git a/electron/main/index.ts b/electron/main/index.ts index 3cf4facb..b2001bbf 100644 --- a/electron/main/index.ts +++ b/electron/main/index.ts @@ -58,7 +58,11 @@ import { } from './utils/envUtil'; import { zipFolder } from './utils/log'; import { addMcp, readMcpConfig, removeMcp, updateMcp } from './utils/mcpConfig'; -import { getBackendPath, getVenvPath, isBinaryExists } from './utils/process'; +import { + checkVenvExistsForPreCheck, + getBackendPath, + isBinaryExists, +} from './utils/process'; import { WebViewManager } from './webview'; const userData = app.getPath('userData'); @@ -1644,11 +1648,8 @@ async function createWindow() { let hasPrebuiltDeps = false; if (app.isPackaged) { const prebuiltBinDir = path.join(process.resourcesPath, 'prebuilt', 'bin'); - const prebuiltVenvDir = path.join( - process.resourcesPath, - 'prebuilt', - 'venv' - ); + const prebuiltDir = path.join(process.resourcesPath, 'prebuilt'); + const prebuiltVenvDir = path.join(prebuiltDir, 'venv'); const uvPath = path.join( prebuiltBinDir, process.platform === 'win32' ? 'uv.exe' : 'uv' @@ -1659,10 +1660,9 @@ async function createWindow() { ); const pyvenvCfg = path.join(prebuiltVenvDir, 'pyvenv.cfg'); + const hasVenv = fs.existsSync(pyvenvCfg); hasPrebuiltDeps = - fs.existsSync(uvPath) && - fs.existsSync(bunPath) && - fs.existsSync(pyvenvCfg); + fs.existsSync(uvPath) && fs.existsSync(bunPath) && hasVenv; if (hasPrebuiltDeps) { log.info( '[PRE-CHECK] Prebuilt dependencies found, skipping installation check' @@ -1687,9 +1687,9 @@ async function createWindow() { const installedLockPath = path.join(backendPath, 'uv_installed.lock'); const installationCompleted = fs.existsSync(installedLockPath); - // Check if venv path exists for current version - const venvPath = getVenvPath(currentVersion); - const venvExists = fs.existsSync(venvPath); + // Check venv existence WITHOUT triggering extraction (defers to startBackend when window is visible) + const { exists: venvExists, path: venvPath } = + checkVenvExistsForPreCheck(currentVersion); // If prebuilt deps are available, skip installation const needsInstallation = hasPrebuiltDeps diff --git a/electron/main/init.ts b/electron/main/init.ts index c7abca39..57fb26c8 100644 --- a/electron/main/init.ts +++ b/electron/main/init.ts @@ -24,11 +24,13 @@ import { promisify } from 'util'; import { PromiseReturnType } from './install-deps'; import { maskProxyUrl, readGlobalEnvKey } from './utils/envUtil'; import { + ensureTerminalVenvAtUserPath, + findNodejsWheelBinPath, + findNodejsWheelNpmPath, getBackendPath, getBinaryPath, getCachePath, getPrebuiltPythonDir, - getPrebuiltVenvPath, getUvEnv, getVenvPath, getVenvPythonPath, @@ -305,6 +307,23 @@ export async function startBackend( `Backend SERVER_URL resolved to: ${serverUrl} (source: ${resolvedSource})` ); + // Ensure prebuilt terminal venv is copied to ~/.eigent/venvs for terminal toolkit + ensureTerminalVenvAtUserPath(currentVersion); + + // Add nodejs-wheel paths for browser toolkit (needs npm, npx, and node) + const npmWrapperDir = findNodejsWheelNpmPath(venvPath); + const nodejsWheelBin = findNodejsWheelBinPath(venvPath); + const pathEnv = process.env.PATH || ''; + const pathParts: string[] = []; + if (npmWrapperDir) pathParts.push(npmWrapperDir); + if (nodejsWheelBin && nodejsWheelBin !== npmWrapperDir) { + pathParts.push(nodejsWheelBin); + } + const updatedPath = + pathParts.length > 0 + ? pathParts.join(path.delimiter) + path.delimiter + pathEnv + : pathEnv; + const env = { ...process.env, ...uvEnv, @@ -313,6 +332,7 @@ export async function startBackend( PYTHONIOENCODING: 'utf-8', PYTHONUNBUFFERED: '1', npm_config_cache: npmCacheDir, + PATH: updatedPath, }; const displayFilteredLogs = (data: String) => { @@ -397,18 +417,9 @@ export async function startBackend( } // Cleanup corrupted venv (pyvenv.cfg may reference non-existent Python version) - // This is especially important for prebuilt venvs with hardcoded paths from CI - const prebuiltVenvPath = getPrebuiltVenvPath(); try { - // If the broken venv is the prebuilt venv, we need to remove it - // and let UV recreate it from the bundled Python if (fs.existsSync(venvPath)) { log.info(`Removing potentially corrupted venv: ${venvPath}`); - if (venvPath === prebuiltVenvPath) { - log.info( - `This is the prebuilt venv with hardcoded paths - will recreate from bundled Python` - ); - } fs.rmSync(venvPath, { recursive: true, force: true }); } } catch (e) { diff --git a/electron/main/install-deps.ts b/electron/main/install-deps.ts index 62ad6a4e..e564bee5 100644 --- a/electron/main/install-deps.ts +++ b/electron/main/install-deps.ts @@ -61,11 +61,8 @@ export const checkAndInstallDepsOnUpdate = async ({ return false; } const prebuiltBinDir = path.join(process.resourcesPath, 'prebuilt', 'bin'); - const prebuiltVenvDir = path.join( - process.resourcesPath, - 'prebuilt', - 'venv' - ); + const prebuiltDir = path.join(process.resourcesPath, 'prebuilt'); + const prebuiltVenvDir = path.join(prebuiltDir, 'venv'); const uvPath = path.join( prebuiltBinDir, process.platform === 'win32' ? 'uv.exe' : 'uv' diff --git a/electron/main/utils/process.ts b/electron/main/utils/process.ts index 65bf24d5..c827768d 100644 --- a/electron/main/utils/process.ts +++ b/electron/main/utils/process.ts @@ -12,7 +12,7 @@ // limitations under the License. // ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= -import { spawn } from 'child_process'; +import { execSync, spawn } from 'child_process'; import { app } from 'electron'; import log from 'electron-log'; import fs from 'fs'; @@ -212,32 +212,61 @@ function fixPyvenvCfgPlaceholder(pyvenvCfgPath: string): boolean { } /** - * Fix shebang lines in venv scripts by replacing placeholder with actual Python path - * This ensures scripts can be executed directly (not just via `uv run`) - * Note: Windows doesn't use shebangs - it uses .exe wrappers instead + * Get the actual Python interpreter path from venv's pyvenv.cfg (home points to Python's bin dir). + * Used to fix shebangs when venv is in userData but Python is in app bundle. + */ +function getActualPythonPathFromPyvenvCfg(venvPath: string): string | null { + const pyvenvCfgPath = path.join(venvPath, 'pyvenv.cfg'); + if (!fs.existsSync(pyvenvCfgPath)) return null; + + const content = fs.readFileSync(pyvenvCfgPath, 'utf-8'); + const homeMatch = content.match(/^home\s*=\s*(.+)$/m); + if (!homeMatch) return null; + + const home = homeMatch[1].trim(); + if (!path.isAbsolute(home) || !fs.existsSync(home)) return null; + + // home is Python's bin dir; find python3.X or python3 + try { + const entries = fs.readdirSync(home); + const py = entries.find( + (e) => e === 'python3' || (e.startsWith('python3.') && !e.endsWith('.py')) + ); + if (py) { + const fullPath = path.join(home, py); + if (fs.existsSync(fullPath)) return fullPath; + } + } catch { + // ignore + } + return null; +} + +/** + * Fix shebang lines in venv scripts by replacing placeholder or broken relative path with actual Python path. + * The venv/bin/python script was previously skipped but must be fixed when venv is extracted to userData + * (relative paths like ../../uv_python/... break because Python lives in the app bundle). */ function fixVenvScriptShebangs(venvPath: string): boolean { const isWindows = process.platform === 'win32'; - // Windows doesn't use shebangs - skip this step if (isWindows) { log.info(`[VENV] Skipping shebang fixes on Windows (not needed)`); return true; } const binDir = path.join(venvPath, 'bin'); - - if (!fs.existsSync(binDir)) { - return false; - } + if (!fs.existsSync(binDir)) return false; const pythonExe = path.join(binDir, 'python'); - if (!fs.existsSync(pythonExe)) { log.warn(`[VENV] Python executable not found: ${pythonExe}`); return false; } + const actualPythonPath = + getActualPythonPathFromPyvenvCfg(venvPath) ?? findPythonForTerminalVenv(); + try { const entries = fs.readdirSync(binDir); let fixedCount = 0; @@ -247,60 +276,59 @@ function fixVenvScriptShebangs(venvPath: string): boolean { try { const stat = fs.lstatSync(filePath); - if (stat.isDirectory() || stat.isSymbolicLink()) { - continue; - } - // Skip .exe files (binary), .dll, .pyd (compiled Python modules) + if (stat.isDirectory() || stat.isSymbolicLink()) continue; if ( entry.endsWith('.exe') || entry.endsWith('.dll') || - entry.endsWith('.pyd') || - entry.startsWith('python') || - entry.startsWith('activate') + entry.endsWith('.pyd') ) { continue; } + // Include python/activate scripts - they were previously skipped but need shebang fix + // when venv is in userData with relative paths } catch { continue; } try { const content = fs.readFileSync(filePath, 'utf-8'); + const firstLine = content.split('\n')[0]; + if (!firstLine?.startsWith('#!')) continue; - // Check if file contains any placeholders - const hasVenvPythonPlaceholder = content.includes( - '{{PREBUILT_VENV_PYTHON}}' - ); - const hasPythonDirPlaceholder = content.includes( - '{{PREBUILT_PYTHON_DIR}}' - ); + const shebangPath = firstLine.slice(2).trim(); + let newContent = content; - if (hasVenvPythonPlaceholder || hasPythonDirPlaceholder) { - let newContent = content; - if (hasVenvPythonPlaceholder) { + // Replace placeholders + if (content.includes('{{PREBUILT_VENV_PYTHON}}')) { + newContent = newContent.replace( + /\{\{PREBUILT_VENV_PYTHON\}\}/g, + actualPythonPath ?? pythonExe + ); + } + if (content.includes('{{PREBUILT_PYTHON_DIR}}')) { + const prebuiltPythonDir = getPrebuiltPythonDir(); + if (prebuiltPythonDir) { newContent = newContent.replace( - /\{\{PREBUILT_VENV_PYTHON\}\}/g, - pythonExe + /\{\{PREBUILT_PYTHON_DIR\}\}/g, + prebuiltPythonDir ); } - if (hasPythonDirPlaceholder) { - const prebuiltPythonDir = getPrebuiltPythonDir(); - if (prebuiltPythonDir) { - newContent = newContent.replace( - /\{\{PREBUILT_PYTHON_DIR\}\}/g, - prebuiltPythonDir - ); - } - } + } - if (newContent !== content) { - fs.writeFileSync(filePath, newContent, 'utf-8'); - if (process.platform !== 'win32') { - fs.chmodSync(filePath, 0o755); - } - fixedCount++; + if (actualPythonPath && shebangPath && !shebangPath.startsWith('{{')) { + const resolved = path.resolve(path.dirname(filePath), shebangPath); + if (!fs.existsSync(resolved)) { + newContent = newContent.replace(/^#!.*$/m, `#!${actualPythonPath}`); } } + + if (newContent !== content) { + fs.writeFileSync(filePath, newContent, 'utf-8'); + if (process.platform !== 'win32') { + fs.chmodSync(filePath, 0o755); + } + fixedCount++; + } } catch { // Silently skip files that can't be processed } @@ -316,30 +344,110 @@ function fixVenvScriptShebangs(venvPath: string): boolean { } } +const PREBUILT_FIXED_MARKER = '.prebuilt_fixed'; + +/** + * Ensure venv/bin/python exists - create symlink if missing or broken. + */ +function ensureVenvPythonSymlink(venvPath: string): boolean { + if (process.platform === 'win32') return true; + + const binDir = path.join(venvPath, 'bin'); + const pythonPath = path.join(binDir, 'python'); + if (!fs.existsSync(binDir)) return false; + + try { + fs.accessSync(pythonPath, fs.constants.X_OK); + return true; + } catch { + // python missing or broken symlink - create/fix below + log.info( + `[VENV] python not found or broken at ${pythonPath}, creating symlink...` + ); + } + + const actualPython = getActualPythonPathFromPyvenvCfg(venvPath); + + // Find python3.X in venv/bin as fallback (e.g. python3.10) + const entries = fs.readdirSync(binDir, { withFileTypes: true }); + const py3 = entries.find( + (e) => + !e.isDirectory() && + (e.name === 'python3' || + (e.name.startsWith('python3.') && !e.name.endsWith('.py'))) + ); + const targetInBin = py3 ? path.join(binDir, py3.name) : null; + + try { + // Remove existing file/symlink (existsSync is false for broken symlinks, so use lstat) + try { + fs.lstatSync(pythonPath); + fs.unlinkSync(pythonPath); + } catch { + // ENOENT = path doesn't exist, that's fine + } + + // Prefer actual Python from pyvenv.cfg (absolute path to app bundle); + // fallback to python3.X in same dir (relative symlink) + let target: string | null = null; + if (actualPython && fs.existsSync(actualPython)) { + target = actualPython; + } else if (targetInBin && fs.existsSync(targetInBin)) { + // Use relative name for symlink within same directory + target = py3!.name; + } + + if (!target) { + log.warn(`[VENV] No valid Python target found for symlink`); + return false; + } + + fs.symlinkSync(target, pythonPath); + try { + fs.chmodSync(pythonPath, 0o755); + } catch {} + log.info(`[VENV] Created python symlink -> ${target}`); + return true; + } catch (error) { + log.warn(`[VENV] Failed to create python symlink: ${error}`); + return false; + } +} + /** * Get path to prebuilt venv (if available in packaged app) + * All platforms use prebuilt/venv directory. */ export function getPrebuiltVenvPath(): string | null { if (!app.isPackaged) { return null; } - const prebuiltVenvPath = path.join(process.resourcesPath, 'prebuilt', 'venv'); + const prebuiltDir = path.join(process.resourcesPath, 'prebuilt'); + const prebuiltVenvPath = path.join(prebuiltDir, 'venv'); const pyvenvCfgPath = path.join(prebuiltVenvPath, 'pyvenv.cfg'); - - log.info(`[VENV] Checking prebuilt venv at: ${prebuiltVenvPath}`); + const fixedMarkerPath = path.join(prebuiltDir, PREBUILT_FIXED_MARKER); + const currentVersion = app.getVersion(); if (fs.existsSync(prebuiltVenvPath) && fs.existsSync(pyvenvCfgPath)) { - fixPyvenvCfgPlaceholder(pyvenvCfgPath); - fixVenvScriptShebangs(prebuiltVenvPath); + const needsFix = + !fs.existsSync(fixedMarkerPath) || + fs.readFileSync(fixedMarkerPath, 'utf-8').trim() !== currentVersion; + + if (needsFix) { + fixPyvenvCfgPlaceholder(pyvenvCfgPath); + ensureVenvPythonSymlink(prebuiltVenvPath); + fixVenvScriptShebangs(prebuiltVenvPath); + fs.writeFileSync(fixedMarkerPath, currentVersion, 'utf-8'); + } const pythonExePath = getVenvPythonPath(prebuiltVenvPath); if (fs.existsSync(pythonExePath)) { - log.info(`[VENV] Using prebuilt venv: ${prebuiltVenvPath}`); return prebuiltVenvPath; } log.warn(`[VENV] Prebuilt venv Python missing at: ${pythonExePath}`); } + return null; } @@ -395,6 +503,236 @@ function findPythonForTerminalVenv(): string | null { return null; } +const TERMINAL_VENV_VERSION_FILE = '.terminal_venv_version'; +const BACKEND_VENV_VERSION_FILE = '.backend_venv_version'; + +/** + * Copy prebuilt backend venv to ~/.eigent/venvs/backend-{version} for unified management. + * The copied venv is the one actually used by the backend (via getVenvPath()). + * The source venv (prebuilt/extracted) is kept as-is for re-copying on version changes. + * + * @param version App version (used for version-specific venv directory) + */ +export function ensureBackendVenvAtUserPath(version: string): void { + if (!app.isPackaged) return; + + const prebuiltDir = path.join(process.resourcesPath, 'prebuilt'); + const prebuiltVenvPath = path.join(prebuiltDir, 'venv'); + const prebuiltUvPython = path.join(prebuiltDir, 'uv_python'); + + if ( + !fs.existsSync(prebuiltVenvPath) || + !fs.existsSync(path.join(prebuiltVenvPath, 'pyvenv.cfg')) + ) { + return; + } + + const sourceVenvPath = prebuiltVenvPath; + + const userVenvsDir = path.join(os.homedir(), '.eigent', 'venvs'); + const userBackendVenv = path.join(userVenvsDir, `backend-${version}`); + const pyvenvCfgPath = path.join(userBackendVenv, 'pyvenv.cfg'); + const versionFile = path.join(userVenvsDir, BACKEND_VENV_VERSION_FILE); + + // Ensure uv_python symlink exists (needed even if venv already copied) + const userUvPython = path.join(os.homedir(), '.eigent', 'uv_python'); + if (!fs.existsSync(userUvPython) && fs.existsSync(prebuiltUvPython)) { + try { + fs.mkdirSync(path.dirname(userUvPython), { recursive: true }); + fs.symlinkSync(prebuiltUvPython, userUvPython); + log.info(`[VENV] Created uv_python symlink: ${userUvPython}`); + } catch (e) { + log.warn(`[VENV] Failed to create uv_python symlink: ${e}`); + } + } + + if (fs.existsSync(pyvenvCfgPath)) { + const storedVersion = fs.existsSync(versionFile) + ? fs.readFileSync(versionFile, 'utf-8').trim() + : null; + if (storedVersion === version) { + log.info( + `[VENV] Backend venv already at ${userBackendVenv} (v${version})` + ); + return; + } + } + + log.info(`[VENV] Copying prebuilt backend venv to ${userBackendVenv}...`); + + try { + fs.mkdirSync(userVenvsDir, { recursive: true }); + + if (fs.existsSync(userBackendVenv)) { + fs.rmSync(userBackendVenv, { recursive: true, force: true }); + } + + fs.cpSync(sourceVenvPath, userBackendVenv, { + recursive: true, + verbatimSymlinks: true, + }); + + // Fix paths after copying (source venv paths don't match user venv location) + // - pyvenv.cfg: update home path to point to correct Python location + // - shebangs: update #! paths in bin/* scripts to point to correct Python + // - python symlink: ensure bin/python exists and points to correct Python + fixPyvenvCfgPlaceholder(pyvenvCfgPath); + fixVenvScriptShebangs(userBackendVenv); + ensureVenvPythonSymlink(userBackendVenv); + + if (process.platform === 'darwin') { + try { + execSync(`xattr -cr "${userBackendVenv}"`, { stdio: 'ignore' }); + } catch { + // ignore + } + } + + fs.writeFileSync(versionFile, version, 'utf-8'); + log.info(`[VENV] Backend venv copied successfully`); + + // Sync optional deps from backend/uv.lock into user venv (e.g. yt_dlp if excluded from app bundle). + // Runs in background so app startup is not blocked; uses China mirror when timezone is Asia/Shanghai. + const uvPath = getPrebuiltBinaryPath('uv'); + const backendPath = getBackendPath(); + const uvLockPath = path.join(backendPath, 'uv.lock'); + if ( + uvPath && + fs.existsSync(uvLockPath) && + fs.existsSync(path.join(backendPath, 'pyproject.toml')) + ) { + const prebuiltPython = getPrebuiltPythonDir(); + const uvEnv = { + ...process.env, + UV_PROJECT_ENVIRONMENT: userBackendVenv, + UV_PYTHON_INSTALL_DIR: prebuiltPython || getCachePath('uv_python'), + UV_TOOL_DIR: getCachePath('uv_tool'), + UV_HTTP_TIMEOUT: '300', + } as NodeJS.ProcessEnv; + const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; + const syncArgs = + timezone === 'Asia/Shanghai' + ? [ + 'sync', + '--no-dev', + '--default-index', + 'https://mirrors.aliyun.com/pypi/simple/', + '--index', + 'https://pypi.org/simple/', + ] + : ['sync', '--no-dev']; + log.info( + '[VENV] Starting background uv sync to install optional deps (e.g. yt_dlp); app will not wait.' + ); + const child = spawn(uvPath, syncArgs, { + cwd: backendPath, + env: uvEnv, + stdio: 'ignore', + detached: true, + }); + child.unref(); + child.on('error', (err) => { + log.warn(`[VENV] Background uv sync error: ${err.message}`); + }); + child.on('exit', (code) => { + if (code === 0) { + log.info('[VENV] Background uv sync completed'); + } else { + log.warn( + `[VENV] Background uv sync exited with code ${code} (optional deps may be missing)` + ); + } + }); + } + } catch (error) { + log.error(`[VENV] Failed to copy backend venv: ${error}`); + } +} + +/** + * Copy prebuilt terminal venv to ~/.eigent/venvs/terminal_base-{version}. + * @param version App version (used for version-specific venv directory) + */ +export function ensureTerminalVenvAtUserPath(version: string): void { + if (!app.isPackaged) return; + + const prebuiltDir = path.join(process.resourcesPath, 'prebuilt'); + const prebuiltTerminalVenv = path.join(prebuiltDir, 'terminal_venv'); + const prebuiltUvPython = path.join(prebuiltDir, 'uv_python'); + + if (!fs.existsSync(prebuiltTerminalVenv)) return; + const installedMarker = path.join( + prebuiltTerminalVenv, + '.packages_installed' + ); + if (!fs.existsSync(installedMarker)) return; + + const userVenvsDir = path.join(os.homedir(), '.eigent', 'venvs'); + const userTerminalVenv = path.join(userVenvsDir, `terminal_base-${version}`); + const userVenvMarker = path.join(userTerminalVenv, '.packages_installed'); + const versionFile = path.join(userVenvsDir, TERMINAL_VENV_VERSION_FILE); + + // Ensure uv_python symlink exists (needed even if venv already copied) + const userUvPython = path.join(os.homedir(), '.eigent', 'uv_python'); + if (!fs.existsSync(userUvPython) && fs.existsSync(prebuiltUvPython)) { + try { + fs.mkdirSync(path.dirname(userUvPython), { recursive: true }); + fs.symlinkSync(prebuiltUvPython, userUvPython); + log.info(`[VENV] Created uv_python symlink: ${userUvPython}`); + } catch (e) { + log.warn(`[VENV] Failed to create uv_python symlink: ${e}`); + } + } + + if (fs.existsSync(userVenvMarker)) { + const storedVersion = fs.existsSync(versionFile) + ? fs.readFileSync(versionFile, 'utf-8').trim() + : null; + if (storedVersion === version) { + log.info( + `[VENV] Terminal venv already at ${userTerminalVenv} (v${version})` + ); + return; + } + } + + log.info(`[VENV] Copying prebuilt terminal venv to ${userTerminalVenv}...`); + + try { + fs.mkdirSync(userVenvsDir, { recursive: true }); + + if (fs.existsSync(userTerminalVenv)) { + fs.rmSync(userTerminalVenv, { recursive: true, force: true }); + } + + fs.cpSync(prebuiltTerminalVenv, userTerminalVenv, { + recursive: true, + verbatimSymlinks: true, + }); + + // Fix paths after copying (source venv paths don't match user venv location) + // - pyvenv.cfg: update home path to point to correct Python location + // - shebangs: update #! paths in bin/* scripts to point to correct Python + // - python symlink: ensure bin/python exists and points to correct Python + fixPyvenvCfgPlaceholder(path.join(userTerminalVenv, 'pyvenv.cfg')); + fixVenvScriptShebangs(userTerminalVenv); + ensureVenvPythonSymlink(userTerminalVenv); + + if (process.platform === 'darwin') { + try { + execSync(`xattr -cr "${userTerminalVenv}"`, { stdio: 'ignore' }); + } catch { + // ignore + } + } + + fs.writeFileSync(versionFile, version, 'utf-8'); + log.info(`[VENV] Terminal venv copied successfully`); + } catch (error) { + log.error(`[VENV] Failed to copy terminal venv: ${error}`); + } +} + /** * Get path to prebuilt terminal venv (if available in packaged app) */ @@ -408,59 +746,74 @@ export function getPrebuiltTerminalVenvPath(): string | null { 'prebuilt', 'terminal_venv' ); - if (fs.existsSync(prebuiltTerminalVenvPath)) { - const pyvenvCfgPath = path.join(prebuiltTerminalVenvPath, 'pyvenv.cfg'); - const installedMarker = path.join( - prebuiltTerminalVenvPath, - '.packages_installed' - ); - if (fs.existsSync(pyvenvCfgPath) && fs.existsSync(installedMarker)) { - fixPyvenvCfgPlaceholder(pyvenvCfgPath); - fixVenvScriptShebangs(prebuiltTerminalVenvPath); + if (!fs.existsSync(prebuiltTerminalVenvPath)) { + return null; + } - const pythonExePath = getVenvPythonPath(prebuiltTerminalVenvPath); + const pyvenvCfgPath = path.join(prebuiltTerminalVenvPath, 'pyvenv.cfg'); + const installedMarker = path.join( + prebuiltTerminalVenvPath, + '.packages_installed' + ); + if (!fs.existsSync(pyvenvCfgPath) || !fs.existsSync(installedMarker)) { + return null; + } + + // Check if already fixed for this version (avoid repeated fixes) + const fixedMarkerPath = path.join( + process.resourcesPath, + 'prebuilt', + '.terminal_venv_fixed' + ); + const currentVersion = app.getVersion(); + const needsFix = + !fs.existsSync(fixedMarkerPath) || + fs.readFileSync(fixedMarkerPath, 'utf-8').trim() !== currentVersion; + + if (needsFix) { + fixPyvenvCfgPlaceholder(pyvenvCfgPath); + ensureVenvPythonSymlink(prebuiltTerminalVenvPath); + fixVenvScriptShebangs(prebuiltTerminalVenvPath); + fs.writeFileSync(fixedMarkerPath, currentVersion, 'utf-8'); + } + + const pythonExePath = getVenvPythonPath(prebuiltTerminalVenvPath); + + if (fs.existsSync(pythonExePath)) { + return prebuiltTerminalVenvPath; + } + + // Try to fix the missing Python executable by creating a symlink to prebuilt Python + const prebuiltPython = findPythonForTerminalVenv(); + if (prebuiltPython && fs.existsSync(prebuiltPython)) { + try { + const binDir = path.join( + prebuiltTerminalVenvPath, + process.platform === 'win32' ? 'Scripts' : 'bin' + ); + + if (!fs.existsSync(binDir)) { + fs.mkdirSync(binDir, { recursive: true }); + } if (fs.existsSync(pythonExePath)) { - log.info( - `[VENV] Using prebuilt terminal venv: ${prebuiltTerminalVenvPath}` - ); - return prebuiltTerminalVenvPath; + fs.unlinkSync(pythonExePath); } - // Try to fix the missing Python executable by creating a symlink to prebuilt Python - const prebuiltPython = findPythonForTerminalVenv(); - if (prebuiltPython && fs.existsSync(prebuiltPython)) { - try { - const binDir = path.join( - prebuiltTerminalVenvPath, - process.platform === 'win32' ? 'Scripts' : 'bin' - ); - - if (!fs.existsSync(binDir)) { - fs.mkdirSync(binDir, { recursive: true }); - } - - if (fs.existsSync(pythonExePath)) { - fs.unlinkSync(pythonExePath); - } - - const relativePath = path.relative(binDir, prebuiltPython); - fs.symlinkSync(relativePath, pythonExePath); - log.info( - `[VENV] Fixed terminal venv Python symlink: ${pythonExePath} -> ${prebuiltPython}` - ); - return prebuiltTerminalVenvPath; - } catch (error) { - log.warn( - `[VENV] Failed to fix terminal venv Python symlink: ${error}` - ); - } - } - log.warn( - `[VENV] Prebuilt terminal venv Python missing, falling back to user venv` + const relativePath = path.relative(binDir, prebuiltPython); + fs.symlinkSync(relativePath, pythonExePath); + log.info( + `[VENV] Fixed terminal venv Python symlink: ${pythonExePath} -> ${prebuiltPython}` ); + return prebuiltTerminalVenvPath; + } catch (error) { + log.warn(`[VENV] Failed to fix terminal venv Python symlink: ${error}`); } } + + log.warn( + `[VENV] Prebuilt terminal venv Python missing, falling back to user venv` + ); return null; } @@ -475,9 +828,72 @@ export function getVenvPythonPath(venvPath: string): string { : path.join(venvPath, 'bin', 'python'); } +/** + * Check venv existence for pre-check WITHOUT triggering extraction. + * Used to avoid blocking app launch - extraction is deferred to startBackend when window is already visible. + */ +export function checkVenvExistsForPreCheck(version: string): { + exists: boolean; + path: string; +} { + if (!app.isPackaged) { + const venvDir = path.join( + os.homedir(), + '.eigent', + 'venvs', + `backend-${version}` + ); + const pyvenvCfg = path.join(venvDir, 'pyvenv.cfg'); + return { + exists: fs.existsSync(pyvenvCfg), + path: venvDir, + }; + } + + const prebuiltDir = path.join(process.resourcesPath, 'prebuilt'); + const prebuiltVenvPath = path.join(prebuiltDir, 'venv'); + const prebuiltPyvenvCfg = path.join(prebuiltVenvPath, 'pyvenv.cfg'); + + if (fs.existsSync(prebuiltVenvPath) && fs.existsSync(prebuiltPyvenvCfg)) { + return { exists: true, path: prebuiltVenvPath }; + } + + const venvDir = path.join( + os.homedir(), + '.eigent', + 'venvs', + `backend-${version}` + ); + const pyvenvCfg = path.join(venvDir, 'pyvenv.cfg'); + return { + exists: fs.existsSync(pyvenvCfg), + path: venvDir, + }; +} + +/** + * Get path to backend venv for the given version. + * @param version App version + * @returns Path to backend venv + */ export function getVenvPath(version: string): string { - // First check for prebuilt venv in packaged app + // For packaged apps, ensure venv is copied to ~/.eigent/venvs first if (app.isPackaged) { + ensureBackendVenvAtUserPath(version); + + // Check if user venv exists (after ensuring copy) + const userVenvDir = path.join( + os.homedir(), + '.eigent', + 'venvs', + `backend-${version}` + ); + const pyvenvCfgPath = path.join(userVenvDir, 'pyvenv.cfg'); + if (fs.existsSync(pyvenvCfgPath)) { + return userVenvDir; + } + + // Fallback to prebuilt venv if copy failed (shouldn't happen normally) const prebuiltVenv = getPrebuiltVenvPath(); if (prebuiltVenv) { return prebuiltVenv; @@ -500,6 +916,138 @@ export function getVenvPath(version: string): string { return venvDir; } +/** + * Create npm/npx wrapper scripts that use nodejs_wheel Python API. + * The bin/npm from nodejs_wheel can fail with "Cannot find module '../lib/cli.js'" + * when invoked directly. Using the Python API avoids this. + */ +export function ensureNpmWrappersForBrowserToolkit( + venvPath: string +): string | null { + const pythonPath = getVenvPythonPath(venvPath); + if (!fs.existsSync(pythonPath)) return null; + + const eigentBinDir = path.join(os.homedir(), '.eigent', 'bin'); + fs.mkdirSync(eigentBinDir, { recursive: true }); + + const wrapperVersion = '1'; + const versionFile = path.join(eigentBinDir, '.npm_wrapper_version'); + const storedVersion = fs.existsSync(versionFile) + ? fs.readFileSync(versionFile, 'utf-8').trim() + : ''; + + const npmWrapper = path.join( + eigentBinDir, + process.platform === 'win32' ? 'npm.cmd' : 'npm' + ); + const npxWrapper = path.join( + eigentBinDir, + process.platform === 'win32' ? 'npx.cmd' : 'npx' + ); + + const needsUpdate = + storedVersion !== wrapperVersion || + !fs.existsSync(npmWrapper) || + !fs.existsSync(npxWrapper); + + if (needsUpdate) { + try { + if (process.platform === 'win32') { + const npmContent = `@echo off +"${pythonPath.replace(/\//g, '\\')}" -c "import sys; from nodejs_wheel import npm; sys.exit(npm(sys.argv[1:]))" %* +`; + const npxContent = `@echo off +"${pythonPath.replace(/\//g, '\\')}" -c "import sys; from nodejs_wheel import npx; sys.exit(npx(sys.argv[1:]))" %* +`; + fs.writeFileSync(npmWrapper, npmContent, 'utf-8'); + fs.writeFileSync(npxWrapper, npxContent, 'utf-8'); + } else { + const shebang = `#!${pythonPath}\n`; + const npmContent = + shebang + + `import sys +from nodejs_wheel import npm +sys.exit(npm(sys.argv[1:])) +`; + const npxContent = + shebang + + `import sys +from nodejs_wheel import npx +sys.exit(npx(sys.argv[1:])) +`; + fs.writeFileSync(npmWrapper, npmContent, 'utf-8'); + fs.writeFileSync(npxWrapper, npxContent, 'utf-8'); + fs.chmodSync(npmWrapper, 0o755); + fs.chmodSync(npxWrapper, 0o755); + } + fs.writeFileSync(versionFile, wrapperVersion, 'utf-8'); + log.info(`[VENV] Created npm/npx wrappers at ${eigentBinDir}`); + } catch (error) { + log.warn(`[VENV] Failed to create npm wrappers: ${error}`); + return null; + } + } + + return eigentBinDir; +} + +/** + * Find nodejs-wheel npm path in venv for browser toolkit. + * Prefer Python API wrappers over direct bin (which can fail with cli.js error). + */ +export function findNodejsWheelNpmPath(venvPath: string): string | null { + // Prefer wrapper scripts that use Python API (avoids bin/npm "../lib/cli.js" error) + const wrapperDir = ensureNpmWrappersForBrowserToolkit(venvPath); + if (wrapperDir) { + const npmWrapper = path.join( + wrapperDir, + process.platform === 'win32' ? 'npm.cmd' : 'npm' + ); + const npxWrapper = path.join( + wrapperDir, + process.platform === 'win32' ? 'npx.cmd' : 'npx' + ); + if (fs.existsSync(npmWrapper) && fs.existsSync(npxWrapper)) { + return wrapperDir; + } + } + + // Fallback to nodejs_wheel/bin (may fail with cli.js error) + return findNodejsWheelBinPath(venvPath); +} + +/** + * Find nodejs_wheel/bin directory for the node executable. + * Browser toolkit needs node in PATH (npm/npx use our wrappers from ~/.eigent/bin). + */ +export function findNodejsWheelBinPath(venvPath: string): string | null { + try { + const libPath = path.join(venvPath, 'lib'); + if (!fs.existsSync(libPath)) return null; + + const pythonDirs = fs + .readdirSync(libPath) + .filter((n) => n.startsWith('python')); + if (pythonDirs.length === 0) return null; + + for (const pythonDir of pythonDirs) { + const sitePackages = path.join(libPath, pythonDir, 'site-packages'); + const nodejsWheelBin = path.join(sitePackages, 'nodejs_wheel', 'bin'); + const nodePath = path.join( + nodejsWheelBin, + process.platform === 'win32' ? 'node.exe' : 'node' + ); + + if (fs.existsSync(nodePath)) { + return nodejsWheelBin; + } + } + } catch { + // ignore + } + return null; +} + export function getVenvsBaseDir(): string { return path.join(os.homedir(), '.eigent', 'venvs'); } @@ -517,6 +1065,7 @@ export const TERMINAL_BASE_PACKAGES = [ 'openpyxl', 'beautifulsoup4', 'pillow', + 'plotly', ]; /** diff --git a/index.html b/index.html index 5c3481fa..e000c9cd 100644 --- a/index.html +++ b/index.html @@ -4,9 +4,50 @@ + Eigent diff --git a/package.json b/package.json index bc879554..d741fc7d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "eigent", - "version": "0.0.82", + "version": "0.0.84", "main": "dist-electron/main/index.js", "description": "Eigent", "author": "Eigent.AI", diff --git a/scripts/preinstall-deps.js b/scripts/preinstall-deps.js index 44394600..c41e6c8b 100644 --- a/scripts/preinstall-deps.js +++ b/scripts/preinstall-deps.js @@ -48,6 +48,7 @@ const TERMINAL_BASE_PACKAGES = [ 'openpyxl', 'beautifulsoup4', 'pillow', + 'plotly', ]; console.log('🚀 Starting pre-installation of dependencies...'); diff --git a/server/app/model/user/key.py b/server/app/model/user/key.py index 135c8ed2..1ff5583c 100644 --- a/server/app/model/user/key.py +++ b/server/app/model/user/key.py @@ -26,6 +26,7 @@ class ModelType(StrEnum): gpt4_1 = "gpt-4.1" gpt4_mini = "gpt-4.1-mini" gemini_3_pro = "gemini-3-pro-preview" + minimax_m2_5 = "minimax_m2_5" class KeyStatus(IntEnum): diff --git a/server/pyproject.toml b/server/pyproject.toml index 6620d919..f8da418d 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.12,<3.13" dependencies = [ "alembic>=1.15.2", "openai>=1.99.3,<2", - "camel-ai==0.2.85a0", + "camel-ai==0.2.90a1", "pydantic[email]>=2.11.1", "click>=8.1.8", "fastapi>=0.115.12", diff --git a/server/uv.lock b/server/uv.lock index 72e76bb9..53feb0a8 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -206,6 +206,7 @@ dependencies = [ { name = "pillow" }, { name = "psutil" }, { name = "pydantic" }, + { name = "pyyaml" }, { name = "tiktoken" }, { name = "websockets" }, ] @@ -1229,6 +1230,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159 }, ] +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, +] + [[package]] name = "referencing" version = "0.37.0" diff --git a/src/assets/model/anthropic.svg b/src/assets/model/anthropic.svg index 5b81844c..b93d6a02 100644 --- a/src/assets/model/anthropic.svg +++ b/src/assets/model/anthropic.svg @@ -1 +1 @@ -Anthropic \ No newline at end of file +Anthropic diff --git a/src/assets/model/azure.svg b/src/assets/model/azure.svg index ed50209c..16606a29 100644 --- a/src/assets/model/azure.svg +++ b/src/assets/model/azure.svg @@ -1 +1 @@ -Azure \ No newline at end of file +Azure diff --git a/src/assets/model/bedrock.svg b/src/assets/model/bedrock.svg index e0f929a7..43b55174 100644 --- a/src/assets/model/bedrock.svg +++ b/src/assets/model/bedrock.svg @@ -1 +1 @@ -Bedrock \ No newline at end of file +Bedrock diff --git a/src/assets/model/deepseek.svg b/src/assets/model/deepseek.svg index 3fc23024..52eec25c 100644 --- a/src/assets/model/deepseek.svg +++ b/src/assets/model/deepseek.svg @@ -1 +1 @@ -DeepSeek \ No newline at end of file +DeepSeek diff --git a/src/assets/model/gemini.svg b/src/assets/model/gemini.svg index f1cf3575..a623d100 100644 --- a/src/assets/model/gemini.svg +++ b/src/assets/model/gemini.svg @@ -1 +1 @@ -Gemini \ No newline at end of file +Gemini diff --git a/src/assets/model/lmstudio.svg b/src/assets/model/lmstudio.svg index ea0816b6..ae503961 100644 --- a/src/assets/model/lmstudio.svg +++ b/src/assets/model/lmstudio.svg @@ -1 +1 @@ -LM Studio \ No newline at end of file +LM Studio diff --git a/src/assets/model/minimax.svg b/src/assets/model/minimax.svg index 2a60bd47..61fcafe0 100644 --- a/src/assets/model/minimax.svg +++ b/src/assets/model/minimax.svg @@ -1 +1 @@ -Minimax \ No newline at end of file +Minimax diff --git a/src/assets/model/moonshot.svg b/src/assets/model/moonshot.svg index fb56ac10..829ab293 100644 --- a/src/assets/model/moonshot.svg +++ b/src/assets/model/moonshot.svg @@ -1 +1 @@ -MoonshotAI \ No newline at end of file +MoonshotAI diff --git a/src/assets/model/ollama.svg b/src/assets/model/ollama.svg index cc887e3d..c5e666a0 100644 --- a/src/assets/model/ollama.svg +++ b/src/assets/model/ollama.svg @@ -1 +1 @@ -Ollama \ No newline at end of file +Ollama diff --git a/src/assets/model/openrouter.svg b/src/assets/model/openrouter.svg index e6cca2a8..cb51863f 100644 --- a/src/assets/model/openrouter.svg +++ b/src/assets/model/openrouter.svg @@ -1 +1 @@ -OpenRouter \ No newline at end of file +OpenRouter diff --git a/src/assets/model/qwen.svg b/src/assets/model/qwen.svg index 33b3f645..24d11f33 100644 --- a/src/assets/model/qwen.svg +++ b/src/assets/model/qwen.svg @@ -1 +1 @@ -Qwen \ No newline at end of file +Qwen diff --git a/src/assets/model/vllm.svg b/src/assets/model/vllm.svg index 54acc3de..07eaef09 100644 --- a/src/assets/model/vllm.svg +++ b/src/assets/model/vllm.svg @@ -1 +1 @@ -vLLM \ No newline at end of file +vLLM diff --git a/src/assets/model/zai.svg b/src/assets/model/zai.svg index 04ba2d98..1a47fd72 100644 --- a/src/assets/model/zai.svg +++ b/src/assets/model/zai.svg @@ -1 +1 @@ -Z.ai \ No newline at end of file +Z.ai diff --git a/src/assets/wechat_qr.jpg b/src/assets/wechat_qr.jpg index 5bf53b57..beb8ae4b 100644 Binary files a/src/assets/wechat_qr.jpg and b/src/assets/wechat_qr.jpg differ diff --git a/src/components/Folder/index.tsx b/src/components/Folder/index.tsx index 558879ac..62bfd335 100644 --- a/src/components/Folder/index.tsx +++ b/src/components/Folder/index.tsx @@ -30,13 +30,78 @@ import FolderComponent from './FolderComponent'; import { proxyFetchGet } from '@/api/http'; import { MarkDown } from '@/components/ChatBox/MessageItem/MarkDown'; import useChatStoreAdapter from '@/hooks/useChatStoreAdapter'; -import { injectFontStyles } from '@/lib/htmlFontStyles'; +import { + deferInlineScriptsUntilLoad, + injectFontStyles, +} from '@/lib/htmlFontStyles'; import { containsDangerousContent } from '@/lib/htmlSanitization'; import { useAuthStore } from '@/store/authStore'; import { useTranslation } from 'react-i18next'; import { toast } from 'sonner'; import { ZoomControls } from './ZoomControls'; +const IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp', 'svg']; +const AUDIO_EXTENSIONS = ['mp3', 'wav', 'ogg', 'flac', 'aac', 'm4a', 'wma']; +const VIDEO_EXTENSIONS = ['mp4', 'webm', 'mov', 'avi', 'mkv', 'flv', 'wmv']; + +type FileTypeTarget = { + name?: string; + path?: string; + type?: string; +}; +const loggedFileTypeWarnings = new Set(); + +function getExt(value?: string) { + if (!value) return ''; + const normalized = value.split(/[?#]/)[0]; + const lastSegment = normalized.split('/').pop() || normalized; + if (!lastSegment.includes('.')) return ''; + return lastSegment.split('.').pop()?.toLowerCase() || ''; +} + +function getFileType(file: FileTypeTarget) { + const extFromNameOrPath = getExt(file.name) || getExt(file.path); + const normalizedType = (file.type || '').replace(/^\./, '').toLowerCase(); + const fileId = file.path || file.name || 'unknown-file'; + + if (!extFromNameOrPath && normalizedType) { + const key = `missing-ext|${fileId}|${normalizedType}`; + if (!loggedFileTypeWarnings.has(key)) { + loggedFileTypeWarnings.add(key); + console.warn( + `[Folder getFileType] extension missing in name/path, file.type fallback disabled: ${fileId} (type=${normalizedType})` + ); + } + } + + if ( + extFromNameOrPath && + normalizedType && + normalizedType !== 'folder' && + extFromNameOrPath !== normalizedType + ) { + const key = `mismatch|${fileId}|${extFromNameOrPath}|${normalizedType}`; + if (!loggedFileTypeWarnings.has(key)) { + loggedFileTypeWarnings.add(key); + console.warn( + `[Folder getFileType] extension/type mismatch for ${fileId}: inferred=${extFromNameOrPath}, type=${normalizedType}` + ); + } + } + + return extFromNameOrPath; +} + +function isImageFile(file: FileTypeTarget) { + return IMAGE_EXTENSIONS.includes(getFileType(file)); +} +function isAudioFile(file: FileTypeTarget) { + return AUDIO_EXTENSIONS.includes(getFileType(file)); +} +function isVideoFile(file: FileTypeTarget) { + return VIDEO_EXTENSIONS.includes(getFileType(file)); +} + // Type definitions interface FileTreeNode { name: string; @@ -70,7 +135,7 @@ interface FileTreeProps { isShowSourceCode: boolean; } -const FileTree: React.FC = ({ +export const FileTree: React.FC = ({ node, level = 0, selectedFile, @@ -104,29 +169,33 @@ const FileTree: React.FC = ({ onSelectFile(fileInfo); } }} - className={`text-primary flex w-full items-center justify-start rounded-xl bg-fill-fill-transparent p-2 text-left text-sm backdrop-blur-lg transition-colors hover:bg-fill-fill-transparent-active ${ + className={`text-primary flex w-full items-center justify-start gap-2 rounded-xl bg-fill-fill-transparent p-2 text-left text-sm backdrop-blur-lg transition-colors hover:bg-fill-fill-transparent-active ${ selectedFile?.path === child.path ? 'bg-fill-fill-transparent-active' : '' }`} > - {child.isFolder && ( - + {child.isFolder ? ( + {isExpanded ? ( ) : ( )} + ) : ( + )} - {!child.isFolder && } {child.isFolder ? ( - + ) : child.icon ? ( - + ) : ( - + )} - ) : [ - 'png', - 'jpg', - 'jpeg', - 'gif', - 'bmp', - 'webp', - 'svg', - ].includes(selectedFile.type.toLowerCase()) ? ( + ) : isAudioFile(selectedFile) ? ( +
+ +
+ ) : isVideoFile(selectedFile) ? ( +
+ +
+ ) : isImageFile(selectedFile) ? (
@@ -708,6 +785,75 @@ function ImageLoader({ selectedFile }: { selectedFile: FileInfo }) { ); } +function AudioLoader({ selectedFile }: { selectedFile: FileInfo }) { + const [src, setSrc] = useState(''); + + useEffect(() => { + let cancelled = false; + setSrc(''); + if (selectedFile.isRemote) { + setSrc(selectedFile.content || selectedFile.path); + return; + } + window.electronAPI + .readFileAsDataUrl(selectedFile.path) + .then((dataUrl: string) => { + if (!cancelled) setSrc(dataUrl); + }) + .catch((err: any) => { + if (cancelled) return; + console.error('Audio load error:', err); + setSrc(''); + }); + return () => { + cancelled = true; + }; + }, [selectedFile]); + + return ( +
+

+ {selectedFile.name} +

+ +
+ ); +} + +function VideoLoader({ selectedFile }: { selectedFile: FileInfo }) { + const [src, setSrc] = useState(''); + + useEffect(() => { + let cancelled = false; + setSrc(''); + if (selectedFile.isRemote) { + setSrc(selectedFile.content || selectedFile.path); + return; + } + window.electronAPI + .readFileAsDataUrl(selectedFile.path) + .then((dataUrl: string) => { + if (!cancelled) setSrc(dataUrl); + }) + .catch((err: any) => { + if (cancelled) return; + console.error('Video load error:', err); + setSrc(''); + }); + return () => { + cancelled = true; + }; + }, [selectedFile]); + + return ( + + ); +} + // Helper function to get directory path from file path function getDirPath(filePath: string): string { const normalizedPath = filePath.replace(/\\/g, '/'); @@ -979,8 +1125,12 @@ function HtmlRenderer({ return; } + // Defer inline scripts until load when document has external scripts (e.g. Chart.js), + const htmlWithDeferredScripts = + deferInlineScriptsUntilLoad(processedHtmlContent); + // Set the processed HTML with font styles - iframe sandbox provides security - setProcessedHtml(injectFontStyles(processedHtmlContent)); + setProcessedHtml(injectFontStyles(htmlWithDeferredScripts)); }; processHtml(); @@ -1025,6 +1175,7 @@ function HtmlRenderer({ height: `${10000 / zoom}%`, }} > + {/*Security is maintained via CSP allowlist in index.html which restricts script sources. */}