eigent/backend/benchmark/dataset/2.json
bytecii f7bf29a40a
benchmark: update benchmark (#1207)
Co-authored-by: bytecii <bytecii@users.noreply.github.com>
Co-authored-by: Wendong-Fan <w3ndong.fan@gmail.com>
Co-authored-by: Wendong-Fan <133094783+Wendong-Fan@users.noreply.github.com>
2026-02-12 16:35:18 +08:00

16 lines
1.2 KiB
JSON

{
"metadata": {
"difficulty": "medium",
"description": "Benchmarks 1) browser-use capability through in-depth browser operations, 2) document generation under strict CSV format constraints, and 3) implicit classification of each company's category.",
"tags": ["browser", "research", "data-extraction", "csv", "multi-step"]
},
"data": {
"name": "2",
"question": "Identify all B2B companies in the Y Combinator Winter 2025 batch whose product is related to AI. After you obtain the full company list, independently investigate each company's product information in detail and consolidate all findings into a clean, well-structured CSV file named 'yc_w25_b2b_ai.csv' with columns: company_name (in lowercase), product_description (100 chars max), ai_category (use a consistent set of values including 'ai-agents', 'ai-infrastructure', 'ai-developer-tools', 'ai-analytics', 'ai-security', 'ai-healthcare', 'ai-sales', 'ai-productivity', 'ai-customer-support', 'ai-coding', 'ai-data', 'ai-fintech', 'ai-legal', 'ai-hr', 'ai-marketing', and 'ai-other').",
"env": {}
},
"tests": {
"checker": ["benchmark/checker/2.py"],
"grader": ["benchmark/grader/2.py"]
}
}