mirror of
https://github.com/eigent-ai/eigent.git
synced 2026-05-23 04:17:45 +00:00
Co-authored-by: bytecii <bytecii@users.noreply.github.com> Co-authored-by: Wendong-Fan <w3ndong.fan@gmail.com> Co-authored-by: Wendong-Fan <133094783+Wendong-Fan@users.noreply.github.com>
139 lines
4.3 KiB
Python
139 lines
4.3 KiB
Python
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
"""Grader for benchmark 1: evaluate python313_features.md milestones."""
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
# Directory holding the browser toolkit logs: "browser_log" located two levels
# above the directory that contains this file. _visited_urls() scans it for
# files named hybrid_browser_toolkit_ws_*.log.
BROWSER_LOG_DIR = Path(__file__).resolve().parents[2] / "browser_log"
|
|
|
|
|
|
def _visited_urls(log_dir: "Path | None" = None) -> set[str]:
    """Collect every URL passed to a ``visit_page`` action in browser logs.

    Every ``hybrid_browser_toolkit_ws_*.log`` file in the log directory is
    read as a stream of concatenated JSON values. Text that does not parse
    as JSON is skipped one character at a time, so a damaged entry does not
    hide the entries that follow it.

    Args:
        log_dir: Directory to scan. When ``None`` (the default) the
            module-level ``BROWSER_LOG_DIR`` is used.

    Returns:
        The set of first positional arguments of all ``visit_page`` entries.
        Empty when the directory is missing or holds no matching entries.
    """
    directory = BROWSER_LOG_DIR if log_dir is None else Path(log_dir)
    urls: set[str] = set()
    if not directory.is_dir():
        return urls
    for log_file in directory.glob("hybrid_browser_toolkit_ws_*.log"):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding; undecodable bytes must not abort the
        # whole grading run.
        raw = log_file.read_text(encoding="utf-8", errors="replace")
        urls.update(_visit_page_urls(raw))
    return urls


def _visit_page_urls(raw: str) -> set[str]:
    """Return the URLs of all ``visit_page`` entries found in *raw*."""
    decoder = json.JSONDecoder()
    found: set[str] = set()
    pos = 0
    size = len(raw)
    while pos < size:
        # Advance by index instead of slicing: copying the remainder of the
        # text for every entry makes large logs quadratic to parse.
        if raw[pos].isspace():
            pos += 1
            continue
        try:
            obj, pos = decoder.raw_decode(raw, pos)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            pos += 1
            continue
        if not isinstance(obj, dict) or obj.get("action") != "visit_page":
            continue
        # "inputs" may be null or malformed in a damaged entry; only a
        # non-empty list whose first item is a string yields a URL.
        inputs = obj.get("inputs")
        args = inputs.get("args") if isinstance(inputs, dict) else None
        if isinstance(args, list) and args and isinstance(args[0], str):
            found.add(args[0])
    return found
|
|
|
|
|
|
def grade(
    working_directory: str,
    *,
    visited_urls: "set[str] | None" = None,
) -> tuple[int, int]:
    """Score the seven milestones of benchmark 1.

    Milestones:
        1. A docs.python.org URL with "3.13" in its path was visited.
        2. ``python313_features.md`` has a ``# warnings`` heading.
        3. It has a ``# multiprocessing`` heading.
        4. It references ``warnings.deprecated`` inside backticks.
        5. It mentions PEP 702.
        6. It references ``os.process_cpu_count`` inside backticks.
        7. It mentions ``os.cpu_count``.

    A line starting with ``MISS`` is printed for every milestone not reached.

    Args:
        working_directory: Directory expected to contain
            ``python313_features.md``.
        visited_urls: Optional iterable of visited URLs to grade against.
            When ``None`` (the default) the URLs are read from the browser
            logs via ``_visited_urls()``.

    Returns:
        ``(completed, total)`` where ``total`` is always 7.
    """
    total = 7
    completed = 0

    md_file = Path(working_directory) / "python313_features.md"

    # 1. Visited the Python 3.13 What's New page
    visited = _visited_urls() if visited_urls is None else visited_urls
    if any(_is_python313_docs_url(url) for url in visited):
        completed += 1
    else:
        print("MISS [1]: did not visit docs.python.org/3.13 What's New page")

    # is_file() rather than exists(): a directory with that name would
    # otherwise crash in read_text() below.
    if not md_file.is_file():
        print("MISS [2-7]: python313_features.md does not exist")
        return completed, total

    # Decode as UTF-8 regardless of the platform locale; "utf-8-sig" also
    # drops a leading BOM, which would otherwise defeat the "^#" anchors.
    content = md_file.read_text(encoding="utf-8-sig", errors="replace")
    lower = content.lower()

    # 2. Has a # warnings heading
    if re.search(r"^# warnings\b", content, re.MULTILINE | re.IGNORECASE):
        completed += 1
    else:
        print("MISS [2]: no '# warnings' heading found")

    # 3. Has a # multiprocessing heading
    if re.search(
        r"^# multiprocessing\b", content, re.MULTILINE | re.IGNORECASE
    ):
        completed += 1
    else:
        print("MISS [3]: no '# multiprocessing' heading found")

    # 4. Mentions warnings.deprecated() with backticks. Variants such as
    # `@warnings.deprecated` are accepted, but the name has to sit inside a
    # backtick-delimited span; a backtick elsewhere in the file is not enough.
    if "`warnings.deprecated()`" in content or _mentions_in_code_span(
        content, "warnings.deprecated"
    ):
        completed += 1
    else:
        print(
            "MISS [4]: missing `warnings.deprecated()` "
            "(expected backtick-wrapped reference)"
        )

    # 5. Mentions PEP 702 (also spelled "PEP-702", "PEP702" or "pep-0702")
    if re.search(r"\bpep[\s-]*0*702\b", lower):
        completed += 1
    else:
        print("MISS [5]: no mention of PEP 702")

    # 6. Mentions os.process_cpu_count() with backticks (same rule as 4)
    if "`os.process_cpu_count()`" in content or _mentions_in_code_span(
        content, "os.process_cpu_count"
    ):
        completed += 1
    else:
        print(
            "MISS [6]: missing `os.process_cpu_count()` "
            "(expected backtick-wrapped reference)"
        )

    # 7. Mentions os.cpu_count() (the old default being replaced)
    if "os.cpu_count" in lower:
        completed += 1
    else:
        print("MISS [7]: no mention of os.cpu_count()")

    return completed, total


def _is_python313_docs_url(url: object) -> bool:
    """Return True for a docs.python.org URL whose path contains "3.13"."""
    if not isinstance(url, str):
        return False
    try:
        parsed = urlparse(url)
        host = parsed.hostname
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. an unbalanced "[" in
        # the host); a bad log entry must not abort grading.
        return False
    if host is None:
        return False
    on_docs_site = host == "docs.python.org" or host.endswith(
        ".docs.python.org"
    )
    return on_docs_site and "3.13" in parsed.path


def _mentions_in_code_span(content: str, name: str) -> bool:
    """Return True if *name* occurs, ignoring case, between backticks."""
    # Splitting on runs of backticks leaves the text enclosed by an opening
    # and a closing run at the odd indices, for inline `code`, ``code`` and
    # fenced blocks alike.
    needle = name.lower()
    spans = re.split(r"`+", content)[1::2]
    return any(needle in span.lower() for span in spans)
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: takes exactly one argument, the working
    # directory to grade.
    if len(sys.argv) == 2:
        completed, total = grade(sys.argv[1])
        print(f"{completed}/{total}")
        # Exit status 0 only for a perfect score, 1 otherwise.
        sys.exit(int(completed != total))
    print(f"Usage: {sys.argv[0]} <working_directory>")
    sys.exit(1)
|