Delete ProblemMap/Atlas/Fixes/official/demos/demo-f4-execution-closure/demo_04_f7_container_fidelity_replay.ipynb

This commit is contained in:
PSBigBig + MiniPS 2026-03-12 21:48:19 +08:00 committed by GitHub
parent 983336a240
commit 6d2f13e7bc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,248 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"from IPython.display import display, Markdown\n",
"from pprint import pprint\n",
"import json\n",
"\n",
"display(Markdown(\"\"\"\n",
"# Problem Map 3.0 Troubleshooting Atlas\n",
"## Demo 4 · F7 Container Fidelity\n",
"\n",
"### What this experiment is\n",
"This notebook is an MVP **route-first repair** experiment.\n",
"\n",
"It is designed to show that:\n",
"\n",
"> not every bad output is a reasoning failure\n",
">\n",
"> some failures break at **Representation & Localization Integrity** first\n",
">\n",
"> so the first repair move should target the **formal container**, not generic reasoning pressure\n",
"\n",
"---\n",
"\n",
"## What you should expect to see\n",
"\n",
"### Replay mode\n",
"You do **not** need an API key.\n",
"\n",
"You will see:\n",
"\n",
"- how the baseline answer contains partly correct routing content but sits in a broken shell\n",
"- why Atlas routes the case to **F7**, not **F2**\n",
"- what the first repair move is\n",
"- how the answer shifts from a broken container state toward a valid structured output\n",
"\n",
"---\n",
"\n",
"## Why this matters\n",
"This demo is designed to show that:\n",
"\n",
"- the content can be partly right\n",
"- the routing answer can still fail at the container layer first\n",
"- a better formal shell can be a stronger first repair move than asking for \"better reasoning\"\n",
"\n",
"---\n",
"\n",
"## What counts as success\n",
"The strongest version of this demo is:\n",
"\n",
"- baseline contains the right idea but fails to parse as a valid JSON object\n",
"- repaired version returns a valid JSON object with the expected fields\n",
"\n",
"A softer result is still acceptable:\n",
"\n",
"- baseline mixes prose and JSON-like content\n",
"- repaired version is clearly more structured and machine-readable\n",
"\"\"\"))\n",
"\n",
"MODE = \"replay\"\n",
"\n",
"INPUT_CASE = {\n",
" \"demo_id\": \"demo_f7_container_fidelity\",\n",
" \"demo_version\": \"v1A\",\n",
" \"case_id\": \"f7_container_case_001\",\n",
" \"title\": \"Partly correct routing content inside a broken formal shell\",\n",
" \"task_type\": \"structured_output_routing\",\n",
" \"family_target\": {\n",
" \"primary_family\": \"F7\",\n",
" \"secondary_family\": \"F2\",\n",
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
" \"broken_invariant\": \"formal_container_fidelity_broken\"\n",
" },\n",
" \"user_question\": \"Return the routing result for this failure as valid JSON with keys primary_family, secondary_family, broken_invariant, best_current_fit, and first_repair_move.\",\n",
" \"routing_notes\": [\n",
" {\n",
" \"note_id\": \"note_001\",\n",
" \"label\": \"failure description\",\n",
" \"text\": \"The answer often contains the right diagnosis but wraps it in mixed prose, partial JSON, or malformed shell structure.\"\n",
" },\n",
" {\n",
" \"note_id\": \"note_002\",\n",
" \"label\": \"family clue\",\n",
" \"text\": \"This failure is container-first. The formal shell fails before the reasoning path itself becomes the main issue.\"\n",
" },\n",
" {\n",
" \"note_id\": \"note_003\",\n",
" \"label\": \"repair clue\",\n",
" \"text\": \"The first repair move is to tighten the output contract and preserve the formal container.\"\n",
" }\n",
" ]\n",
"}\n",
"\n",
"REPLAY_OUTPUTS = {\n",
" \"baseline_answer\": '''Here is the routing result.\n",
"\n",
"{\n",
" \"primary_family\": \"F7\",\n",
" \"secondary_family\": \"F2\",\n",
" \"broken_invariant\": \"formal_container_fidelity_broken\",\n",
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
" \"first_repair_move\": \"tighten_output_contract\"\n",
"''',\n",
" \"repaired_answer\": '''{\n",
" \"primary_family\": \"F7\",\n",
" \"secondary_family\": \"F2\",\n",
" \"broken_invariant\": \"formal_container_fidelity_broken\",\n",
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
" \"first_repair_move\": \"tighten_output_contract\"\n",
"}'''\n",
"}\n",
"\n",
"EXPECTED_OUTPUT = {\n",
" \"primary_family\": \"F7\",\n",
" \"secondary_family\": \"F2\",\n",
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
" \"broken_invariant\": \"formal_container_fidelity_broken\",\n",
" \"first_repair_move\": \"tighten_output_contract\"\n",
"}\n",
"\n",
"def print_section(title: str):\n",
" print(\"\\n\" + \"=\" * 80)\n",
" print(title)\n",
" print(\"=\" * 80)\n",
"\n",
"def print_notes(notes):\n",
" for item in notes:\n",
" print(f\"[{item['note_id']}] {item['label']}\")\n",
" print(item[\"text\"])\n",
" print(\"-\" * 80)\n",
"\n",
"def json_status(text: str):\n",
" try:\n",
" json.loads(text)\n",
" return \"valid JSON\"\n",
" except Exception as e:\n",
" return f\"invalid JSON | {type(e).__name__}: {e}\"\n",
"\n",
"baseline_prompt = f'''\n",
"You are helping with an atlas routing task.\n",
"\n",
"Question:\n",
"{INPUT_CASE[\"user_question\"]}\n",
"\n",
"Notes:\n",
"1. {INPUT_CASE[\"routing_notes\"][0][\"text\"]}\n",
"2. {INPUT_CASE[\"routing_notes\"][1][\"text\"]}\n",
"3. {INPUT_CASE[\"routing_notes\"][2][\"text\"]}\n",
"\n",
"First explain the result in one short sentence.\n",
"Then provide a JSON-style answer if possible.\n",
"Keep it compact and natural.\n",
"'''.strip()\n",
"\n",
"repaired_prompt = f'''\n",
"You are helping with an atlas routing task.\n",
"\n",
"Question:\n",
"{INPUT_CASE[\"user_question\"]}\n",
"\n",
"Notes:\n",
"1. {INPUT_CASE[\"routing_notes\"][0][\"text\"]}\n",
"2. {INPUT_CASE[\"routing_notes\"][1][\"text\"]}\n",
"3. {INPUT_CASE[\"routing_notes\"][2][\"text\"]}\n",
"\n",
"Return exactly one valid JSON object.\n",
"Do not add any prose before or after the object.\n",
"Use exactly these keys:\n",
"primary_family\n",
"secondary_family\n",
"broken_invariant\n",
"best_current_fit\n",
"first_repair_move\n",
"'''.strip()\n",
"\n",
"print_section(\"1. Case overview\")\n",
"print(\"Title:\")\n",
"print(INPUT_CASE[\"title\"])\n",
"print(\"\\nQuestion:\")\n",
"print(INPUT_CASE[\"user_question\"])\n",
"\n",
"print_section(\"2. Routing notes\")\n",
"print_notes(INPUT_CASE[\"routing_notes\"])\n",
"\n",
"print_section(\"3. Atlas routing target\")\n",
"pprint(INPUT_CASE[\"family_target\"])\n",
"\n",
"print_section(\"4. Baseline prompt\")\n",
"print(baseline_prompt)\n",
"\n",
"print_section(\"5. Repaired prompt\")\n",
"print(repaired_prompt)\n",
"\n",
"print_section(\"6. Replay mode · baseline output\")\n",
"print(REPLAY_OUTPUTS[\"baseline_answer\"])\n",
"\n",
"print_section(\"7. Replay mode · repaired output\")\n",
"print(REPLAY_OUTPUTS[\"repaired_answer\"])\n",
"\n",
"print_section(\"8. Container check\")\n",
"print(\"Baseline:\", json_status(REPLAY_OUTPUTS[\"baseline_answer\"]))\n",
"print(\"Repaired:\", json_status(REPLAY_OUTPUTS[\"repaired_answer\"]))\n",
"\n",
"print_section(\"9. Expected effect checklist\")\n",
"print(\"You should verify the following:\")\n",
"print(\"1. Baseline contains partly correct routing content.\")\n",
"print(\"2. Baseline still fails as a formal container.\")\n",
"print(\"3. Repaired version returns a valid JSON shell.\")\n",
"print(\"4. The important shift is not deeper reasoning pressure.\")\n",
"print(\"5. The important shift is formal container repair.\")\n",
"print(\"6. This is why Atlas routes the case to F7 before F2.\")\n",
"\n",
"print_section(\"10. Expected success contract\")\n",
"pprint(EXPECTED_OUTPUT)\n",
"\n",
"display(Markdown(\"\"\"\n",
"---\n",
"\n",
"## Back to the main page\n",
"\n",
"Read the full product page here:\n",
"[Problem Map 3.0 Troubleshooting Atlas](https://github.com/onestardao/WFGY/blob/main/ProblemMap/wfgy-ai-problem-map-troubleshooting-atlas.md)\n",
"\n",
"If you like the project, **star the repo** ⭐\n",
"\"\"\"))"
],
"metadata": {
"id": "VhAKL4-YPsHU"
},
"execution_count": null,
"outputs": []
}
]
}