mirror of
https://github.com/onestardao/WFGY.git
synced 2026-05-05 23:40:49 +00:00
Delete ProblemMap/Atlas/Fixes/official/demos/demo-f4-execution-closure/demo_04_f7_container_fidelity_replay.ipynb
This commit is contained in:
parent
983336a240
commit
6d2f13e7bc
1 changed files with 0 additions and 248 deletions
|
|
@ -1,248 +0,0 @@
|
|||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from IPython.display import display, Markdown\n",
|
||||
"from pprint import pprint\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"display(Markdown(\"\"\"\n",
|
||||
"# Problem Map 3.0 Troubleshooting Atlas\n",
|
||||
"## Demo 4 · F7 Container Fidelity\n",
|
||||
"\n",
|
||||
"### What this experiment is\n",
|
||||
"This notebook is an MVP **route-first repair** experiment.\n",
|
||||
"\n",
|
||||
"It is designed to show that:\n",
|
||||
"\n",
|
||||
"> not every bad output is a reasoning failure\n",
|
||||
"> some failures break at **Representation & Localization Integrity** first\n",
|
||||
"> so the first repair move should target the **formal container**, not generic reasoning pressure\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## What you should expect to see\n",
|
||||
"\n",
|
||||
"### Replay mode\n",
|
||||
"You do **not** need an API key.\n",
|
||||
"\n",
|
||||
"You will see:\n",
|
||||
"\n",
|
||||
"- how the baseline answer contains partly correct routing content but sits in a broken shell\n",
|
||||
"- why Atlas routes the case to **F7**, not **F2**\n",
|
||||
"- what the first repair move is\n",
|
||||
"- how the answer shifts from a broken container state toward a valid structured output\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Why this matters\n",
|
||||
"This demo is designed to show that:\n",
|
||||
"\n",
|
||||
"- the content can be partly right\n",
|
||||
"- the routing can still fail at the container layer first\n",
|
||||
"- a better formal shell can be a stronger first repair move than asking for \"better reasoning\"\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## What counts as success\n",
|
||||
"The strongest version of this demo is:\n",
|
||||
"\n",
|
||||
"- baseline contains the right idea but fails as a valid JSON object\n",
|
||||
"- repaired version returns a valid JSON object with the expected fields\n",
|
||||
"\n",
|
||||
"A softer result is still acceptable:\n",
|
||||
"\n",
|
||||
"- baseline mixes prose and JSON-like content\n",
|
||||
"- repaired version is clearly more structured and machine-readable\n",
|
||||
"\"\"\"))\n",
|
||||
"\n",
|
||||
"MODE = \"replay\"\n",
|
||||
"\n",
|
||||
"INPUT_CASE = {\n",
|
||||
" \"demo_id\": \"demo_f7_container_fidelity\",\n",
|
||||
" \"demo_version\": \"v1A\",\n",
|
||||
" \"case_id\": \"f7_container_case_001\",\n",
|
||||
" \"title\": \"Partly correct routing content inside a broken formal shell\",\n",
|
||||
" \"task_type\": \"structured_output_routing\",\n",
|
||||
" \"family_target\": {\n",
|
||||
" \"primary_family\": \"F7\",\n",
|
||||
" \"secondary_family\": \"F2\",\n",
|
||||
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
|
||||
" \"broken_invariant\": \"formal_container_fidelity_broken\"\n",
|
||||
" },\n",
|
||||
" \"user_question\": \"Return the routing result for this failure as valid JSON with keys primary_family, secondary_family, broken_invariant, best_current_fit, and first_repair_move.\",\n",
|
||||
" \"routing_notes\": [\n",
|
||||
" {\n",
|
||||
" \"note_id\": \"note_001\",\n",
|
||||
" \"label\": \"failure description\",\n",
|
||||
" \"text\": \"The answer often contains the right diagnosis but wraps it in mixed prose, partial JSON, or malformed shell structure.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"note_id\": \"note_002\",\n",
|
||||
" \"label\": \"family clue\",\n",
|
||||
" \"text\": \"This failure is container-first. The formal shell fails before the reasoning path itself becomes the main issue.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"note_id\": \"note_003\",\n",
|
||||
" \"label\": \"repair clue\",\n",
|
||||
" \"text\": \"The first repair move is to tighten the output contract and preserve the formal container.\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"REPLAY_OUTPUTS = {\n",
|
||||
" \"baseline_answer\": '''Here is the routing result.\n",
|
||||
"\n",
|
||||
"{\n",
|
||||
" \"primary_family\": \"F7\",\n",
|
||||
" \"secondary_family\": \"F2\",\n",
|
||||
" \"broken_invariant\": \"formal_container_fidelity_broken\",\n",
|
||||
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
|
||||
" \"first_repair_move\": \"tighten_output_contract\"\n",
|
||||
"''',\n",
|
||||
" \"repaired_answer\": '''{\n",
|
||||
" \"primary_family\": \"F7\",\n",
|
||||
" \"secondary_family\": \"F2\",\n",
|
||||
" \"broken_invariant\": \"formal_container_fidelity_broken\",\n",
|
||||
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
|
||||
" \"first_repair_move\": \"tighten_output_contract\"\n",
|
||||
"}'''\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"EXPECTED_OUTPUT = {\n",
|
||||
" \"primary_family\": \"F7\",\n",
|
||||
" \"secondary_family\": \"F2\",\n",
|
||||
" \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n",
|
||||
" \"broken_invariant\": \"formal_container_fidelity_broken\",\n",
|
||||
" \"first_repair_move\": \"tighten_output_contract\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"def print_section(title: str):\n",
|
||||
" print(\"\\n\" + \"=\" * 80)\n",
|
||||
" print(title)\n",
|
||||
" print(\"=\" * 80)\n",
|
||||
"\n",
|
||||
"def print_notes(notes):\n",
|
||||
" for item in notes:\n",
|
||||
" print(f\"[{item['note_id']}] {item['label']}\")\n",
|
||||
" print(item[\"text\"])\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
"\n",
|
||||
"def json_status(text: str):\n",
|
||||
" try:\n",
|
||||
" json.loads(text)\n",
|
||||
" return \"valid JSON\"\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"invalid JSON | {type(e).__name__}: {e}\"\n",
|
||||
"\n",
|
||||
"baseline_prompt = f'''\n",
|
||||
"You are helping with an atlas routing task.\n",
|
||||
"\n",
|
||||
"Question:\n",
|
||||
"{INPUT_CASE[\"user_question\"]}\n",
|
||||
"\n",
|
||||
"Notes:\n",
|
||||
"1. {INPUT_CASE[\"routing_notes\"][0][\"text\"]}\n",
|
||||
"2. {INPUT_CASE[\"routing_notes\"][1][\"text\"]}\n",
|
||||
"3. {INPUT_CASE[\"routing_notes\"][2][\"text\"]}\n",
|
||||
"\n",
|
||||
"First explain the result in one short sentence.\n",
|
||||
"Then provide a JSON-style answer if possible.\n",
|
||||
"Keep it compact and natural.\n",
|
||||
"'''.strip()\n",
|
||||
"\n",
|
||||
"repaired_prompt = f'''\n",
|
||||
"You are helping with an atlas routing task.\n",
|
||||
"\n",
|
||||
"Question:\n",
|
||||
"{INPUT_CASE[\"user_question\"]}\n",
|
||||
"\n",
|
||||
"Notes:\n",
|
||||
"1. {INPUT_CASE[\"routing_notes\"][0][\"text\"]}\n",
|
||||
"2. {INPUT_CASE[\"routing_notes\"][1][\"text\"]}\n",
|
||||
"3. {INPUT_CASE[\"routing_notes\"][2][\"text\"]}\n",
|
||||
"\n",
|
||||
"Return exactly one valid JSON object.\n",
|
||||
"Do not add any prose before or after the object.\n",
|
||||
"Use exactly these keys:\n",
|
||||
"primary_family\n",
|
||||
"secondary_family\n",
|
||||
"broken_invariant\n",
|
||||
"best_current_fit\n",
|
||||
"first_repair_move\n",
|
||||
"'''.strip()\n",
|
||||
"\n",
|
||||
"print_section(\"1. Case overview\")\n",
|
||||
"print(\"Title:\")\n",
|
||||
"print(INPUT_CASE[\"title\"])\n",
|
||||
"print(\"\\nQuestion:\")\n",
|
||||
"print(INPUT_CASE[\"user_question\"])\n",
|
||||
"\n",
|
||||
"print_section(\"2. Routing notes\")\n",
|
||||
"print_notes(INPUT_CASE[\"routing_notes\"])\n",
|
||||
"\n",
|
||||
"print_section(\"3. Atlas routing target\")\n",
|
||||
"pprint(INPUT_CASE[\"family_target\"])\n",
|
||||
"\n",
|
||||
"print_section(\"4. Baseline prompt\")\n",
|
||||
"print(baseline_prompt)\n",
|
||||
"\n",
|
||||
"print_section(\"5. Repaired prompt\")\n",
|
||||
"print(repaired_prompt)\n",
|
||||
"\n",
|
||||
"print_section(\"6. Replay mode · baseline output\")\n",
|
||||
"print(REPLAY_OUTPUTS[\"baseline_answer\"])\n",
|
||||
"\n",
|
||||
"print_section(\"7. Replay mode · repaired output\")\n",
|
||||
"print(REPLAY_OUTPUTS[\"repaired_answer\"])\n",
|
||||
"\n",
|
||||
"print_section(\"8. Container check\")\n",
|
||||
"print(\"Baseline:\", json_status(REPLAY_OUTPUTS[\"baseline_answer\"]))\n",
|
||||
"print(\"Repaired:\", json_status(REPLAY_OUTPUTS[\"repaired_answer\"]))\n",
|
||||
"\n",
|
||||
"print_section(\"9. Expected effect checklist\")\n",
|
||||
"print(\"You should verify the following:\")\n",
|
||||
"print(\"1. Baseline contains partly correct routing content.\")\n",
|
||||
"print(\"2. Baseline still fails as a formal container.\")\n",
|
||||
"print(\"3. Repaired version returns a valid JSON shell.\")\n",
|
||||
"print(\"4. The important shift is not deeper reasoning pressure.\")\n",
|
||||
"print(\"5. The important shift is formal container repair.\")\n",
|
||||
"print(\"6. This is why Atlas routes the case to F7 before F2.\")\n",
|
||||
"\n",
|
||||
"print_section(\"10. Expected success contract\")\n",
|
||||
"pprint(EXPECTED_OUTPUT)\n",
|
||||
"\n",
|
||||
"display(Markdown(\"\"\"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Back to the main page\n",
|
||||
"\n",
|
||||
"Read the full product page here:\n",
|
||||
"[Problem Map 3.0 Troubleshooting Atlas](https://github.com/onestardao/WFGY/blob/main/ProblemMap/wfgy-ai-problem-map-troubleshooting-atlas.md)\n",
|
||||
"\n",
|
||||
"If you like the project, **star the repo** ⭐\n",
|
||||
"\"\"\"))"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "VhAKL4-YPsHU"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue