From 6d2f13e7bcea946f3a4a50ca2945b888b5c6bc7b Mon Sep 17 00:00:00 2001 From: PSBigBig + MiniPS Date: Thu, 12 Mar 2026 21:48:19 +0800 Subject: [PATCH] Delete ProblemMap/Atlas/Fixes/official/demos/demo-f4-execution-closure/demo_04_f7_container_fidelity_replay.ipynb --- ...demo_04_f7_container_fidelity_replay.ipynb | 248 ------------------ 1 file changed, 248 deletions(-) delete mode 100644 ProblemMap/Atlas/Fixes/official/demos/demo-f4-execution-closure/demo_04_f7_container_fidelity_replay.ipynb diff --git a/ProblemMap/Atlas/Fixes/official/demos/demo-f4-execution-closure/demo_04_f7_container_fidelity_replay.ipynb b/ProblemMap/Atlas/Fixes/official/demos/demo-f4-execution-closure/demo_04_f7_container_fidelity_replay.ipynb deleted file mode 100644 index 9d9667ca..00000000 --- a/ProblemMap/Atlas/Fixes/official/demos/demo-f4-execution-closure/demo_04_f7_container_fidelity_replay.ipynb +++ /dev/null @@ -1,248 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "code", - "source": [ - "from IPython.display import display, Markdown\n", - "from pprint import pprint\n", - "import json\n", - "\n", - "display(Markdown(\"\"\"\n", - "# Problem Map 3.0 Troubleshooting Atlas\n", - "## Demo 4 · F7 Container Fidelity\n", - "\n", - "### What this experiment is\n", - "This notebook is an MVP **route-first repair** experiment.\n", - "\n", - "It is designed to show that:\n", - "\n", - "> not every bad output is a reasoning failure\n", - "> some failures break at **Representation & Localization Integrity** first\n", - "> so the first repair move should target the **formal container**, not generic reasoning pressure\n", - "\n", - "---\n", - "\n", - "## What you should expect to see\n", - "\n", - "### Replay mode\n", - "You do **not** need an API key.\n", - "\n", - "You will see:\n", - "\n", - "- how the baseline answer contains partly correct routing content but sits in a broken shell\n", - "- why Atlas routes the case to **F7**, not **F2**\n", - "- what the first repair move is\n", - "- how the answer shifts from a broken container state toward a valid structured output\n", - "\n", - "---\n", - "\n", - "## Why this matters\n", - "This demo is designed to show that:\n", - "\n", - "- the content can be partly right\n", - "- the routing can still fail at the container layer first\n", - "- a better formal shell can be a stronger first repair move than asking for \"better reasoning\"\n", - "\n", - "---\n", - "\n", - "## What counts as success\n", - "The strongest version of this demo is:\n", - "\n", - "- baseline contains the right idea but fails as a valid JSON object\n", - "- repaired version returns a valid JSON object with the expected fields\n", - "\n", - "A softer result is still acceptable:\n", - "\n", - "- baseline mixes prose and JSON-like content\n", - "- repaired version is clearly more structured and machine-readable\n", - "\"\"\"))\n", - "\n", - "MODE = \"replay\"\n", - "\n", - "INPUT_CASE = {\n", - " \"demo_id\": \"demo_f7_container_fidelity\",\n", - " \"demo_version\": \"v1A\",\n", - " \"case_id\": \"f7_container_case_001\",\n", - " \"title\": \"Partly correct routing content inside a broken formal shell\",\n", - " \"task_type\": \"structured_output_routing\",\n", - " \"family_target\": {\n", - " \"primary_family\": \"F7\",\n", - " \"secondary_family\": \"F2\",\n", - " \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n", - " \"broken_invariant\": \"formal_container_fidelity_broken\"\n", - " },\n", - " \"user_question\": \"Return the routing result for this failure as valid JSON with keys primary_family, secondary_family, broken_invariant, best_current_fit, and first_repair_move.\",\n", - " \"routing_notes\": [\n", - " {\n", - " \"note_id\": \"note_001\",\n", - " \"label\": \"failure description\",\n", - " \"text\": \"The answer often contains the right diagnosis but wraps it in mixed prose, partial JSON, or malformed shell structure.\"\n", - " },\n", - " {\n", - " \"note_id\": \"note_002\",\n", - " \"label\": \"family clue\",\n", - " \"text\": \"This failure is container-first. The formal shell fails before the reasoning path itself becomes the main issue.\"\n", - " },\n", - " {\n", - " \"note_id\": \"note_003\",\n", - " \"label\": \"repair clue\",\n", - " \"text\": \"The first repair move is to tighten the output contract and preserve the formal container.\"\n", - " }\n", - " ]\n", - "}\n", - "\n", - "REPLAY_OUTPUTS = {\n", - " \"baseline_answer\": '''Here is the routing result.\n", - "\n", - "{\n", - " \"primary_family\": \"F7\",\n", - " \"secondary_family\": \"F2\",\n", - " \"broken_invariant\": \"formal_container_fidelity_broken\",\n", - " \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n", - " \"first_repair_move\": \"tighten_output_contract\"\n", - "''',\n", - " \"repaired_answer\": '''{\n", - " \"primary_family\": \"F7\",\n", - " \"secondary_family\": \"F2\",\n", - " \"broken_invariant\": \"formal_container_fidelity_broken\",\n", - " \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n", - " \"first_repair_move\": \"tighten_output_contract\"\n", - "}'''\n", - "}\n", - "\n", - "EXPECTED_OUTPUT = {\n", - " \"primary_family\": \"F7\",\n", - " \"secondary_family\": \"F2\",\n", - " \"best_current_fit\": \"F7_N01_B Formal Container Adequacy Failure\",\n", - " \"broken_invariant\": \"formal_container_fidelity_broken\",\n", - " \"first_repair_move\": \"tighten_output_contract\"\n", - "}\n", - "\n", - "def print_section(title: str):\n", - " print(\"\\n\" + \"=\" * 80)\n", - " print(title)\n", - " print(\"=\" * 80)\n", - "\n", - "def print_notes(notes):\n", - " for item in notes:\n", - " print(f\"[{item['note_id']}] {item['label']}\")\n", - " print(item[\"text\"])\n", - " print(\"-\" * 80)\n", - "\n", - "def json_status(text: str):\n", - " try:\n", - " json.loads(text)\n", - " return \"valid JSON\"\n", - " except Exception as e:\n", - " return f\"invalid JSON | {type(e).__name__}: {e}\"\n", - "\n", - "baseline_prompt = f'''\n", - "You are helping with an atlas routing task.\n", - "\n", - "Question:\n", - "{INPUT_CASE[\"user_question\"]}\n", - "\n", - "Notes:\n", - "1. {INPUT_CASE[\"routing_notes\"][0][\"text\"]}\n", - "2. {INPUT_CASE[\"routing_notes\"][1][\"text\"]}\n", - "3. {INPUT_CASE[\"routing_notes\"][2][\"text\"]}\n", - "\n", - "First explain the result in one short sentence.\n", - "Then provide a JSON-style answer if possible.\n", - "Keep it compact and natural.\n", - "'''.strip()\n", - "\n", - "repaired_prompt = f'''\n", - "You are helping with an atlas routing task.\n", - "\n", - "Question:\n", - "{INPUT_CASE[\"user_question\"]}\n", - "\n", - "Notes:\n", - "1. {INPUT_CASE[\"routing_notes\"][0][\"text\"]}\n", - "2. {INPUT_CASE[\"routing_notes\"][1][\"text\"]}\n", - "3. {INPUT_CASE[\"routing_notes\"][2][\"text\"]}\n", - "\n", - "Return exactly one valid JSON object.\n", - "Do not add any prose before or after the object.\n", - "Use exactly these keys:\n", - "primary_family\n", - "secondary_family\n", - "broken_invariant\n", - "best_current_fit\n", - "first_repair_move\n", - "'''.strip()\n", - "\n", - "print_section(\"1. Case overview\")\n", - "print(\"Title:\")\n", - "print(INPUT_CASE[\"title\"])\n", - "print(\"\\nQuestion:\")\n", - "print(INPUT_CASE[\"user_question\"])\n", - "\n", - "print_section(\"2. Routing notes\")\n", - "print_notes(INPUT_CASE[\"routing_notes\"])\n", - "\n", - "print_section(\"3. Atlas routing target\")\n", - "pprint(INPUT_CASE[\"family_target\"])\n", - "\n", - "print_section(\"4. Baseline prompt\")\n", - "print(baseline_prompt)\n", - "\n", - "print_section(\"5. Repaired prompt\")\n", - "print(repaired_prompt)\n", - "\n", - "print_section(\"6. Replay mode · baseline output\")\n", - "print(REPLAY_OUTPUTS[\"baseline_answer\"])\n", - "\n", - "print_section(\"7. Replay mode · repaired output\")\n", - "print(REPLAY_OUTPUTS[\"repaired_answer\"])\n", - "\n", - "print_section(\"8. Container check\")\n", - "print(\"Baseline:\", json_status(REPLAY_OUTPUTS[\"baseline_answer\"]))\n", - "print(\"Repaired:\", json_status(REPLAY_OUTPUTS[\"repaired_answer\"]))\n", - "\n", - "print_section(\"9. Expected effect checklist\")\n", - "print(\"You should verify the following:\")\n", - "print(\"1. Baseline contains partly correct routing content.\")\n", - "print(\"2. Baseline still fails as a formal container.\")\n", - "print(\"3. Repaired version returns a valid JSON shell.\")\n", - "print(\"4. The important shift is not deeper reasoning pressure.\")\n", - "print(\"5. The important shift is formal container repair.\")\n", - "print(\"6. This is why Atlas routes the case to F7 before F2.\")\n", - "\n", - "print_section(\"10. Expected success contract\")\n", - "pprint(EXPECTED_OUTPUT)\n", - "\n", - "display(Markdown(\"\"\"\n", - "---\n", - "\n", - "## Back to the main page\n", - "\n", - "Read the full product page here:\n", - "[Problem Map 3.0 Troubleshooting Atlas](https://github.com/onestardao/WFGY/blob/main/ProblemMap/wfgy-ai-problem-map-troubleshooting-atlas.md)\n", - "\n", - "If you like the project, **star the repo** ⭐\n", - "\"\"\"))" - ], - "metadata": { - "id": "VhAKL4-YPsHU" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file