Add files via upload

2026-05-05 23:40:49 +00:00 · 2026-03-12 21:26:36 +08:00 · 2026-03-12 21:26:36 +08:00 · c470fc1861
commit c470fc1861
parent fc9d8a4885
1 changed files with 227 additions and 0 deletions
--- a/ProblemMap/Atlas/Fixes/official/demos/demo-f5-observability-first/demo_f5_observability.ipynb
+++ b/ProblemMap/Atlas/Fixes/official/demos/demo-f5-observability-first/demo_f5_observability.ipynb
@@ -0,0 +1,227 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "from pprint import pprint\n",
+        "from IPython.display import display, Markdown\n",
+        "\n",
+        "# Run mode label. It is only echoed under the \"Mode\" heading; nothing in this cell branches on it.\n",
+        "MODE = \"replay\"\n",
+        "\n",
+        "# The one canned case this demo walks through: identifiers, routing labels, the user\n",
+        "# question, the thin trace, and the extra traces that make the failure inspectable.\n",
+        "INPUT_CASE = {\n",
+        "    \"demo_id\": \"demo_f5_observability_first\",\n",
+        "    \"demo_version\": \"v1\",\n",
+        "    \"case_id\": \"f5_observability_case_001\",\n",
+        "    \"title\": \"Workflow failure with too little visibility for safe diagnosis\",\n",
+        "    \"task_type\": \"workflow_debugging\",\n",
+        "    # Pretty-printed as-is under the \"Atlas routing target\" heading.\n",
+        "    \"family_target\": {\n",
+        "        \"primary_family\": \"F5\",\n",
+        "        \"secondary_family\": \"F4\",\n",
+        "        \"best_current_fit\": \"F5_N01 Failure Path Opacity\",\n",
+        "        \"broken_invariant\": \"failure_path_visibility_broken\"\n",
+        "    },\n",
+        "    # Printed in the case overview and embedded in repaired_prompt (not in baseline_prompt).\n",
+        "    \"user_question\": \"Why is the workflow returning irrelevant answers, and what should be fixed first?\",\n",
+        "    # Embedded in both prompts and printed under the \"Thin trace\" heading.\n",
+        "    \"thin_trace\": [\n",
+        "        \"Step 1: User query received\",\n",
+        "        \"Step 2: Retrieval executed\",\n",
+        "        \"Step 3: Final answer produced\",\n",
+        "        \"Observed symptom: answer is irrelevant to the user question\"\n",
+        "    ],\n",
+        "    # Extra traces; in this cell they are read only when building repaired_prompt.\n",
+        "    \"observability_uplift\": {\n",
+        "        \"retrieval_trace\": [\n",
+        "            \"retriever_query = 'general company summary'\",\n",
+        "            \"top_k = 2\",\n",
+        "            \"returned_chunk_ids = ['chunk_014', 'chunk_019']\",\n",
+        "            \"both chunks are broad product overviews, not release-note evidence\"\n",
+        "        ],\n",
+        "        \"candidate_trace\": [\n",
+        "            \"candidate_answer_1 = 'The workflow likely needs a stronger generation step'\",\n",
+        "            \"candidate_answer_2 = 'The retrieval target appears off-topic relative to the question'\"\n",
+        "        ],\n",
+        "        \"post_check_trace\": [\n",
+        "            \"answer_to_question_alignment = low\",\n",
+        "            \"evidence_to_answer_alignment = low\",\n",
+        "            \"retrieval_to_question_alignment = low\"\n",
+        "        ]\n",
+        "    }\n",
+        "}\n",
+        "\n",
+        "# Pre-written texts printed verbatim in the \"Replay mode\" sections; this cell never calls a model.\n",
+        "REPLAY_OUTPUTS = {\n",
+        "    \"baseline_diagnosis\": \"The workflow likely needs a stronger final prompt or a better answer generation step. Try improving the model instructions first.\",\n",
+        "    \"baseline_problem\": \"The diagnosis jumps too early to a direct fix even though the workflow is still too opaque for a safe root-cause claim.\",\n",
+        "    \"repaired_diagnosis\": \"The first repair move should be observability uplift. The workflow should not be treated as execution-first or reasoning-first yet, because the visible trace is still too thin. Once retrieval trace, candidate trace, and post-check trace are exposed, the system becomes diagnosable and the off-target retrieval signal becomes inspectable.\",\n",
+        "    \"before_state\": \"opaque\",\n",
+        "    \"after_state\": \"diagnosable\"\n",
+        "}\n",
+        "\n",
+        "# Target classification shown under \"Expected success contract\". It is display-only:\n",
+        "# nothing in this cell compares it against another value. The first four entries repeat\n",
+        "# INPUT_CASE[\"family_target\"].\n",
+        "# NOTE(review): \"first_repair_move\" says \"observability insertion\" while the replay text and\n",
+        "# checklist say \"observability uplift\" -- confirm which term is intended.\n",
+        "EXPECTED_OUTPUT = {\n",
+        "    \"primary_family\": \"F5\",\n",
+        "    \"secondary_family\": \"F4\",\n",
+        "    \"best_current_fit\": \"F5_N01 Failure Path Opacity\",\n",
+        "    \"broken_invariant\": \"failure_path_visibility_broken\",\n",
+        "    \"first_repair_move\": \"observability insertion\"\n",
+        "}\n",
+        "\n",
+        "def _as_bullets(entries):\n",
+        "    \"\"\"Return ``entries`` rendered as one ``- item`` line each, joined by newlines.\"\"\"\n",
+        "    return \"\\n\".join(f\"- {entry}\" for entry in entries)\n",
+        "\n",
+        "# Render every trace list in full instead of indexing entries by hand, so the prompts\n",
+        "# keep matching INPUT_CASE when a trace gains or loses a line. Rendering up front also\n",
+        "# keeps backslashes out of the f-string expressions (a syntax error before Python 3.12).\n",
+        "_uplift = INPUT_CASE[\"observability_uplift\"]\n",
+        "_thin_trace_bullets = _as_bullets(INPUT_CASE[\"thin_trace\"])\n",
+        "_retrieval_bullets = _as_bullets(_uplift[\"retrieval_trace\"])\n",
+        "_candidate_bullets = _as_bullets(_uplift[\"candidate_trace\"])\n",
+        "_post_check_bullets = _as_bullets(_uplift[\"post_check_trace\"])\n",
+        "\n",
+        "# Baseline: thin trace only, and the instructions push for an immediate, confident fix.\n",
+        "baseline_prompt = f\"\"\"\n",
+        "You are a workflow debugging assistant.\n",
+        "\n",
+        "A system received a user query, ran retrieval, and produced a final answer.\n",
+        "The final answer was irrelevant to the user question.\n",
+        "\n",
+        "Available trace:\n",
+        "{_thin_trace_bullets}\n",
+        "\n",
+        "Explain what probably went wrong.\n",
+        "Then recommend one direct fix to apply immediately.\n",
+        "\n",
+        "Assume the current trace is enough.\n",
+        "Keep the answer short and confident.\n",
+        "\"\"\".strip()\n",
+        "\n",
+        "# Repaired: the same thin trace plus the three observability traces and a fixed answer order.\n",
+        "repaired_prompt = f\"\"\"\n",
+        "You are diagnosing a workflow failure.\n",
+        "\n",
+        "Question:\n",
+        "{INPUT_CASE[\"user_question\"]}\n",
+        "\n",
+        "Thin trace:\n",
+        "{_thin_trace_bullets}\n",
+        "\n",
+        "Additional observability:\n",
+        "Retrieval trace:\n",
+        "{_retrieval_bullets}\n",
+        "\n",
+        "Candidate trace:\n",
+        "{_candidate_bullets}\n",
+        "\n",
+        "Post-check trace:\n",
+        "{_post_check_bullets}\n",
+        "\n",
+        "Answer in this order:\n",
+        "1. What the first failure family is\n",
+        "2. Why F5 is a better first cut than F4\n",
+        "3. What the first repair move should be\n",
+        "4. What the visible evidence now suggests\n",
+        "\"\"\".strip()\n",
+        "\n",
+        "def section(title: str):\n",
+        "    \"\"\"Print a blank line, then ``title`` framed by two 88-character ``=`` rules.\"\"\"\n",
+        "    rule = \"=\" * 88\n",
+        "    # The leading empty string yields the blank line that precedes the first rule.\n",
+        "    print(\"\", rule, title, rule, sep=\"\\n\")\n",
+        "\n",
+        "def bullet_list(items):\n",
+        "    \"\"\"Print each element of ``items`` on its own line, prefixed with ``- ``.\"\"\"\n",
+        "    # map() is lazy, so each entry is formatted right before it is printed.\n",
+        "    for line in map(\"- {}\".format, items):\n",
+        "        print(line)\n",
+        "\n",
+        "# Intro banner rendered as Markdown: what the demo is, why it is replay-only, what to expect.\n",
+        "display(Markdown(\"\"\"\n",
+        "# Problem Map 3.0 Troubleshooting Atlas\n",
+        "## Demo 2 · F5 Observability First\n",
+        "\n",
+        "### What this experiment is\n",
+        "This notebook is a **replay-only MVP** experiment.\n",
+        "\n",
+        "It is designed to show that some failures should not be repaired by guessing a root cause too early.\n",
+        "Some failures first break at **Observability & Diagnosability Integrity**, so the correct first move is to expose the failure path before attempting deeper repair.\n",
+        "\n",
+        "### Why this notebook is replay-only\n",
+        "For this MVP, **live mode is not required**.\n",
+        "The point of this demo is not model creativity.\n",
+        "The point is to make the **before / after visibility shift** obvious and easy to inspect.\n",
+        "\n",
+        "### What you should expect to see\n",
+        "- The baseline overcommits under thin trace\n",
+        "- The repaired version does not jump too early\n",
+        "- The repaired version treats **observability uplift** as the correct first move\n",
+        "- The workflow shifts from **opaque** to **diagnosable**\n",
+        "\"\"\"))\n",
+        "\n",
+        "# Echo the run mode ahead of everything else.\n",
+        "section(\"Mode\")\n",
+        "print(\"MODE =\", MODE)\n",
+        "\n",
+        "# Title and question, separated by a single blank line.\n",
+        "section(\"Case overview\")\n",
+        "print(\"Title:\", INPUT_CASE[\"title\"], \"\", \"Question:\", INPUT_CASE[\"user_question\"], sep=\"\\n\")\n",
+        "\n",
+        "section(\"Thin trace\")\n",
+        "bullet_list(INPUT_CASE[\"thin_trace\"])\n",
+        "\n",
+        "section(\"Atlas routing target\")\n",
+        "pprint(INPUT_CASE[\"family_target\"])\n",
+        "\n",
+        "# Both prompts are shown verbatim, each under its own heading.\n",
+        "for heading, prompt_text in ((\"Baseline prompt\", baseline_prompt), (\"Repaired prompt\", repaired_prompt)):\n",
+        "    section(heading)\n",
+        "    print(prompt_text)\n",
+        "\n",
+        "# Canned baseline answer followed by the note on why it is weak.\n",
+        "section(\"Replay mode · baseline\")\n",
+        "print(\n",
+        "    \"Baseline diagnosis:\",\n",
+        "    REPLAY_OUTPUTS[\"baseline_diagnosis\"],\n",
+        "    \"\",\n",
+        "    \"Why baseline is weak:\",\n",
+        "    REPLAY_OUTPUTS[\"baseline_problem\"],\n",
+        "    sep=\"\\n\",\n",
+        ")\n",
+        "\n",
+        "section(\"Replay mode · repaired\")\n",
+        "print(\"Repaired diagnosis:\", REPLAY_OUTPUTS[\"repaired_diagnosis\"], sep=\"\\n\")\n",
+        "\n",
+        "# One line per state; the labels are padded so the values line up.\n",
+        "section(\"Replay mode · state shift\")\n",
+        "for label, state_key in ((\"Before:\", \"before_state\"), (\"After :\", \"after_state\")):\n",
+        "    print(label, REPLAY_OUTPUTS[state_key])\n",
+        "\n",
+        "# Fixed checklist of what the reader should observe, printed one item per line.\n",
+        "section(\"Expected effect checklist\")\n",
+        "for checklist_line in (\n",
+        "    \"1. Baseline treats a thin trace as if it were sufficient.\",\n",
+        "    \"2. Baseline jumps too early to a direct repair move.\",\n",
+        "    \"3. Repaired version identifies F5 before F4.\",\n",
+        "    \"4. Repaired version treats observability uplift as the first repair move.\",\n",
+        "    \"5. The main gain is diagnosability, not a magically perfect final answer.\",\n",
+        "):\n",
+        "    print(checklist_line)\n",
+        "\n",
+        "section(\"Expected success contract\")\n",
+        "pprint(EXPECTED_OUTPUT)\n",
+        "\n",
+        "# Closing Markdown footer that links back to the Troubleshooting Atlas page.\n",
+        "display(Markdown(\"\"\"\n",
+        "## Back to the main page\n",
+        "\n",
+        "Read the full product page here:\n",
+        "[Problem Map 3.0 Troubleshooting Atlas](https://github.com/onestardao/WFGY/blob/main/ProblemMap/wfgy-ai-problem-map-troubleshooting-atlas.md)\n",
+        "\n",
+        "If you like the project, star the repo ⭐\n",
+        "\"\"\"))"
+      ],
+      "metadata": {
+        "id": "jS5RVmnSOVU1"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}