diff --git a/ProblemMap/Atlas/Fixes/official/demos/demo-f5-observability-first/demo_f5_observability.ipynb b/ProblemMap/Atlas/Fixes/official/demos/demo-f5-observability-first/demo_f5_observability.ipynb deleted file mode 100644 index c0155a37..00000000 --- a/ProblemMap/Atlas/Fixes/official/demos/demo-f5-observability-first/demo_f5_observability.ipynb +++ /dev/null @@ -1,227 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "code", - "source": [ - "from pprint import pprint\n", - "from IPython.display import display, Markdown\n", - "\n", - "MODE = \"replay\"\n", - "\n", - "INPUT_CASE = {\n", - " \"demo_id\": \"demo_f5_observability_first\",\n", - " \"demo_version\": \"v1\",\n", - " \"case_id\": \"f5_observability_case_001\",\n", - " \"title\": \"Workflow failure with too little visibility for safe diagnosis\",\n", - " \"task_type\": \"workflow_debugging\",\n", - " \"family_target\": {\n", - " \"primary_family\": \"F5\",\n", - " \"secondary_family\": \"F4\",\n", - " \"best_current_fit\": \"F5_N01 Failure Path Opacity\",\n", - " \"broken_invariant\": \"failure_path_visibility_broken\"\n", - " },\n", - " \"user_question\": \"Why is the workflow returning irrelevant answers, and what should be fixed first?\",\n", - " \"thin_trace\": [\n", - " \"Step 1: User query received\",\n", - " \"Step 2: Retrieval executed\",\n", - " \"Step 3: Final answer produced\",\n", - " \"Observed symptom: answer is irrelevant to the user question\"\n", - " ],\n", - " \"observability_uplift\": {\n", - " \"retrieval_trace\": [\n", - " \"retriever_query = 'general company summary'\",\n", - " \"top_k = 2\",\n", - " \"returned_chunk_ids = ['chunk_014', 'chunk_019']\",\n", - " \"both chunks are broad product overviews, not release-note evidence\"\n", - " ],\n", - " \"candidate_trace\": [\n", - " \"candidate_answer_1 = 'The workflow likely needs a stronger generation step'\",\n", - " \"candidate_answer_2 = 'The retrieval target appears off-topic relative to the question'\"\n", - " ],\n", - " \"post_check_trace\": [\n", - " \"answer_to_question_alignment = low\",\n", - " \"evidence_to_answer_alignment = low\",\n", - " \"retrieval_to_question_alignment = low\"\n", - " ]\n", - " }\n", - "}\n", - "\n", - "REPLAY_OUTPUTS = {\n", - " \"baseline_diagnosis\": \"The workflow likely needs a stronger final prompt or a better answer generation step. Try improving the model instructions first.\",\n", - " \"baseline_problem\": \"The diagnosis jumps too early to a direct fix even though the workflow is still too opaque for a safe root-cause claim.\",\n", - " \"repaired_diagnosis\": \"The first repair move should be observability uplift. The workflow should not be treated as execution-first or reasoning-first yet, because the visible trace is still too thin. Once retrieval trace, candidate trace, and post-check trace are exposed, the system becomes diagnosable and the off-target retrieval signal becomes inspectable.\",\n", - " \"before_state\": \"opaque\",\n", - " \"after_state\": \"diagnosable\"\n", - "}\n", - "\n", - "EXPECTED_OUTPUT = {\n", - " \"primary_family\": \"F5\",\n", - " \"secondary_family\": \"F4\",\n", - " \"best_current_fit\": \"F5_N01 Failure Path Opacity\",\n", - " \"broken_invariant\": \"failure_path_visibility_broken\",\n", - " \"first_repair_move\": \"observability insertion\"\n", - "}\n", - "\n", - "baseline_prompt = f\"\"\"\n", - "You are a workflow debugging assistant.\n", - "\n", - "A system received a user query, ran retrieval, and produced a final answer.\n", - "The final answer was irrelevant to the user question.\n", - "\n", - "Available trace:\n", - "- {INPUT_CASE[\"thin_trace\"][0]}\n", - "- {INPUT_CASE[\"thin_trace\"][1]}\n", - "- {INPUT_CASE[\"thin_trace\"][2]}\n", - "- {INPUT_CASE[\"thin_trace\"][3]}\n", - "\n", - "Explain what probably went wrong.\n", - "Then recommend one direct fix to apply immediately.\n", - "\n", - "Assume the current trace is enough.\n", - "Keep the answer short and confident.\n", - "\"\"\".strip()\n", - "\n", - "repaired_prompt = f\"\"\"\n", - "You are diagnosing a workflow failure.\n", - "\n", - "Question:\n", - "{INPUT_CASE[\"user_question\"]}\n", - "\n", - "Thin trace:\n", - "- {INPUT_CASE[\"thin_trace\"][0]}\n", - "- {INPUT_CASE[\"thin_trace\"][1]}\n", - "- {INPUT_CASE[\"thin_trace\"][2]}\n", - "- {INPUT_CASE[\"thin_trace\"][3]}\n", - "\n", - "Additional observability:\n", - "Retrieval trace:\n", - "- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][0]}\n", - "- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][1]}\n", - "- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][2]}\n", - "- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][3]}\n", - "\n", - "Candidate trace:\n", - "- {INPUT_CASE[\"observability_uplift\"][\"candidate_trace\"][0]}\n", - "- {INPUT_CASE[\"observability_uplift\"][\"candidate_trace\"][1]}\n", - "\n", - "Post-check trace:\n", - "- {INPUT_CASE[\"observability_uplift\"][\"post_check_trace\"][0]}\n", - "- {INPUT_CASE[\"observability_uplift\"][\"post_check_trace\"][1]}\n", - "- {INPUT_CASE[\"observability_uplift\"][\"post_check_trace\"][2]}\n", - "\n", - "Answer in this order:\n", - "1. What the first failure family is\n", - "2. Why F5 is a better first cut than F4\n", - "3. What the first repair move should be\n", - "4. What the visible evidence now suggests\n", - "\"\"\".strip()\n", - "\n", - "def section(title: str):\n", - " print(\"\\n\" + \"=\" * 88)\n", - " print(title)\n", - " print(\"=\" * 88)\n", - "\n", - "def bullet_list(items):\n", - " for item in items:\n", - " print(f\"- {item}\")\n", - "\n", - "display(Markdown(\"\"\"\n", - "# Problem Map 3.0 Troubleshooting Atlas\n", - "## Demo 2 · F5 Observability First\n", - "\n", - "### What this experiment is\n", - "This notebook is a **replay-only MVP** experiment.\n", - "\n", - "It is designed to show that some failures should not be repaired by guessing a root cause too early.\n", - "Some failures first break at **Observability & Diagnosability Integrity**, so the correct first move is to expose the failure path before attempting deeper repair.\n", - "\n", - "### Why this notebook is replay-only\n", - "For this MVP, **live mode is not required**.\n", - "The point of this demo is not model creativity.\n", - "The point is to make the **before / after visibility shift** obvious and easy to inspect.\n", - "\n", - "### What you should expect to see\n", - "- The baseline overcommits under thin trace\n", - "- The repaired version does not jump too early\n", - "- The repaired version treats **observability uplift** as the correct first move\n", - "- The workflow shifts from **opaque** to **diagnosable**\n", - "\"\"\"))\n", - "\n", - "section(\"Mode\")\n", - "print(f\"MODE = {MODE}\")\n", - "\n", - "section(\"Case overview\")\n", - "print(\"Title:\")\n", - "print(INPUT_CASE[\"title\"])\n", - "print()\n", - "print(\"Question:\")\n", - "print(INPUT_CASE[\"user_question\"])\n", - "\n", - "section(\"Thin trace\")\n", - "bullet_list(INPUT_CASE[\"thin_trace\"])\n", - "\n", - "section(\"Atlas routing target\")\n", - "pprint(INPUT_CASE[\"family_target\"])\n", - "\n", - "section(\"Baseline prompt\")\n", - "print(baseline_prompt)\n", - "\n", - "section(\"Repaired prompt\")\n", - "print(repaired_prompt)\n", - "\n", - "section(\"Replay mode · baseline\")\n", - "print(\"Baseline diagnosis:\")\n", - "print(REPLAY_OUTPUTS[\"baseline_diagnosis\"])\n", - "print()\n", - "print(\"Why baseline is weak:\")\n", - "print(REPLAY_OUTPUTS[\"baseline_problem\"])\n", - "\n", - "section(\"Replay mode · repaired\")\n", - "print(\"Repaired diagnosis:\")\n", - "print(REPLAY_OUTPUTS[\"repaired_diagnosis\"])\n", - "\n", - "section(\"Replay mode · state shift\")\n", - "print(\"Before:\", REPLAY_OUTPUTS[\"before_state\"])\n", - "print(\"After :\", REPLAY_OUTPUTS[\"after_state\"])\n", - "\n", - "section(\"Expected effect checklist\")\n", - "print(\"1. Baseline treats a thin trace as if it were sufficient.\")\n", - "print(\"2. Baseline jumps too early to a direct repair move.\")\n", - "print(\"3. Repaired version identifies F5 before F4.\")\n", - "print(\"4. Repaired version treats observability uplift as the first repair move.\")\n", - "print(\"5. The main gain is diagnosability, not a magically perfect final answer.\")\n", - "\n", - "section(\"Expected success contract\")\n", - "pprint(EXPECTED_OUTPUT)\n", - "\n", - "display(Markdown(\"\"\"\n", - "## Back to the main page\n", - "\n", - "Read the full product page here:\n", - "[Problem Map 3.0 Troubleshooting Atlas](https://github.com/onestardao/WFGY/blob/main/ProblemMap/wfgy-ai-problem-map-troubleshooting-atlas.md)\n", - "\n", - "If you like the project, star the repo ⭐\n", - "\"\"\"))" - ], - "metadata": { - "id": "jS5RVmnSOVU1" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file