mirror of
https://github.com/onestardao/WFGY.git
synced 2026-05-05 23:40:49 +00:00
Add files via upload
This commit is contained in:
parent
fc9d8a4885
commit
c470fc1861
1 changed file with 227 additions and 0 deletions
|
|
@@ -0,0 +1,227 @@
|
|||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from pprint import pprint\n",
|
||||
"from IPython.display import display, Markdown\n",
|
||||
"\n",
|
||||
"MODE = \"replay\"\n",
|
||||
"\n",
|
||||
"INPUT_CASE = {\n",
|
||||
" \"demo_id\": \"demo_f5_observability_first\",\n",
|
||||
" \"demo_version\": \"v1\",\n",
|
||||
" \"case_id\": \"f5_observability_case_001\",\n",
|
||||
" \"title\": \"Workflow failure with too little visibility for safe diagnosis\",\n",
|
||||
" \"task_type\": \"workflow_debugging\",\n",
|
||||
" \"family_target\": {\n",
|
||||
" \"primary_family\": \"F5\",\n",
|
||||
" \"secondary_family\": \"F4\",\n",
|
||||
" \"best_current_fit\": \"F5_N01 Failure Path Opacity\",\n",
|
||||
" \"broken_invariant\": \"failure_path_visibility_broken\"\n",
|
||||
" },\n",
|
||||
" \"user_question\": \"Why is the workflow returning irrelevant answers, and what should be fixed first?\",\n",
|
||||
" \"thin_trace\": [\n",
|
||||
" \"Step 1: User query received\",\n",
|
||||
" \"Step 2: Retrieval executed\",\n",
|
||||
" \"Step 3: Final answer produced\",\n",
|
||||
" \"Observed symptom: answer is irrelevant to the user question\"\n",
|
||||
" ],\n",
|
||||
" \"observability_uplift\": {\n",
|
||||
" \"retrieval_trace\": [\n",
|
||||
" \"retriever_query = 'general company summary'\",\n",
|
||||
" \"top_k = 2\",\n",
|
||||
" \"returned_chunk_ids = ['chunk_014', 'chunk_019']\",\n",
|
||||
" \"both chunks are broad product overviews, not release-note evidence\"\n",
|
||||
" ],\n",
|
||||
" \"candidate_trace\": [\n",
|
||||
" \"candidate_answer_1 = 'The workflow likely needs a stronger generation step'\",\n",
|
||||
" \"candidate_answer_2 = 'The retrieval target appears off-topic relative to the question'\"\n",
|
||||
" ],\n",
|
||||
" \"post_check_trace\": [\n",
|
||||
" \"answer_to_question_alignment = low\",\n",
|
||||
" \"evidence_to_answer_alignment = low\",\n",
|
||||
" \"retrieval_to_question_alignment = low\"\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"REPLAY_OUTPUTS = {\n",
|
||||
" \"baseline_diagnosis\": \"The workflow likely needs a stronger final prompt or a better answer generation step. Try improving the model instructions first.\",\n",
|
||||
" \"baseline_problem\": \"The diagnosis jumps too early to a direct fix even though the workflow is still too opaque for a safe root-cause claim.\",\n",
|
||||
" \"repaired_diagnosis\": \"The first repair move should be observability uplift. The workflow should not be treated as execution-first or reasoning-first yet, because the visible trace is still too thin. Once retrieval trace, candidate trace, and post-check trace are exposed, the system becomes diagnosable and the off-target retrieval signal becomes inspectable.\",\n",
|
||||
" \"before_state\": \"opaque\",\n",
|
||||
" \"after_state\": \"diagnosable\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"EXPECTED_OUTPUT = {\n",
|
||||
" \"primary_family\": \"F5\",\n",
|
||||
" \"secondary_family\": \"F4\",\n",
|
||||
" \"best_current_fit\": \"F5_N01 Failure Path Opacity\",\n",
|
||||
" \"broken_invariant\": \"failure_path_visibility_broken\",\n",
|
||||
" \"first_repair_move\": \"observability insertion\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"baseline_prompt = f\"\"\"\n",
|
||||
"You are a workflow debugging assistant.\n",
|
||||
"\n",
|
||||
"A system received a user query, ran retrieval, and produced a final answer.\n",
|
||||
"The final answer was irrelevant to the user question.\n",
|
||||
"\n",
|
||||
"Available trace:\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][0]}\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][1]}\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][2]}\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][3]}\n",
|
||||
"\n",
|
||||
"Explain what probably went wrong.\n",
|
||||
"Then recommend one direct fix to apply immediately.\n",
|
||||
"\n",
|
||||
"Assume the current trace is enough.\n",
|
||||
"Keep the answer short and confident.\n",
|
||||
"\"\"\".strip()\n",
|
||||
"\n",
|
||||
"repaired_prompt = f\"\"\"\n",
|
||||
"You are diagnosing a workflow failure.\n",
|
||||
"\n",
|
||||
"Question:\n",
|
||||
"{INPUT_CASE[\"user_question\"]}\n",
|
||||
"\n",
|
||||
"Thin trace:\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][0]}\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][1]}\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][2]}\n",
|
||||
"- {INPUT_CASE[\"thin_trace\"][3]}\n",
|
||||
"\n",
|
||||
"Additional observability:\n",
|
||||
"Retrieval trace:\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][0]}\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][1]}\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][2]}\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"retrieval_trace\"][3]}\n",
|
||||
"\n",
|
||||
"Candidate trace:\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"candidate_trace\"][0]}\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"candidate_trace\"][1]}\n",
|
||||
"\n",
|
||||
"Post-check trace:\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"post_check_trace\"][0]}\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"post_check_trace\"][1]}\n",
|
||||
"- {INPUT_CASE[\"observability_uplift\"][\"post_check_trace\"][2]}\n",
|
||||
"\n",
|
||||
"Answer in this order:\n",
|
||||
"1. What the first failure family is\n",
|
||||
"2. Why F5 is a better first cut than F4\n",
|
||||
"3. What the first repair move should be\n",
|
||||
"4. What the visible evidence now suggests\n",
|
||||
"\"\"\".strip()\n",
|
||||
"\n",
|
||||
"def section(title: str):\n",
|
||||
" print(\"\\n\" + \"=\" * 88)\n",
|
||||
" print(title)\n",
|
||||
" print(\"=\" * 88)\n",
|
||||
"\n",
|
||||
"def bullet_list(items):\n",
|
||||
" for item in items:\n",
|
||||
" print(f\"- {item}\")\n",
|
||||
"\n",
|
||||
"display(Markdown(\"\"\"\n",
|
||||
"# Problem Map 3.0 Troubleshooting Atlas\n",
|
||||
"## Demo 2 · F5 Observability First\n",
|
||||
"\n",
|
||||
"### What this experiment is\n",
|
||||
"This notebook is a **replay-only MVP** experiment.\n",
|
||||
"\n",
|
||||
"It is designed to show that some failures should not be repaired by guessing a root cause too early.\n",
|
||||
"Some failures first break at **Observability & Diagnosability Integrity**, so the correct first move is to expose the failure path before attempting deeper repair.\n",
|
||||
"\n",
|
||||
"### Why this notebook is replay-only\n",
|
||||
"For this MVP, **live mode is not required**.\n",
|
||||
"The point of this demo is not model creativity.\n",
|
||||
"The point is to make the **before / after visibility shift** obvious and easy to inspect.\n",
|
||||
"\n",
|
||||
"### What you should expect to see\n",
|
||||
"- The baseline overcommits under thin trace\n",
|
||||
"- The repaired version does not jump too early\n",
|
||||
"- The repaired version treats **observability uplift** as the correct first move\n",
|
||||
"- The workflow shifts from **opaque** to **diagnosable**\n",
|
||||
"\"\"\"))\n",
|
||||
"\n",
|
||||
"section(\"Mode\")\n",
|
||||
"print(f\"MODE = {MODE}\")\n",
|
||||
"\n",
|
||||
"section(\"Case overview\")\n",
|
||||
"print(\"Title:\")\n",
|
||||
"print(INPUT_CASE[\"title\"])\n",
|
||||
"print()\n",
|
||||
"print(\"Question:\")\n",
|
||||
"print(INPUT_CASE[\"user_question\"])\n",
|
||||
"\n",
|
||||
"section(\"Thin trace\")\n",
|
||||
"bullet_list(INPUT_CASE[\"thin_trace\"])\n",
|
||||
"\n",
|
||||
"section(\"Atlas routing target\")\n",
|
||||
"pprint(INPUT_CASE[\"family_target\"])\n",
|
||||
"\n",
|
||||
"section(\"Baseline prompt\")\n",
|
||||
"print(baseline_prompt)\n",
|
||||
"\n",
|
||||
"section(\"Repaired prompt\")\n",
|
||||
"print(repaired_prompt)\n",
|
||||
"\n",
|
||||
"section(\"Replay mode · baseline\")\n",
|
||||
"print(\"Baseline diagnosis:\")\n",
|
||||
"print(REPLAY_OUTPUTS[\"baseline_diagnosis\"])\n",
|
||||
"print()\n",
|
||||
"print(\"Why baseline is weak:\")\n",
|
||||
"print(REPLAY_OUTPUTS[\"baseline_problem\"])\n",
|
||||
"\n",
|
||||
"section(\"Replay mode · repaired\")\n",
|
||||
"print(\"Repaired diagnosis:\")\n",
|
||||
"print(REPLAY_OUTPUTS[\"repaired_diagnosis\"])\n",
|
||||
"\n",
|
||||
"section(\"Replay mode · state shift\")\n",
|
||||
"print(\"Before:\", REPLAY_OUTPUTS[\"before_state\"])\n",
|
||||
"print(\"After :\", REPLAY_OUTPUTS[\"after_state\"])\n",
|
||||
"\n",
|
||||
"section(\"Expected effect checklist\")\n",
|
||||
"print(\"1. Baseline treats a thin trace as if it were sufficient.\")\n",
|
||||
"print(\"2. Baseline jumps too early to a direct repair move.\")\n",
|
||||
"print(\"3. Repaired version identifies F5 before F4.\")\n",
|
||||
"print(\"4. Repaired version treats observability uplift as the first repair move.\")\n",
|
||||
"print(\"5. The main gain is diagnosability, not a magically perfect final answer.\")\n",
|
||||
"\n",
|
||||
"section(\"Expected success contract\")\n",
|
||||
"pprint(EXPECTED_OUTPUT)\n",
|
||||
"\n",
|
||||
"display(Markdown(\"\"\"\n",
|
||||
"## Back to the main page\n",
|
||||
"\n",
|
||||
"Read the full product page here:\n",
|
||||
"[Problem Map 3.0 Troubleshooting Atlas](https://github.com/onestardao/WFGY/blob/main/ProblemMap/wfgy-ai-problem-map-troubleshooting-atlas.md)\n",
|
||||
"\n",
|
||||
"If you like the project, star the repo ⭐\n",
|
||||
"\"\"\"))"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "jS5RVmnSOVU1"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue