diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index d481c5ec529..972c212a4f4 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -660,7 +660,7 @@ jobs: published_upgrade_survivor_baselines: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'last-stable-4 2026.4.23 2026.5.2 2026.4.15' || '' }} published_upgrade_survivor_scenarios: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'reported-issues' || '' }} telegram_mode: mock-openai - telegram_scenarios: telegram-help-command,telegram-commands-command,telegram-tools-compact-command,telegram-whoami-command,telegram-status-command,telegram-other-bot-command-gating,telegram-context-command,telegram-mentioned-message-reply,telegram-reply-chain-exact-marker,telegram-stream-final-single-message,telegram-long-final-reuses-preview,telegram-mention-gating + telegram_scenarios: telegram-help-command,telegram-commands-command,telegram-tools-compact-command,telegram-whoami-command,telegram-status-command,telegram-other-bot-command-gating,telegram-context-command,telegram-mentioned-message-reply,telegram-long-final-reuses-preview,telegram-mention-gating secrets: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} diff --git a/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts index d4ba59882e4..ecd54932016 100644 --- a/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts +++ b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts @@ -483,8 +483,6 @@ describe("telegram live qa runtime", () => { "telegram-other-bot-command-gating", "telegram-context-command", "telegram-mentioned-message-reply", - "telegram-reply-chain-exact-marker", - "telegram-stream-final-single-message", "telegram-long-final-reuses-preview", "telegram-mention-gating", ], @@ -500,8 +498,11 @@ describe("telegram live qa runtime", () => { false, ); const streamSingle = requireScenario(catalog, "telegram-stream-final-single-message"); - expect(streamSingle.defaultEnabled).toBe(true); + expect(streamSingle.defaultEnabled).toBe(false); expect(streamSingle.regressionRefs).toEqual(["openclaw/openclaw#39905"]); + expect(requireScenario(catalog, "telegram-reply-chain-exact-marker").defaultEnabled).toBe( + false, + ); }); it("tracks Telegram live coverage against the shared transport contract", () => { diff --git a/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts index 56f13e0552e..883c02ef1e8 100644 --- a/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts +++ b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts @@ -396,8 +396,10 @@ const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [ { id: "telegram-reply-chain-exact-marker", title: "Telegram reply-chain exact marker", + defaultEnabled: false, defaultProviderModes: ["mock-openai"], - rationale: "Mock-backed exact-marker check proves Telegram final text survives reply handling.", + rationale: + "Opt-in mock-backed exact-marker check for Telegram final text through reply handling.", timeoutMs: 75_000, buildRun: (sutUsername) => telegramQaStepRun({ @@ -412,8 +414,9 @@ const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [ { id: "telegram-stream-final-single-message", title: "Telegram streamed final stays one message", + defaultEnabled: false, defaultProviderModes: ["mock-openai"], - rationale: "Regression guard for duplicate final replies from Telegram streaming paths.", + rationale: "Opt-in regression guard for duplicate final replies from Telegram streaming paths.", regressionRefs: ["openclaw/openclaw#39905"], timeoutMs: 75_000, buildRun: (sutUsername) => diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts index cd41c8d42c6..5818ea40a44 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts @@ -303,11 +303,19 @@ export async function runThreadIsolationScenario(context: MatrixQaScenarioContex export async function runSubagentThreadSpawnScenario(context: MatrixQaScenarioContext) { const { client, startSince } = await primeMatrixQaDriverScenarioClient(context); const childToken = buildMatrixQaToken("MATRIX_QA_SUBAGENT_CHILD"); + const spawnArgs = { + task: `Finish with exactly ${childToken}.`, + label: "matrix-thread-subagent", + thread: true, + mode: "session", + runTimeoutSeconds: 120, + }; const triggerBody = [ - `${context.sutUserId} For this QA check, call the sessions_spawn tool now.`, - `Use exactly these arguments: task="Finish with exactly ${childToken}.", label="matrix-thread-subagent", thread=true, mode="session", runTimeoutSeconds=120.`, + `${context.sutUserId} Run this exact OpenClaw Matrix thread-spawn QA check. Use tool calls, not prose.`, + `Step 1: call sessions_spawn with exactly this JSON input: ${JSON.stringify(spawnArgs)}.`, + 'Step 2: after spawn returns status="accepted", wait for the child session reply in the spawned Matrix thread.', "Do not omit thread=true; the child must bind to this Matrix thread.", - "Do not write the child token in the parent response.", + `Do not write ${childToken} in the parent response.`, ].join(" "); const driverEventId = await client.sendTextMessage({ body: triggerBody, diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index 177da1ae3a5..24f057bad4c 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -2694,7 +2694,7 @@ describe("matrix live qa scenarios", () => { })) .mockImplementationOnce(async () => { const childToken = - /task="Finish with exactly ([^".]+)\./.exec( + /"task":"Finish with exactly ([^".]+)\./.exec( mockMessageBody(sendTextMessage, "sendTextMessage"), )?.[1] ?? "MATRIX_QA_SUBAGENT_CHILD_FIXED"; return { @@ -2766,7 +2766,11 @@ describe("matrix live qa scenarios", () => { expect(artifacts.threadRootEventId).toBe("$subagent-thread-root"); expectSentTextMessage(sendTextMessage, { - bodyIncludes: ["call the sessions_spawn tool now", "thread=true", "runTimeoutSeconds=120"], + bodyIncludes: [ + "call sessions_spawn with exactly this JSON input", + '"thread":true', + '"runTimeoutSeconds":120', + ], mentionUserIds: ["@sut:matrix-qa.test"], roomId: "!main:matrix-qa.test", }); diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index d14a0132935..e3b78479e94 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -614,7 +614,7 @@ describe("package artifact reuse", () => { ); expect(workflow).toContain("telegram_mode: mock-openai"); expect(workflow).toContain( - "telegram_scenarios: telegram-help-command,telegram-commands-command,telegram-tools-compact-command,telegram-whoami-command,telegram-status-command,telegram-other-bot-command-gating,telegram-context-command,telegram-mentioned-message-reply,telegram-reply-chain-exact-marker,telegram-stream-final-single-message,telegram-long-final-reuses-preview,telegram-mention-gating", + "telegram_scenarios: telegram-help-command,telegram-commands-command,telegram-tools-compact-command,telegram-whoami-command,telegram-status-command,telegram-other-bot-command-gating,telegram-context-command,telegram-mentioned-message-reply,telegram-long-final-reuses-preview,telegram-mention-gating", ); expect(workflow).toContain("ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}"); expect(workflow).toContain("ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}");