fix(attribution): depth-1 shallow detection, snapshot dedup post-rewind/post-failure

sfGz (Copilot): rev-list --count HEAD === 1 cannot distinguish a
true root commit from a depth-1 shallow clone — both report 1
because rev-list only walks locally available objects. Switch to
git log -1 --pretty=%P HEAD which reads the parent SHA directly
from commit metadata: empty means a real root, non-empty means a
parent is recorded (whether or not its object is local). The
shallow-clone bail is now reliable.

sfIm (Copilot): the dedup key persisted across rewindRecording, so
the previous snapshot living on the now-abandoned branch would
match the next post-rewind snapshot and silently skip the write,
leaving /resume on the rewound session with no attribution state.
Reset lastAttributionSnapshotJson when rewindRecording fires.

sfJE (Copilot): dedup key was committed before the async write
settled. A transient write failure would update the key, then
permanently suppress all future identical snapshots even though
nothing was ever persisted. Switch to optimistic-set then rollback
on appendRecord rejection — synchronous identical calls dedup
cleanly, but a failed write clears the key so the next identical
snapshot retries. appendRecord now returns the per-record write
promise (writeChain still has its swallow-catch for chain liveness)
so callers needing per-write success can react to it. Tests added
in chatRecordingService.test.ts for both rewind-reset and
rollback-on-failure paths.
This commit is contained in:
wenshao 2026-05-06 01:04:57 +08:00
parent 3c0e3293be
commit e4bb0181ad
3 changed files with 93 additions and 23 deletions

View file

@ -466,6 +466,45 @@ describe('ChatRecordingService', () => {
await chatRecordingService.flush();
expect(jsonl.writeLine).toHaveBeenCalledTimes(1);
});
// After rewindRecording, the previous attribution snapshot lives on
// the abandoned branch, so the dedup key has to clear — otherwise
// the post-rewind identical snapshot would be silently skipped and
// /resume on the rewound session would lose all attribution state.
it('should re-write an identical snapshot after rewindRecording', async () => {
chatRecordingService.recordUserMessage([{ text: 'turn 1' }]);
chatRecordingService.recordAttributionSnapshot(baseSnapshot);
await chatRecordingService.flush();
const beforeRewind = vi.mocked(jsonl.writeLine).mock.calls.length;
chatRecordingService.rewindRecording(0, { truncatedCount: 0 });
// Same snapshot bytes — without the rewind reset this would dedup.
chatRecordingService.recordAttributionSnapshot(baseSnapshot);
await chatRecordingService.flush();
// 1 rewind record + 1 fresh snapshot = 2 more writes after rewind.
expect(vi.mocked(jsonl.writeLine).mock.calls.length).toBe(
beforeRewind + 2,
);
});
// A transient write failure must NOT permanently suppress future
// identical snapshots: if the dedup key were committed before the
// write, the next identical snapshot would dedup and the session
// would have no attribution snapshot at all.
it('should retry an identical snapshot after a write failure', async () => {
vi.mocked(jsonl.writeLine).mockRejectedValueOnce(new Error('disk full'));
chatRecordingService.recordAttributionSnapshot(baseSnapshot);
// Wait for the queued (failing) write to settle so the rollback runs.
await chatRecordingService.flush();
const afterFailure = vi.mocked(jsonl.writeLine).mock.calls.length;
chatRecordingService.recordAttributionSnapshot(baseSnapshot);
await chatRecordingService.flush();
// Retry should fire, so we get a new write call.
expect(vi.mocked(jsonl.writeLine).mock.calls.length).toBe(
afterFailure + 1,
);
});
});
// Note: Session management tests (listSessions, loadSession, deleteSession, etc.)

View file

@ -507,7 +507,14 @@ export class ChatRecordingService {
* local-disk writes failures are rare enough to accept the fire-and-forget
* simplification.
*/
private appendRecord(record: ChatRecord): void {
/**
* Returns a promise that resolves after the queued write succeeds and
* rejects (without logging) if it fails. The internal `writeChain` is
* advanced with a swallowing catch so the chain stays alive across
* failures; callers that need to react to per-record success can await
* the returned promise.
*/
private appendRecord(record: ChatRecord): Promise<void> {
let conversationFile: string;
try {
conversationFile = this.ensureConversationFile();
@ -516,12 +523,13 @@ export class ChatRecordingService {
throw error;
}
this.lastRecordUuid = record.uuid;
this.writeChain = this.writeChain
const writePromise = this.writeChain
.catch(() => {})
.then(() => jsonl.writeLine(conversationFile, record))
.catch((err) => {
debugLogger.error('Error appending record (async):', err);
});
.then(() => jsonl.writeLine(conversationFile, record));
this.writeChain = writePromise.catch((err) => {
debugLogger.error('Error appending record (async):', err);
});
return writePromise;
}
/**
@ -836,6 +844,12 @@ export class ChatRecordingService {
this.lastRecordUuid = this.turnParentUuids[targetTurnIndex] ?? null;
// Trim future boundaries — they no longer exist in the active branch.
this.turnParentUuids = this.turnParentUuids.slice(0, targetTurnIndex);
// The previous attribution snapshot now sits on the abandoned
// branch — clear the dedup key so the next snapshot lands on the
// active branch and `/resume` can find it. Without this, a
// post-rewind identical snapshot would be skipped and the rewound
// session would lose all attribution state on restore.
this.lastAttributionSnapshotJson = undefined;
const record: ChatRecord = {
...this.createBaseRecord('system'),
@ -985,6 +999,12 @@ export class ChatRecordingService {
* Without this, sessions that touch many files would write a full
* duplicate of the entire snapshot to the JSONL on every turn, even
* when nothing changed inflating session size and slowing /resume.
*
* Set the dedup key optimistically and roll it back if the write
* fails. Synchronous identical calls (common during a tool-driven
* turn) all dedup correctly, but a transient write failure clears
* the key so the next identical snapshot retries the write rather
* than being permanently suppressed.
*/
recordAttributionSnapshot(snapshot: AttributionSnapshot): void {
try {
@ -999,8 +1019,14 @@ export class ChatRecordingService {
systemPayload: { snapshot },
};
this.appendRecord(record);
this.lastAttributionSnapshotJson = json;
this.appendRecord(record).catch(() => {
// Write failed — only roll back if the key still belongs to
// our snapshot (a later distinct write may have overwritten it).
if (this.lastAttributionSnapshotJson === json) {
this.lastAttributionSnapshotJson = undefined;
}
});
} catch (error) {
debugLogger.error('Error saving attribution snapshot:', error);
}

View file

@ -2035,30 +2035,35 @@ export class ShellToolInvocation extends BaseToolInvocation<
};
try {
// The four `rev-parse`-shaped calls are independent — fan out so
// we don't pay the spawn latency serially. Same for the three
// diff calls below once we know which form to use.
// The three calls are independent — fan out so we don't pay the
// spawn latency serially. Same for the three diff calls below
// once we know which form to use.
// - `rev-parse --verify HEAD~1`: probe whether the parent OBJECT
// is locally available (fails in shallow clones where the
// parent was pruned).
// - `rev-list --count HEAD`: total commits reachable. `=== 1`
// means HEAD truly is the root commit (no parent SHA recorded
// on HEAD), where `diff-tree --root` is correct. `> 1` means
// HEAD has a parent recorded — if --verify also failed, this
// is a shallow clone and we must NOT fall back to --root
// (which would diff against the empty tree and over-attribute).
const [hasParentOutput, commitCountOutput, repoRootOutput] =
// - `log -1 --pretty=%P HEAD`: read the parent SHA from HEAD's
// commit metadata. Works regardless of shallow status because
// the parent SHA is recorded on the commit itself, not derived
// by walking. Empty output = HEAD is a true root commit.
// Non-empty output = HEAD has a parent (whether or not its
// object is locally available).
// - `rev-parse --show-toplevel`: capture the repo root.
//
// `rev-list --count HEAD` looks tempting as a "is this a root
// commit?" probe but it returns 1 in a depth-1 shallow clone
// (only the local object is reachable), aliasing the shallow
// and root cases. The parent-SHA approach disambiguates them
// correctly.
const [hasParentOutput, parentShaOutput, repoRootOutput] =
await Promise.all([
runGit('rev-parse --verify HEAD~1'),
runGit('rev-list --count HEAD'),
runGit('log -1 --pretty=%P HEAD'),
runGit('rev-parse --show-toplevel'),
]);
const hasParent = hasParentOutput.length > 0;
const totalCommits = parseInt(commitCountOutput.trim(), 10);
const isTrueRootCommit =
Number.isFinite(totalCommits) && totalCommits === 1;
// Shallow clone: HEAD has a parent but the object isn't local.
// Bail rather than over-attribute via --root.
const isTrueRootCommit = parentShaOutput.trim().length === 0;
// Shallow clone: HEAD has a parent recorded but the object
// isn't local. Bail rather than over-attribute via --root.
if (!hasParent && !isTrueRootCommit) {
debugLogger.warn(
'getCommittedFileInfo: HEAD~1 unreadable but commit is not the ' +