diff --git a/.changeset/rework-compaction-strategy.md b/.changeset/rework-compaction-strategy.md new file mode 100644 index 000000000..554ba6615 --- /dev/null +++ b/.changeset/rework-compaction-strategy.md @@ -0,0 +1,10 @@ +--- +"@moonshot-ai/kimi-code": minor +--- + +Rework conversation compaction: + +- Keep only recent user prompts plus a single user-role summary; drop assistant and tool messages. +- Repair tool_use/tool_result adjacency before sending, fixing a strict-provider HTTP 400 when a tool call and its result became non-adjacent. +- Merge consecutive user turns for strict providers (Gemini/Vertex), fixing an HTTP 400 ("roles must alternate") after compaction or when a turn is steered in right after a tool result. +- Micro-compaction now defaults off. diff --git a/apps/vis/server/src/lib/context-projector.ts b/apps/vis/server/src/lib/context-projector.ts index 290ef0c7b..341556696 100644 --- a/apps/vis/server/src/lib/context-projector.ts +++ b/apps/vis/server/src/lib/context-projector.ts @@ -1,3 +1,9 @@ +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, +} from '@moonshot-ai/agent-core'; import type { ContentPart, ContextMessage, @@ -238,19 +244,21 @@ export function projectContext( break; case 'context.apply_compaction': { openSteps = new Map(); - // Mirror agent-core's actual `applyCompaction` behaviour - // (`packages/agent-core/src/agent/context/index.ts`): history becomes - // `[summaryBubble, ...history.slice(compactedCount)]`. The summary is - // an *assistant* message tagged `origin.kind = 'compaction_summary'` - // (using 'system' would skew role counts and any downstream diff - // against agent-core history). The post-compaction tail is preserved - // rather than dropped, so messages still in context stay visible. 
+ // Mirror agent-core's `applyCompaction` + // (`packages/agent-core/src/agent/context/index.ts`): the live history + // becomes the most recent real user messages (verbatim, within a token + // budget) followed by a single user-role summary tagged + // `origin.kind = 'compaction_summary'`. Assistant messages, tool calls, + // and tool results are dropped. The selection rule + // (`selectRecentUserMessages` / `collectCompactableUserMessages`) is the + // same helper agent-core's `ContextMemory` and the web transcript + // reducer apply, so all three views stay in sync. const summaryBubble: ProjectedMessage = { lineNo: entry.lineNo, time: rec.time, source: 'compaction_summary', message: { - role: 'assistant', + role: 'user', content: [{ type: 'text', text: rec.summary }], toolCalls: [], origin: { kind: 'compaction_summary' }, @@ -262,34 +270,62 @@ export function projectContext( tokensAfter: rec.tokensAfter, }, }; + const modelSummaryBubble: ProjectedMessage = + rec.contextSummary === undefined + ? summaryBubble + : { + ...summaryBubble, + message: { + ...summaryBubble.message, + content: [{ type: 'text', text: rec.contextSummary }], + } as ContextMessage, + }; if (mode === 'model') { - // Drop the first `rec.compactedCount` HISTORY entries (NOT array - // entries): agent-core's `compactedCount` indexes into `_history`, - // which never contains our synthetic 'undo'/'clear' markers. Walk the - // array counting only history entries (`isHistoryEntry`) until - // `compactedCount` are passed, then slice there — any UI-only markers - // in the dropped region go with it (correct: they precede the - // compaction). With no markers this is exactly `slice(compactedCount)`. - let sliceAt = messages.length; - let passed = 0; - for (let i = 0; i < messages.length; i++) { - if (passed >= rec.compactedCount) { - sliceAt = i; - break; - } - if (isHistoryEntry(messages[i]!)) passed++; + // Rebuild the model's-eye view. 
New records carry `keptUserMessageCount` + // and use the kept-user selection below; legacy records fall back to the + // old verbatim-tail shape (handled first). + const historyEntries = messages.filter(isHistoryEntry); + if (rec.keptUserMessageCount === undefined && rec.compactedCount < historyEntries.length) { + // Legacy (pre-rework) record: it has no `keptUserMessageCount`, so + // agent-core's ContextMemory restore reproduces the old + // `[summary, ...history.slice(compactedCount)]` semantics — a verbatim + // recent tail (assistant/tool included), not the new kept-user + // selection. Mirror that exact shape so opening an older compacted + // session in model mode shows the same tail the resumed agent still + // holds, instead of hiding it behind the new selection. + messages = [modelSummaryBubble, ...historyEntries.slice(rec.compactedCount)]; + } else { + // `realUserEntries` is filtered with the exact + // `collectCompactableUserMessages` predicate so it stays aligned with + // the selection below (genuine user input only — no injections, system + // triggers, or prior summaries). `selectRecentUserMessages` keeps a + // contiguous suffix of that subsequence, with only the oldest kept + // message possibly truncated, so each kept message maps back onto its + // original ProjectedMessage wrapper (preserving line/time); we swap in + // the (possibly truncated) message object. + const realUserEntries = historyEntries.filter( + (pm) => collectCompactableUserMessages([pm.message]).length === 1, + ); + const keptUserMessages = selectRecentUserMessages( + realUserEntries.map((pm) => pm.message), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + const suffixStart = realUserEntries.length - keptUserMessages.length; + const keptEntries: ProjectedMessage[] = keptUserMessages.map((message, i) => { + const original = realUserEntries[suffixStart + i]!; + return original.message === message ? 
original : { ...original, message }; + }); + messages = [...keptEntries, modelSummaryBubble]; } - if (passed < rec.compactedCount) sliceAt = messages.length; - messages = [summaryBubble, ...messages.slice(sliceAt)]; } else { // Full history: keep ALL preceding messages, just append the summary // marker inline so the compacted prefix stays visible. messages.push(summaryBubble); } // Mirror agent-core applyCompaction() → microCompaction.reset() (cutoff - // → 0): the message list is rebuilt as [summary, ...tail], so the old - // index-based cutoff no longer points at the same messages. (In full - // mode the blanking pass does not run, so this is a no-op there.) + // → 0): the message list is rebuilt, so the old index-based cutoff no + // longer points at the same messages. (In full mode the blanking pass + // does not run, so this is a no-op there.) microCutoff = 0; // Mirror agent-core applyCompaction() → _tokenCount = result.tokensAfter: // the live context-window fill is now the post-compaction count. Derived @@ -328,7 +364,7 @@ export function projectContext( // Mirror agent-core `undo` (`agent/context/index.ts`): walk from the // end, skip `origin.kind === 'injection'`, stop at // `origin.kind === 'compaction_summary'`, remove others, counting real - // user prompts via `isRealUserPrompt` until `count` is reached. Then + // user prompts via `isRealUserInput` until `count` is reached. Then // leave an undo marker. // // `computeUndoCutoff` is the single source of truth for that skip/stop @@ -581,22 +617,11 @@ function isHistoryEntry(pm: ProjectedMessage): boolean { return pm.source !== 'undo' && pm.source !== 'clear'; } -/** Mirrors agent-core `isRealUserPrompt` (`agent/context/index.ts`): a message - * counts toward an undo only if it is a genuine user prompt. 
*/ -function isRealUserPrompt(message: ContextMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - if (origin.kind === 'skill_activation') return origin.trigger === 'user-slash'; - if (origin.kind === 'plugin_command') return origin.trigger === 'user-slash'; - return false; -} - /** Single source of truth for the `context.undo` backward walk, shared by both * projection modes. Mirrors agent-core `undo` (`agent/context/index.ts`): walk * from the end, skip `origin.kind === 'injection'` (those are KEPT even when * they sit inside the undo window), stop at `origin.kind === 'compaction_summary'`, - * and count real user prompts via `isRealUserPrompt` until `count` is reached. + * and count real user prompts via `isRealUserInput` until `count` is reached. * * Returns the `cutoff` (lowest index to remove from, inclusive) plus the * `removedMessageCount` (number of non-skipped messages in the window). 
In @@ -617,7 +642,7 @@ function computeUndoCutoff( if (origin?.kind === 'compaction_summary') break; // stop removedMessageCount++; cutoff = i; - if (isRealUserPrompt(messages[i]!.message) && ++removedUserCount >= count) break; + if (isRealUserInput(messages[i]!.message) && ++removedUserCount >= count) break; } return { cutoff, removedMessageCount }; } diff --git a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl index 317df60b2..9f44d9a7d 100644 --- a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl +++ b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl @@ -1,5 +1,6 @@ {"type":"metadata","protocol_version":"1.1","created_at":1779256791085} {"type":"config.update","cwd":"/tmp/work","profileName":"agent","systemPrompt":"You are Kimi.","time":1779256791100} {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"before compaction"}],"toolCalls":[]},"time":1779256800001} -{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":1,"tokensBefore":100,"tokensAfter":30,"time":1779256800500} +{"type":"context.append_message","message":{"role":"assistant","content":[{"type":"text","text":"assistant reply"}],"toolCalls":[]},"time":1779256800200} +{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":2,"tokensBefore":100,"tokensAfter":30,"time":1779256800500} {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"after compaction"}],"toolCalls":[]},"time":1779256801000} diff --git a/apps/vis/server/test/lib/context-projector.test.ts b/apps/vis/server/test/lib/context-projector.test.ts index e3eb40fc4..dbeaea844 100644 --- a/apps/vis/server/test/lib/context-projector.test.ts +++ b/apps/vis/server/test/lib/context-projector.test.ts @@ -275,33 +275,130 @@ 
describe('context-projector', () => { { lineNo: 4, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [] } }, raw: {} }, ]; const proj = projectContext(entries as any); - expect(proj.messages[0]!.source).toBe('compaction_summary'); - // Compaction summary is an assistant message (agent-core's own + // Model view: the kept user prompt + user-role summary + the new prompt. + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'compaction_summary', 'append_message', + ]); + expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old' }); + // The compaction summary is a user message (agent-core's own // representation), not a synthetic system message. - expect(proj.messages[0]!.message.role).toBe('assistant'); - expect(proj.messages[0]!.message.origin).toEqual({ kind: 'compaction_summary' }); - expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old stuff' }); - expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'new' }); + expect(proj.messages[1]!.message.role).toBe('user'); + expect(proj.messages[1]!.message.origin).toEqual({ kind: 'compaction_summary' }); + expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'old stuff' }); + expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'new' }); + }); + + it('uses contextSummary only for the model view and raw summary for full history', () => { + const entries = [ + { lineNo: 1, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'old' }], toolCalls: [] } }, raw: {} }, + { lineNo: 2, data: { type: 'context.apply_compaction' as const, + summary: 'raw summary', contextSummary: 'prefixed summary', compactedCount: 1, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + ]; + + const model = projectContext(entries as any); + expect(model.messages.map((m) => 
m.message.content[0])).toMatchObject([ + { text: 'old' }, + { text: 'prefixed summary' }, + ]); + + const full = projectContext(entries as any, 'full'); + expect(full.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'old' }, + { text: 'raw summary' }, + ]); }); - it('apply_compaction keeps the post-compaction tail (slice(compactedCount))', () => { + it('apply_compaction keeps the most recent user messages and drops the assistant/tool tail', () => { const entries = [ { lineNo: 1, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} }, { lineNo: 2, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm1' }], toolCalls: [] } }, raw: {} }, { lineNo: 3, data: { type: 'context.append_message' as const, - message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (kept)' }], toolCalls: [] } }, raw: {} }, + message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (dropped)' }], toolCalls: [] } }, raw: {} }, { lineNo: 4, data: { type: 'context.apply_compaction' as const, - summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, ]; const proj = projectContext(entries as any); - // [summary, m2] — m0 and m1 (the first compactedCount=2) are dropped, m2 kept. - expect(proj.messages).toHaveLength(2); - expect(proj.messages[0]!.source).toBe('compaction_summary'); - expect(proj.messages[0]!.compaction).toEqual({ compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }); - expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm2 (kept)' }); - expect(proj.messages[1]!.lineNo).toBe(3); + // [m0, m1, summary] — real user prompts are kept verbatim, the assistant + // tail is dropped. 
+ expect(proj.messages).toHaveLength(3); + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'compaction_summary', + ]); + expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' }); + expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' }); + expect(proj.messages[2]!.compaction).toEqual({ compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }); + expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'sum' }); + }); + + it('apply_compaction mirrors the legacy verbatim tail for records without keptUserMessageCount (model)', () => { + // A pre-rework record has no keptUserMessageCount. agent-core's restore keeps + // the old `[summary, ...history.slice(compactedCount)]` tail (assistant/tool + // included), so the model view must do the same instead of applying the new + // kept-user selection — otherwise it would hide the assistant tail the resumed + // agent still has, and surface a pre-compaction user message the agent dropped. + const entries = [ + { lineNo: 1, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'u0 (compacted away)' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} }, + { lineNo: 2, data: { type: 'context.append_message' as const, + message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'a1' }], toolCalls: [] } }, raw: {} }, + { lineNo: 3, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'u2 (tail)' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} }, + { lineNo: 4, data: { type: 'context.append_message' as const, + message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'a3 (tail)' }], toolCalls: [] } }, raw: {} }, + // Legacy record: no keptUserMessageCount, compactedCount(2) < history(4). 
+ { lineNo: 5, data: { type: 'context.apply_compaction' as const, + summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + ]; + + const model = projectContext(entries as any); + // [summary, u2, a3] — the verbatim tail beyond compactedCount, summary first. + expect(model.messages.map((m) => m.source)).toEqual([ + 'compaction_summary', 'append_message', 'append_message', + ]); + expect(model.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'sum' }, { text: 'u2 (tail)' }, { text: 'a3 (tail)' }, + ]); + }); + + it('apply_compaction drops shell/local-command/background messages in model mode only', () => { + const entries = [ + { lineNo: 1, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'real user' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} }, + { lineNo: 2, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: '! 
pwd' }], toolCalls: [], origin: { kind: 'shell_command' as const, phase: 'input' as const } } }, raw: {} }, + { lineNo: 3, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'local output' }], toolCalls: [], origin: { kind: 'injection' as const, variant: 'local-command-stdout' } } }, raw: {} }, + { lineNo: 4, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'background done' }], toolCalls: [], origin: { kind: 'background_task' as const, taskId: 'task', status: 'completed' as const, notificationId: 'notification' } } }, raw: {} }, + { lineNo: 5, data: { type: 'context.append_message' as const, + message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'assistant reply' }], toolCalls: [] } }, raw: {} }, + { lineNo: 6, data: { type: 'context.apply_compaction' as const, + summary: 'sum', compactedCount: 5, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + { lineNo: 7, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} }, + ]; + + const model = projectContext(entries as any); + expect(model.messages.map((m) => m.source)).toEqual([ + 'append_message', 'compaction_summary', 'append_message', + ]); + expect(model.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'real user' }, { text: 'sum' }, { text: 'new' }, + ]); + + const full = projectContext(entries as any, 'full'); + expect(full.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'append_message', 'append_message', + 'append_message', 'compaction_summary', 'append_message', + ]); + expect(full.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'real user' }, { text: '! 
pwd' }, { text: 'local output' }, + { text: 'background done' }, { text: 'assistant reply' }, { text: 'sum' }, + { text: 'new' }, + ]); }); // ---- Fix ④: UI-only markers must not offset agent-core history indices ------ @@ -311,7 +408,7 @@ describe('context-projector', () => { // real history entries (append_message + compaction_summary), skipping // 'undo'/'clear' markers. - it('apply_compaction slices by history index, skipping a preceding undo marker (model)', () => { + it('apply_compaction keeps user messages across a preceding undo marker (model)', () => { const userMsg = (text: string) => ({ role: 'user' as const, content: [{ type: 'text' as const, text }], toolCalls: [], origin: { kind: 'user' as const }, @@ -319,14 +416,10 @@ describe('context-projector', () => { // Step 1: append u1, u2 then undo(1) → removes u2, leaves [u1, undo-marker]. // Step 2: append u3, u4 → array is [u1, undo-marker, u3, u4]. // History entries (agent-core _history, which has NO marker) are the three - // real messages [u1, u3, u4]. A compaction with compactedCount=2 drops the - // first 2 HISTORY entries (u1, u3) — and the undo marker that sits within - // that compacted prefix is dropped with it — keeping exactly [summary, u4]. - // - // The naive `messages.slice(compactedCount=2)` would instead cut the ARRAY at - // index 2, yielding [summary, u3, u4] — it WRONGLY retains the already- - // compacted u3 because the undo marker offset the index by one. This test - // pins the correct history-aware behaviour and FAILS against the naive slice. 
const entries = [ { lineNo: 1, data: { type: 'context.append_message' as const, message: userMsg('u1') }, raw: {} }, { lineNo: 2, data: { type: 'context.append_message' as const, message: userMsg('u2') }, raw: {} }, @@ -334,12 +427,16 @@ describe('context-projector', () => { { lineNo: 4, data: { type: 'context.append_message' as const, message: userMsg('u3') }, raw: {} }, { lineNo: 5, data: { type: 'context.append_message' as const, message: userMsg('u4') }, raw: {} }, { lineNo: 6, data: { type: 'context.apply_compaction' as const, - summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, ]; const proj = projectContext(entries as any); - // Correct: [summary, u4]. The marker and the first 2 history entries are gone. - expect(proj.messages.map((m) => m.source)).toEqual(['compaction_summary', 'append_message']); - expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'u4' }); + // Correct: [u1, u3, u4, summary]. The marker is gone, all real prompts kept. + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'append_message', 'compaction_summary', + ]); + expect(proj.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'u1' }, { text: 'u3' }, { text: 'u4' }, { text: 'sum' }, + ]); }); it('micro-blanking uses the history index, skipping a preceding undo marker (model)', () => { @@ -688,7 +785,7 @@ describe('context-projector', () => { // marker but do NOT mutate/drop the surrounding message list. 'model' mode // (the default) keeps the existing model's-eye behaviour byte-identical. 
- it("defaults to 'model' mode when no 2nd arg is passed (compaction drops the prefix)", () => { + it("defaults to 'model' mode when no 2nd arg is passed (keeps recent user messages + summary)", () => { const entries = [ { lineNo: 1, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} }, @@ -697,10 +794,14 @@ describe('context-projector', () => { { lineNo: 3, data: { type: 'context.apply_compaction' as const, summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, ]; - // No 2nd arg → 'model' default: prefix dropped, only the summary remains. + // No 2nd arg → 'model' default: the real user prompts are kept verbatim and + // the summary is appended after them. const proj = projectContext(entries as any); - expect(proj.messages).toHaveLength(1); - expect(proj.messages[0]!.source).toBe('compaction_summary'); + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'compaction_summary', + ]); + expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' }); + expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' }); }); it("full mode keeps the pre-compaction messages plus the summary marker plus the tail", () => { diff --git a/apps/vis/server/test/routes/context.test.ts b/apps/vis/server/test/routes/context.test.ts index 486e6175d..6352747e9 100644 --- a/apps/vis/server/test/routes/context.test.ts +++ b/apps/vis/server/test/routes/context.test.ts @@ -69,28 +69,31 @@ describe('context route', () => { cleanup = c; const app = contextRoute(home); - // Default (model view): the pre-compaction message is dropped, leaving - // [summary, after-compaction]. + // Default (model view): the real user prompt before compaction is KEPT, the + // assistant reply is dropped, then the summary, then the post-compaction tail. 
const modelRes = await app.request('/session_fixture/context?agent=main'); expect(modelRes.status).toBe(200); const modelBody = (await modelRes.json()) as { messages: { source: string; message: { content: { type: string; text?: string }[] } }[]; }; expect(modelBody.messages.map((m) => m.source)).toEqual([ - 'compaction_summary', 'append_message', + 'append_message', 'compaction_summary', 'append_message', ]); + expect(modelBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' }); + expect(modelBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' }); - // Full history: the pre-compaction message is KEPT, then the summary marker, - // then the post-compaction tail. + // Full history: every pre-compaction message (user prompt + assistant reply) + // is KEPT, then the summary marker, then the post-compaction tail. const fullRes = await app.request('/session_fixture/context?agent=main&history=full'); expect(fullRes.status).toBe(200); const fullBody = (await fullRes.json()) as { messages: { source: string; message: { content: { type: string; text?: string }[] } }[]; }; expect(fullBody.messages.map((m) => m.source)).toEqual([ - 'append_message', 'compaction_summary', 'append_message', + 'append_message', 'append_message', 'compaction_summary', 'append_message', ]); expect(fullBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' }); - expect(fullBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' }); + expect(fullBody.messages[1]!.message.content[0]).toMatchObject({ text: 'assistant reply' }); + expect(fullBody.messages[3]!.message.content[0]).toMatchObject({ text: 'after compaction' }); }); }); diff --git a/docs/en/configuration/config-files.md b/docs/en/configuration/config-files.md index 17a00a379..1f2187063 100644 --- a/docs/en/configuration/config-files.md +++ b/docs/en/configuration/config-files.md @@ -52,7 +52,7 @@ max_running_tasks = 4 keep_alive_on_exit = 
false [experimental] -micro_compaction = true +micro_compaction = false [[permission.rules]] decision = "allow" @@ -181,11 +181,11 @@ You can also switch models temporarily without touching the config file — by s ## `experimental` -`experimental` stores persistent overrides for experimental-feature flags. Currently, `micro_compaction` is the only user-facing entry and defaults to `true`; set it to `false` only when you need to disable automatic trimming of older large tool results. +`experimental` stores persistent overrides for experimental-feature flags. Currently, `micro_compaction` is the only user-facing entry and defaults to `false`; set it to `true` to enable automatic trimming of older large tool results. | Field | Type | Default | Description | | --- | --- | --- | --- | -| `micro_compaction` | `boolean` | `true` | Trim older large tool results from context while preserving recent conversation | +| `micro_compaction` | `boolean` | `false` | Trim older large tool results from context while preserving recent conversation | ## `services` diff --git a/docs/en/configuration/env-vars.md b/docs/en/configuration/env-vars.md index 10518832e..3d57e29a7 100644 --- a/docs/en/configuration/env-vars.md +++ b/docs/en/configuration/env-vars.md @@ -124,7 +124,7 @@ Switches that control the behavior of subsystems such as telemetry, background t | `KIMI_CODE_BACKGROUND_KEEP_ALIVE_ON_EXIT` | Whether to keep background tasks when the session closes; takes higher priority than `config.toml`. 
The default is to stop them on exit | Truthy: `1`/`true`/`yes`/`on`; falsy: `0`/`false`/`no`/`off` | | `KIMI_CODE_PLUGIN_MARKETPLACE_URL` | Override the plugin marketplace JSON loaded by `/plugins`; useful for dev loopback servers, staging CDN files, or alternate marketplace directories | `https://code.kimi.com/kimi-code/plugins/marketplace.json`; also accepts `http://`, `file://` URLs, and local paths | | `KIMI_CODE_AGENT_SWARM_MAX_CONCURRENCY` | Cap how many AgentSwarm subagents run concurrently during the initial ramp; leave unset for no cap | Positive integer; invalid values fail fast | -| `KIMI_CODE_EXPERIMENTAL_FLAG` | Enable all registered experimental features for this process; `micro_compaction` is already enabled by default | `1`, `true`, `yes`, `on` | +| `KIMI_CODE_EXPERIMENTAL_FLAG` | Enable all registered experimental features for this process | `1`, `true`, `yes`, `on` | | `KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION` | Override [`[experimental].micro_compaction`](./config-files.md#experimental) for this process | Truthy or falsy | | `KIMI_SHELL_PATH` | Override the Git Bash path on Windows (used when auto-detection fails) | Absolute path | | `KIMI_MODEL_MAX_COMPLETION_TOKENS` | Hard cap on `max_completion_tokens` per LLM step; applies to the `kimi` provider only | Positive integer; `0` or negative disables clamping | diff --git a/docs/zh/configuration/config-files.md b/docs/zh/configuration/config-files.md index ffab2e001..c214ce76b 100644 --- a/docs/zh/configuration/config-files.md +++ b/docs/zh/configuration/config-files.md @@ -52,7 +52,7 @@ max_running_tasks = 4 keep_alive_on_exit = false [experimental] -micro_compaction = true +micro_compaction = false [[permission.rules]] decision = "allow" @@ -181,11 +181,11 @@ max_context_size = 1047576 ## `experimental` -`experimental` 存放实验功能 flag 的持久化覆盖。目前 `micro_compaction` 是唯一用户可见的字段,默认值为 `true`;只有在需要关闭自动清理较旧的大型工具结果时,才需要把它设为 `false`。 +`experimental` 存放实验功能 flag 的持久化覆盖。目前 `micro_compaction` 是唯一用户可见的字段,默认值为 
`false`;如需自动清理较旧的大型工具结果,把它设为 `true`。 | 字段 | 类型 | 默认值 | 说明 | | --- | --- | --- | --- | -| `micro_compaction` | `boolean` | `true` | 清理较旧的大型工具结果内容,同时保留最近对话 | +| `micro_compaction` | `boolean` | `false` | 清理较旧的大型工具结果内容,同时保留最近对话 | ## `services` diff --git a/docs/zh/configuration/env-vars.md b/docs/zh/configuration/env-vars.md index ddf76795a..130010a6a 100644 --- a/docs/zh/configuration/env-vars.md +++ b/docs/zh/configuration/env-vars.md @@ -124,7 +124,7 @@ kimi | `KIMI_CODE_BACKGROUND_KEEP_ALIVE_ON_EXIT` | 会话关闭时是否保留后台任务,优先级高于 `config.toml`。默认会在退出时停止后台任务 | 真值:`1`/`true`/`yes`/`on`;假值:`0`/`false`/`no`/`off` | | `KIMI_CODE_PLUGIN_MARKETPLACE_URL` | 覆盖 `/plugins` 加载的 plugin marketplace JSON,适合 dev loopback server、测试 CDN 文件或替换 marketplace 目录 | `https://code.kimi.com/kimi-code/plugins/marketplace.json`;也接受 `http://`、`file://` URL 和本地路径 | | `KIMI_CODE_AGENT_SWARM_MAX_CONCURRENCY` | 限制 AgentSwarm 初始提升并发阶段可同时运行的子 Agent 数量;不设置表示不限制 | 正整数;非法值会立即失败 | -| `KIMI_CODE_EXPERIMENTAL_FLAG` | 在当前进程启用所有已注册的实验功能;`micro_compaction` 已默认开启 | `1`、`true`、`yes`、`on` | +| `KIMI_CODE_EXPERIMENTAL_FLAG` | 在当前进程启用所有已注册的实验功能 | `1`、`true`、`yes`、`on` | | `KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION` | 覆盖当前进程的 [`[experimental].micro_compaction`](./config-files.md#experimental) | 真值或假值 | | `KIMI_SHELL_PATH` | Windows 上覆盖 Git Bash 路径(自动探测失败时使用) | 绝对路径 | | `KIMI_MODEL_MAX_COMPLETION_TOKENS` | 单步 LLM 请求的 `max_completion_tokens` 硬上限,仅对 `kimi` 供应商生效 | 正整数;`0` 或负数禁用 clamp | diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md index 49b0d80b4..921068742 100644 --- a/packages/agent-core/src/agent/compaction/compaction-instruction.md +++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md @@ -1,69 +1,42 @@ +You are about to run out of context. Write a first-person handoff note to +yourself so you can seamlessly continue this task after the earlier +conversation is cleared. 
--- This message is a direct task, not part of the above conversation --- -You are now given a task to compact this conversation context according to specific priorities and output requirements. - -Output text only. DO NOT CALL ANY TOOLS. Calling tools will be rejected and fails the task. You already have all the information you need in the conversation history. You have only one chance. - -The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared work. - +Write the note as your own continuing train of thought — first person, present +tense, the way you would reason through the next move. Do not write a +third-party report about someone else's work, and do not impose rigid section +headings; let the shape follow the task. + +Make the note self-sufficient: the next turn will see only your most recent user +messages and this note — every assistant message, tool call, and tool result +above will be gone. In your own words, preserve what you genuinely need to +continue: + +- The latest user request, quoted verbatim, and what it is actually asking for. +- The instructions and constraints currently in force (user preferences, + project rules, environment and tooling limits) — condensed to what still + matters. +- What has actually been done, at high fidelity: keep the exact commands that + were run, the exact file paths touched, and whether each succeeded or failed. + Keep only the final working version of any code; drop intermediate attempts + and already-resolved errors. +- The precise next action — including the exact next command or tool call you + intend to make — and any required format for the final answer. + +Be honest about uncertainty. 
If an earlier step claimed something was done but +was never verified (tests "passing", a fix "working", a file "created"), say so +plainly and treat it as unverified rather than fact — re-check before relying +on it. + +Be concise. Include the critical data, identifiers, and references needed to +continue, and omit anything that does not change the next move. + +Respond with text only. Do not call any tools — you already have everything you +need in the conversation history. + +{% if customInstruction %} +Optional user instruction: {{ customInstruction }} - - - -1. **Current Task State**: What is being worked on RIGHT NOW -2. **Errors & Solutions**: All encountered errors and their resolutions -3. **Code Evolution**: Final working versions only (remove intermediate attempts) -4. **System Context**: Project structure, dependencies, environment setup -5. **Design Decisions**: Architectural choices and their rationale -6. **TODO Items**: Unfinished tasks and known issues - - - -## Current Focus - -[What we're working on now] - -## Environment - -- [Key setup/config points] -- ... - -## Completed Tasks - -- [Task]: [Brief outcome] -- ... - -## Active Issues - -- [Issue]: [Status/Next steps] -- ... - -## Code State - -### [Critical file name] - -[Brief description of the file's purpose and current state] - -``` -[The latest version of critical code snippets in this file, <20 lines] -``` - -### [Critical file name] - -- [Useful classes/methods/functions]: [Brief description/usage] -- ... - - - -## Important Context - -- [Any crucial information not covered above] -- ... - -## All User Messages - -- [Detailed non tool use user message] -- ... 
- - +{% endif %} diff --git a/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md new file mode 100644 index 000000000..157724c84 --- /dev/null +++ b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md @@ -0,0 +1 @@ +The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it. diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index 36be40575..752de8b24 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -22,9 +22,14 @@ import { retryBackoffDelays, sleepForRetry, } from '../../loop/retry'; -import { renderPrompt } from '../../utils/render-prompt'; +import { + renderTodoList, + TODO_STORE_KEY, + type TodoItem, +} from '../../tools/builtin/state/todo-list'; import { estimateTokens, + estimateTokensForMessage, estimateTokensForMessages, estimateTokensForTools, } from '../../utils/tokens'; @@ -32,14 +37,15 @@ import { applyCompletionBudget, resolveCompletionBudget, } from '../../utils/completion-budget'; +import { renderPrompt } from '../../utils/render-prompt'; import compactionInstructionTemplate from './compaction-instruction.md?raw'; -import { renderTodoList, type TodoItem } from '../../tools/builtin/state/todo-list'; import type { CompactionBeginData, CompactionResult } from './types'; import { DEFAULT_COMPACTION_CONFIG, DefaultCompactionStrategy, type CompactionStrategy, } from './strategy'; +import { buildCompactionSummaryText, isRealUserInput } from './handoff'; export const MAX_COMPACTION_RETRY_ATTEMPTS = 5; @@ -62,6 +68,18 @@ export class FullCompaction { blockedByTurn: 
boolean; } | null = null; private readonly observedMaxContextTokensByModel = new Map(); + // Token count right after the last successful compaction. While no new + // content has been appended (tokenCountWithPending <= this value), the + // history is already in its minimal compacted form ([kept user prompts, + // summary]); re-compacting would only nest summaries, so + // checkAutoCompaction skips in that case even if an observed overflow + // limit still flags the context as oversized. + private lastCompactedTokenCount: number | null = null; + // Counts provider-overflow recoveries in this turn that have not yet been + // followed by a successful step. Trips strategy.maxOverflowCompactionAttempts to + // stop an overflow -> compact -> overflow loop when compaction can no + // longer shrink the request below the model window. + private consecutiveOverflowCompactions = 0; protected readonly strategy: CompactionStrategy; constructor( @@ -77,7 +95,7 @@ export class FullCompaction { reservedContextSize: agent.kimiConfig?.loopControl?.reservedContextSize ?? DEFAULT_COMPACTION_CONFIG.reservedContextSize, - } + }, ); } @@ -139,9 +157,21 @@ export class FullCompaction { }); return; } - const compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source); - if (compactedCount === 0) { - throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No prefix that can be compacted in current history.'); + if (this.agent.context.history.length === 0) { + throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No messages to compact in current history.'); + } + // Manual (SDK/REST) compaction must not start while a turn is running: the + // turn keeps mutating the context (streaming content, appending messages) + // while the summarizer is in flight, and that output is then neither + // summarized nor preserved by the rebuild. Auto compaction is exempt — it is + // triggered from within the turn at a step boundary, which blocks the turn + // for the duration.
Refuse manual compaction here so it only runs at a clean + // boundary; the caller can retry once the turn finishes. + if (data.source === 'manual' && this.agent.turn.hasActiveTurn) { + throw new KimiError( + ErrorCodes.COMPACTION_UNABLE, + 'Cannot compact while a turn is active. Wait for it to finish, then retry.', + ); } this.agent.records.logRecord({ type: 'full_compaction.begin', @@ -155,7 +185,7 @@ export class FullCompaction { const abortController = new AbortController(); this.compacting = { abortController, - promise: this.compactionWorker(abortController.signal, data, compactedCount), + promise: this.compactionWorker(abortController.signal, data), blockedByTurn: false, }; } @@ -194,9 +224,20 @@ export class FullCompaction { resetForTurn(): void { this.compactionCountInTurn = 0; + this.lastCompactedTokenCount = null; + this.consecutiveOverflowCompactions = 0; } async handleOverflowError(signal: AbortSignal, error: unknown) { + this.consecutiveOverflowCompactions += 1; + const maxAttempts = this.strategy.maxOverflowCompactionAttempts; + if (this.consecutiveOverflowCompactions > maxAttempts) { + throw new KimiError( + ErrorCodes.CONTEXT_OVERFLOW, + `Compaction failed to bring the context under the model window after ${String(maxAttempts)} attempts.`, + { cause: error instanceof Error ? error : undefined }, + ); + } const didStartCompaction = this.beginAutoCompaction(); if (!didStartCompaction && !this.compacting) throw error; // Always block on overflow errors @@ -211,6 +252,10 @@ export class FullCompaction { } async afterStep(): Promise { + // A completed step means a generate() succeeded, so any prior + // overflow -> compact cycle produced a request that now fits; clear the + // loop guard. 
+ this.consecutiveOverflowCompactions = 0; if (this.strategy.checkAfterStep) { this.checkAutoCompaction(false); } @@ -219,6 +264,12 @@ export class FullCompaction { private checkAutoCompaction(throwOnLimit: boolean = true): boolean { if (this.compacting) return true; + if ( + this.lastCompactedTokenCount !== null && + this.tokenCountWithPending <= this.lastCompactedTokenCount + ) { + return false; + } if (!this.strategy.shouldCompact(this.tokenCountWithPending)) return false; return this.beginAutoCompaction(throwOnLimit); } @@ -258,34 +309,26 @@ export class FullCompaction { private async compactionWorker( signal: AbortSignal, data: Readonly, - compactedCount: number, ): Promise { try { - const finalResult = { - summary: '', - compactedCount: 1, - tokensBefore: 0, - tokensAfter: 0, - }; - - for (let round = 1; ; round++) { - const result = await this.compactionRound(round, signal, data, compactedCount); - if (!result) return; - - finalResult.summary = result.summary; - finalResult.compactedCount += result.compactedCount - 1; - finalResult.tokensBefore += result.tokensBefore - finalResult.tokensAfter; - finalResult.tokensAfter = result.tokensAfter; - - if (result.tokensBefore - result.tokensAfter < 1024) break; - if (!this.strategy.shouldBlock(result.tokensAfter)) break; - compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source); - if (compactedCount === 0) break; + const result = await this.compactionRound(signal, data); + if (!result) return; + // Stay "compacting" through reinjection: a follow-up prompt/steer that lands + // now is buffered (TurnFlow defers on `isCompacting`) until the + // post-compaction reminders are back, so the first post-compaction turn + // never builds a request before they are reinjected. Only after reinjection + // do we clear the flag, announce completion, and replay deferred input. 
+ try { + await this.agent.refreshSystemPrompt(); + } catch (error) { + this.agent.log.error('failed to refresh system prompt after compaction', { error }); } + await this.agent.injection.injectAfterCompaction(); this.markCompleted(); - this.agent.emitEvent({ type: 'compaction.completed', result: finalResult }); - await this.agent.injection.injectGoal(); - this.triggerPostCompactHook(data, finalResult); + const { contextSummary: _contextSummary, ...eventResult } = result; + void _contextSummary; + this.agent.emitEvent({ type: 'compaction.completed', result: eventResult }); + this.triggerPostCompactHook(data, result); } catch (error) { if (isAbortError(error)) return; const blockedByTurn = this.compacting?.blockedByTurn === true; @@ -298,22 +341,40 @@ export class FullCompaction { type: 'error', ...toKimiErrorPayload(error), }); + } finally { + // Replay prompts/steers deferred while compaction held the context — on the + // success path (after reinjection above), on a changed-prefix or grown-tail cancel + // (`!result`), and on failure/abort. `compacting` is null by now in every + // path, so the replay's launch actually starts a turn instead of re-buffering. + this.agent.turn.onCompactionFinished(); + } + } + + private buildInstruction(customInstruction: string | undefined): string { + return renderPrompt(compactionInstructionTemplate, { + customInstruction: customInstruction?.trim() ?? '', + }).trimEnd(); + } + + private postProcessSummary(summary: string): string { + const storeData = this.agent.tools.storeData(); + const todos = (storeData[TODO_STORE_KEY] as readonly TodoItem[] | undefined) ??
[]; + if (todos.length === 0) { + return summary; } + const todoMarkdown = renderTodoList(todos, '## TODO List'); + return `${summary.trim()}\n\n${todoMarkdown}`; } private async compactionRound( - round: number, signal: AbortSignal, data: Readonly, - initialCompactedCount: number, - ) { + ): Promise { const startedAt = Date.now(); const originalHistory = [...this.agent.context.history]; const tokensBefore = estimateTokensForMessages(originalHistory); let retryCount = 0; try { - let compactedCount = initialCompactedCount; - await this.triggerPreCompactHook(data, tokensBefore, signal); const model = this.agent.config.model; @@ -337,15 +398,22 @@ export class FullCompaction { }), capability, }); + const instruction = this.buildInstruction(data.instruction); const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS); - let usage: TokenUsage | null; - let summary: string; + let usage: TokenUsage | null = null; + let summary: string | undefined; + // Compact the whole history, trimming old messages only when the + // summarizer request itself cannot fit. Any trimmed messages are not + // covered by the produced summary; `droppedCount` reports that blind spot. + let historyForModel = originalHistory; + let droppedCount = 0; + let overflowShrinkCount = 0; + let emptyOrTruncatedShrinkCount = 0; while (true) { - const messagesToCompact = originalHistory.slice(0, compactedCount); const messages = [ - ...this.agent.context.project(messagesToCompact), - createUserMessage(renderPrompt(compactionInstructionTemplate, { customInstruction: data.instruction ?? 
'' })), + ...this.agent.context.project(historyForModel, { synthesizeMissing: true }), + createUserMessage(instruction), ]; const estimatedCompactionRequestTokens = this.estimateRequestTokens(messages); try { @@ -371,14 +439,40 @@ export class FullCompaction { if (isContextOverflow) { this.observeContextOverflow(estimatedCompactionRequestTokens); } - if ( - isContextOverflow || + if (isContextOverflow && historyForModel.length > 1) { + overflowShrinkCount += 1; + if (overflowShrinkCount > MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS) { + throw error; + } + const before = historyForModel.length; + historyForModel = shrinkCompactionHistoryAfterOverflow( + historyForModel, + overflowShrinkCount, + ); + droppedCount += before - historyForModel.length; + retryCount = 0; + continue; + } + const shouldShrinkAfterEmptyOrTruncated = error instanceof CompactionTruncatedError || - error instanceof APIEmptyResponseError // e.g. think-only - ) { - compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact); + error instanceof APIEmptyResponseError; + if (shouldShrinkAfterEmptyOrTruncated && historyForModel.length > 1) { + // Each empty/truncated summary drops the oldest message and retries, + // but without its own bound this would issue ~one request per message + // (resetting retryCount sidesteps the transient-error budget). Cap the + // shrink attempts by the same retry budget so a model that keeps + // returning empty cannot fan out into a request per history entry. 
+ emptyOrTruncatedShrinkCount += 1; + if (emptyOrTruncatedShrinkCount > MAX_COMPACTION_RETRY_ATTEMPTS) { + throw error; + } + const before = historyForModel.length; + historyForModel = dropOldestMessageAndLeadingToolResults(historyForModel); + droppedCount += before - historyForModel.length; + retryCount = 0; + continue; } - else if (!isRetryableGenerateError(error)) { + if (!isRetryableGenerateError(error)) { throw error; } if (retryCount + 1 >= MAX_COMPACTION_RETRY_ATTEMPTS) { @@ -396,23 +490,33 @@ export class FullCompaction { const newHistory = this.agent.context.history; for (let i = 0; i < originalHistory.length; i++) { if (newHistory[i] !== originalHistory[i]) { - // History changed during compaction, likely due to undo + // The compacted prefix changed under us (e.g. undo). Bail. this.cancel(); return undefined; } } + // The prefix is intact, but the tail grew while the summarizer was in + // flight (a live step racing a manual/SDK compaction). A real user message + // is safe — the all-user rebuild picks recent user input back up from the + // grown history — but anything compaction would drop (an assistant/tool + // turn, or a user-role message like a background-task notification, hook/ + // cron reminder, or shell output) was neither summarized (the summary only + // covers originalHistory) nor kept, so it would silently vanish. Cancel and + // let a later clean-boundary compaction handle it. + if (newHistory.slice(originalHistory.length).some((message) => !isRealUserInput(message))) { + this.cancel(); + return undefined; + } - summary = this.postProcessSummary(summary); - - const recent = originalHistory.slice(compactedCount); - const tokensAfter = estimateTokens(summary) + estimateTokensForMessages(recent); - - const result: CompactionResult = { - summary, - compactedCount, + const rawSummary = this.postProcessSummary(summary ?? 
''); + const contextSummary = buildCompactionSummaryText(rawSummary); + const result = this.agent.context.applyCompaction({ + summary: rawSummary, + contextSummary, + compactedCount: originalHistory.length, tokensBefore, - tokensAfter, - }; + droppedCount: droppedCount === 0 ? undefined : droppedCount, + }); // Telemetry keys are snake_case, but the `context.apply_compaction` // record written below keeps its persisted camelCase field names @@ -424,22 +528,23 @@ export class FullCompaction { tokens_after: result.tokensAfter, duration_ms: Date.now() - startedAt, compacted_count: result.compactedCount, + dropped_count: result.droppedCount, retry_count: retryCount, - round, + round: 1, thinking_effort: this.agent.config.thinkingEffort, ...(usage === null ? {} : { input_tokens: inputTotal(usage), output_tokens: usage.output }), }); - this.agent.context.applyCompaction(result); + this.lastCompactedTokenCount = result.tokensAfter; return result; } catch (error) { - if (isAbortError(error)) return; + if (isAbortError(error)) return undefined; this.agent.telemetry.track('compaction_failed', { source: data.source, tokens_before: tokensBefore, duration_ms: Date.now() - startedAt, - round, + round: 1, retry_count: retryCount, thinking_effort: this.agent.config.thinkingEffort, error_type: error instanceof Error ? error.name : 'Unknown', @@ -478,16 +583,52 @@ export class FullCompaction { }, }); } +} - private postProcessSummary(summary: string): string { - const storeData = this.agent.tools.storeData(); - const todos = (storeData['todo'] as readonly TodoItem[] | undefined) ?? 
[]; - if (todos.length === 0) { - return summary; - } - const todoMarkdown = renderTodoList(todos, '## TODO List'); - return `${summary.trim()}\n\n${todoMarkdown}`; +const MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS = 3; +const COMPACTION_OVERFLOW_SHRINK_RATIOS = [0.7, 0.5, 0.35] as const; + +function shrinkCompactionHistoryAfterOverflow( + messages: readonly T[], + attempt: number, +): T[] { + if (messages.length <= 1) return messages.slice(); + const ratio = COMPACTION_OVERFLOW_SHRINK_RATIOS[ + Math.min(attempt - 1, COMPACTION_OVERFLOW_SHRINK_RATIOS.length - 1) + ]!; + const tokenBudget = Math.floor(estimateTokensForMessages(messages) * ratio); + return takeRecentMessagesWithinTokenBudget(messages, tokenBudget); +} + +function takeRecentMessagesWithinTokenBudget( + messages: readonly T[], + tokenBudget: number, +): T[] { + let start = messages.length; + let tokens = 0; + for (let i = messages.length - 1; i >= 0; i--) { + const messageTokens = estimateTokensForMessage(messages[i]!); + if (tokens + messageTokens > tokenBudget) break; + tokens += messageTokens; + start = i; + } + if (start === 0) start = 1; + return dropLeadingToolResults(messages.slice(start)); +} + +function dropOldestMessageAndLeadingToolResults( + messages: readonly T[], +): T[] { + if (messages.length <= 1) return messages.slice(); + return dropLeadingToolResults(messages.slice(1)); +} + +function dropLeadingToolResults(messages: readonly T[]): T[] { + let start = 0; + while (start < messages.length && messages[start]!.role === 'tool') { + start += 1; } + return messages.slice(start); } function extractCompactionSummary(response: GenerateResult): string { diff --git a/packages/agent-core/src/agent/compaction/handoff.ts b/packages/agent-core/src/agent/compaction/handoff.ts new file mode 100644 index 000000000..0ee8bd0f4 --- /dev/null +++ b/packages/agent-core/src/agent/compaction/handoff.ts @@ -0,0 +1,166 @@ +import type { ContentPart } from '@moonshot-ai/kosong'; +import { estimateTokensForMessage 
} from '../../utils/tokens'; +import type { PromptOrigin } from '../context/types'; +import summaryPrefixTemplate from './compaction-summary-prefix.md?raw'; + +/** + * Compaction handoff helpers. + * + * Compaction rewrites the model context as: the most recent user messages + * (verbatim, within a token budget) followed by a single user-role summary + * that is prefixed with `COMPACTION_SUMMARY_PREFIX`. Assistant messages, + * tool calls, and tool results are dropped. These helpers apply the exact + * same rule for both the live context rewrite and the transcript reducer. + */ + +export const COMPACTION_SUMMARY_PREFIX = summaryPrefixTemplate.trimEnd(); +export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000; + +/** + * Structural subset of kosong's `Message` that the handoff helpers inspect. + * Both `ContextMessage` (the live context) and the wire-transcript reducer's + * mutable message satisfy this shape, so one set of helpers serves both + * layers without introducing a shared nominal type. `origin` is what tells + * real user input apart from injections and compaction summaries. + */ +interface MessageLike { + readonly role: string; + readonly content: readonly ContentPart[]; + readonly origin?: PromptOrigin | undefined; +} + +export type CompactionUserDisposition = 'keep' | 'drop'; + +/** + * Single source of truth for whether a user-role message survives compaction as + * genuine user input. Only real user prompts and user-slash skill + * activations or plugin commands are kept verbatim. Everything else user-role is + * either rebuilt by injectors after compaction or intentionally ephemeral, so + * it is dropped from the live context even when transcript/replay retains it + * for UI rendering. New `PromptOrigin` kinds must update this switch.
+ */ +export function compactionUserMessageDisposition( + origin: PromptOrigin | undefined, +): CompactionUserDisposition { + if (origin === undefined) return 'keep'; + switch (origin.kind) { + case 'user': + return 'keep'; + case 'skill_activation': + case 'plugin_command': + return origin.trigger === 'user-slash' ? 'keep' : 'drop'; + case 'injection': + case 'shell_command': + case 'compaction_summary': + case 'system_trigger': + case 'background_task': + case 'cron_job': + case 'cron_missed': + case 'hook_result': + case 'retry': + return 'drop'; + default: { + const _exhaustive: never = origin; + void _exhaustive; + return 'drop'; + } + } +} + +function extractText(content: readonly ContentPart[]): string { + let text = ''; + for (const part of content) { + if (part.type === 'text') { + text += part.text; + } + } + return text; +} + +export function isCompactionSummaryMessage(message: MessageLike): boolean { + return message.origin?.kind === 'compaction_summary'; +} + +/** + * Keep only genuine user input (real user prompts and user-slash skill + * activations). See `compactionUserMessageDisposition` for the full keep/drop + * policy and the rationale for each origin. + */ +export function isRealUserInput(message: MessageLike): boolean { + return message.role === 'user' && compactionUserMessageDisposition(message.origin) === 'keep'; +} + +export function collectCompactableUserMessages(messages: readonly T[]): T[] { + return messages.filter( + (message) => isRealUserInput(message) && !isCompactionSummaryMessage(message), + ); +} + +function truncateTextToTokens(text: string, maxTokens: number): string { + if (maxTokens <= 0) return ''; + // Single pass: walk the string once, mirroring estimateTokens' heuristic + // (ASCII ~4 chars/token, non-ASCII ~1 char/token) and stop at the first + // code point that would push the running total over the budget. This keeps + // CJK-heavy inputs from the O(n^2) cost of re-estimating shrinking prefixes. 
+ let asciiCount = 0; + let nonAsciiCount = 0; + let end = 0; + for (const char of text) { + if (char.codePointAt(0)! <= 127) { + asciiCount++; + } else { + nonAsciiCount++; + } + if (Math.ceil(asciiCount / 4) + nonAsciiCount > maxTokens) break; + end += char.length; + } + return text.slice(0, end); +} + +function truncateUserMessage(message: T, maxTokens: number): T { + const text = truncateTextToTokens(extractText(message.content), maxTokens); + // Truncating to text only drops any image/audio/video the oldest kept message + // carried: media cannot be partially truncated, and keeping it whole would + // overshoot the budget, so the boundary message loses its attachments. Recent + // messages that fit the budget are kept verbatim (media included); only this + // boundary message is affected. Spread the original to preserve every field + // (notably `origin`); clearing tool calls is safe (real user input never + // carries them). The cast back to `T` is unavoidable: TypeScript cannot prove + // the spread-then-override still equals T. + return { + ...message, + content: [{ type: 'text', text }], + toolCalls: [], + } as unknown as T; +} + +/** + * Keep the most recent user messages whose cumulative estimated size fits + * `maxTokens`. The oldest kept message is truncated to the remaining budget + * when it would otherwise overflow; older messages are dropped. 
+ */ +export function selectRecentUserMessages( + messages: readonly T[], + maxTokens: number = COMPACT_USER_MESSAGE_MAX_TOKENS, +): T[] { + const selected: T[] = []; + let remaining = maxTokens; + for (let i = messages.length - 1; i >= 0 && remaining > 0; i--) { + const message = messages[i]!; + const tokens = estimateTokensForMessage(message); + if (tokens <= remaining) { + selected.push(message); + remaining -= tokens; + } else { + selected.push(truncateUserMessage(message, remaining)); + break; + } + } + selected.reverse(); + return selected; +} + +export function buildCompactionSummaryText(summary: string): string { + const suffix = summary.trim(); + return `${COMPACTION_SUMMARY_PREFIX}\n${suffix.length > 0 ? suffix : '(no summary available)'}`; +} diff --git a/packages/agent-core/src/agent/compaction/index.ts b/packages/agent-core/src/agent/compaction/index.ts index 4f92ac9fe..49978abf1 100644 --- a/packages/agent-core/src/agent/compaction/index.ts +++ b/packages/agent-core/src/agent/compaction/index.ts @@ -2,3 +2,4 @@ export * from './full'; export * from './micro'; export * from './strategy'; export * from './types'; +export * from './handoff'; diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts index edf9132e0..d409d6e8d 100644 --- a/packages/agent-core/src/agent/compaction/strategy.ts +++ b/packages/agent-core/src/agent/compaction/strategy.ts @@ -1,43 +1,48 @@ -import type { Message } from "@moonshot-ai/kosong"; -import { estimateTokensForMessage } from "../../utils/tokens"; -import type { CompactionSource } from "./types"; +import type { CompactionSource } from './types'; export interface CompactionConfig { + /** Fraction of the model context window that triggers auto-compaction. */ triggerRatio: number; + /** Fraction of the model context window that blocks the turn on compaction. */ blockRatio: number; + /** Reserved output budget; compaction triggers early to leave this much room. 
*/ reservedContextSize: number; + /** Maximum number of auto-compactions allowed in a single turn. */ maxCompactionPerTurn: number; - maxRecentMessages: number; - maxRecentUserMessages: number; - maxRecentSizeRatio: number; - minOverflowReductionRatio: number; + /** + * Consecutive provider-overflow recoveries (overflow -> compact -> overflow + * again) allowed in a single turn before giving up. Caps the loop when + * compaction can no longer shrink the request below the model window. + */ + maxOverflowCompactionAttempts: number; } +/** + * Auto-compact at 85% of the resolved context window. `blockRatio` matches + * `triggerRatio` so compaction runs synchronously with no background + * compaction. + */ export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = { triggerRatio: 0.85, - blockRatio: 0.85, // Same as triggerRatio to disable async compaction + blockRatio: 0.85, reservedContextSize: 50_000, maxCompactionPerTurn: Infinity, - maxRecentMessages: 4, - maxRecentUserMessages: Infinity, - maxRecentSizeRatio: 0.2, - minOverflowReductionRatio: 0.05, + maxOverflowCompactionAttempts: 3, }; export interface CompactionStrategy { shouldCompact(usedSize: number): boolean; shouldBlock(usedSize: number): boolean; - computeCompactCount(messages: readonly Message[], source: CompactionSource): number; - reduceCompactOnOverflow(messages: readonly Message[]): number; readonly checkAfterStep: boolean; readonly maxCompactionPerTurn: number; + readonly maxOverflowCompactionAttempts: number; } export class DefaultCompactionStrategy implements CompactionStrategy { constructor( protected readonly maxSizeProvider: () => number, - protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG - ) { } + protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG, + ) {} protected get maxSize(): number { return this.maxSizeProvider(); @@ -64,111 +69,6 @@ export class DefaultCompactionStrategy implements CompactionStrategy { return reservedSize > 0 && reservedSize < 
this.maxSize && usedSize + reservedSize >= this.maxSize; } - computeCompactCount(messages: readonly Message[], source: CompactionSource): number { - // Return value: N messages to be compacted (0 means no compaction possible) - // LLM Input: messages.slice(0, N) + [user:instruction] - // Preserved recent messages: messages.slice(N) - - // Manual compaction - if (source === 'manual') { - for (let i = messages.length - 1; i > 0; i--) { - if (canSplitAfter(messages, i)) { - return this.fitCompactCountToWindow(messages, i + 1); - } - } - return 0; - } - - // Auto compaction rules (in order of precedence): - // 1. The split after messages[N-1] must be safe per `canSplitAfter`: - // messages[N-1] is not a user or asst-with-tool-calls, and the retained - // suffix messages.slice(N) has no orphan tool result. - // 2. At least one recent message must be preserved - // 3. At most maxRecentMessages recent messages should be preserved - // 4. At most maxRecentUserMessages recent user messages should be preserved - // 5. At most maxRecentSizeRatio * maxSize recent messages should be preserved - // 6. N should be as small as possible - - let recentMessages = 1; - let recentUserMessages = 0; - let recentSize = 0; - let bestN: number | undefined; - - for (; recentMessages < messages.length; recentMessages++) { - const splitIndex = messages.length - recentMessages - 1; - const m2 = messages[messages.length - recentMessages]!; - - if (m2.role === 'user') { - recentUserMessages++; - } - recentSize += estimateTokensForMessage(m2); - - if (canSplitAfter(messages, splitIndex)) { - bestN = splitIndex + 1; - } - - const reachesMax = recentMessages >= this.config.maxRecentMessages - || recentUserMessages >= this.config.maxRecentUserMessages - || recentSize >= this.maxSize * this.config.maxRecentSizeRatio; - if (reachesMax && bestN !== undefined) { - break; - } - } - - return this.fitCompactCountToWindow(messages, bestN ?? 
0); - } - - reduceCompactOnOverflow(messages: readonly Message[]): number { - const minReducedSize = Math.max( - 1, - Math.ceil(this.maxSize * this.config.minOverflowReductionRatio), - ); - let reducedSize = 0; - let bestN: number | undefined; - - for (let i = messages.length - 2; i > 0; i--) { - reducedSize += estimateTokensForMessage(messages[i + 1]!); - if (canSplitAfter(messages, i)) { - bestN = i + 1; - if (reducedSize >= minReducedSize) { - return i + 1; - } - } - } - return bestN ?? messages.length; - } - - private fitCompactCountToWindow( - messages: readonly Message[], - compactedCount: number, - ): number { - if (this.maxSize <= 0 || compactedCount <= 0) { - return compactedCount; - } - - let compactedSize = 0; - for (let i = 0; i < compactedCount; i++) { - compactedSize += estimateTokensForMessage(messages[i]!); - } - if (compactedSize <= this.maxSize) { - return compactedCount; - } - - let bestN: number | undefined; - for (let n = compactedCount - 1; n > 0; n--) { - compactedSize -= estimateTokensForMessage(messages[n]!); - if (!canSplitAfter(messages, n - 1)) { - continue; - } - bestN = n; - if (compactedSize <= this.maxSize) { - return n; - } - } - - return bestN ?? compactedCount; - } - get checkAfterStep(): boolean { return this.config.triggerRatio !== this.config.blockRatio; } @@ -176,45 +76,10 @@ export class DefaultCompactionStrategy implements CompactionStrategy { get maxCompactionPerTurn(): number { return this.config.maxCompactionPerTurn; } -} -/** - * Decide whether a compaction split is safe to place immediately after - * `messages[index]`. A split is safe only when: - * - `messages[index]` itself is not a user message or an assistant message - * with pending tool calls (cutting either of those off from what follows - * would break the conversation), AND - * - the next message is not a tool result. 
The history is well-formed: - * tool results only appear after their owning `asst_w_tc` and all tool - * results for one exchange land consecutively before the next non-tool - * message. So if the suffix starts with a tool result, its `asst_w_tc` - * must be in the compacted prefix, which would orphan that result - * (e.g. splitting between tool_a and tool_b of a parallel call), AND - * - the compacted prefix itself does not end with an unresolved tool - * exchange, because pending tool results must remain in the retained tail. - */ -function canSplitAfter(messages: readonly Message[], index: number): boolean { - const m = messages[index]; - if (m === undefined) return false; - if (m.role === 'user') return false; - if (m.role === 'assistant' && m.toolCalls.length > 0) return false; - if (messages[index + 1]?.role === 'tool') return false; - if (prefixEndsWithOpenToolExchange(messages, index)) return false; - return true; -} - -function prefixEndsWithOpenToolExchange(messages: readonly Message[], index: number): boolean { - if (messages[index]?.role !== 'tool') return false; - - let toolResultCount = 0; - for (let i = index; i >= 0; i--) { - const message = messages[i]; - if (message === undefined) return false; - if (message.role === 'tool') { - toolResultCount++; - continue; - } - return message.role === 'assistant' && message.toolCalls.length > toolResultCount; + get maxOverflowCompactionAttempts(): number { + return this.config.maxOverflowCompactionAttempts; } - return false; } + +export type { CompactionSource }; diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts index 820365cdc..cef3c5308 100644 --- a/packages/agent-core/src/agent/compaction/types.ts +++ b/packages/agent-core/src/agent/compaction/types.ts @@ -1,10 +1,46 @@ export interface CompactionResult { + /** Human-facing summary text produced by the compaction model. 
*/ summary: string; + /** + * Exact summary message stored in the live model context. It includes the + * compaction prefix that tells the next model this is handoff context rather + * than a real user prompt. Optional for backward compatibility with older + * wire records, where `summary` was also the model-context text. + */ + contextSummary?: string; compactedCount: number; tokensBefore: number; tokensAfter: number; + /** + * Number of real user messages kept verbatim ahead of the summary in the + * post-compaction live context. Written by `ContextMemory.applyCompaction` + * (the single derivation point for the post-compaction shape) so the + * wire-transcript reducer can reproduce the live folded length without + * re-deriving it from the full transcript. Optional for backward + * compatibility with older wire records. + */ + keptUserMessageCount?: number; + /** + * Number of oldest messages trimmed from the summarizer input when the + * compaction request itself overflowed the model window. These messages are + * not covered by the produced summary — a real-user message among them may + * still be retained verbatim in the live context via `keptUserMessageCount`, + * but assistant/tool messages are lost. Surfacing the count lets records and + * telemetry report the summary's blind spot honestly. Optional for backward + * compatibility with older wire records. + */ + droppedCount?: number; } +/** + * Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`. + * `tokensAfter` / `keptUserMessageCount` / `droppedCount` are optional: the live + * path fills in what it knows, while restore passes the persisted record so its + * historical values are preserved verbatim. 
+ */ +export type CompactionInput = Pick & + Partial>; + export type CompactionSource = 'manual' | 'auto'; export interface CompactionBeginData { diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts index 8e2c699fb..15c2c64d3 100644 --- a/packages/agent-core/src/agent/context/index.ts +++ b/packages/agent-core/src/agent/context/index.ts @@ -3,10 +3,17 @@ import { createToolMessage, type ContentPart, type Message } from '@moonshot-ai/ import type { Agent } from '..'; import { ErrorCodes, KimiError } from '../../errors'; import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop'; -import { estimateTokensForMessages } from '../../utils/tokens'; +import { estimateTokens, estimateTokensForMessages } from '../../utils/tokens'; import { escapeXml } from '../../utils/xml-escape'; -import type { CompactionResult } from '../compaction'; -import { project, trimTrailingOpenToolExchange } from './projector'; +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, + type CompactionInput, + type CompactionResult, +} from '../compaction'; +import { project, type ProjectOptions, trimTrailingOpenToolExchange } from './projector'; import { USER_PROMPT_ORIGIN, type AgentContextData, @@ -172,7 +179,7 @@ export class ContextMemory { this._tokenCount -= estimateTokensForMessages([message]); } - if (isRealUserPrompt(message)) { + if (isRealUserInput(message)) { removedUserCount++; if (removedUserCount >= count) break; } @@ -205,7 +212,36 @@ export class ContextMemory { } } - applyCompaction(result: CompactionResult): void { + applyCompaction(input: CompactionInput): CompactionResult { + // Single derivation point for the post-compaction shape: the most recent + // real user messages (verbatim, within the token budget) followed by a + // user-role summary. 
`tokensAfter` and `keptUserMessageCount` are derived + // here from the actual `_history` so the live context, the wire record, + // and the transcript reducer all agree — re-deriving them elsewhere (e.g. + // from the full transcript, which still holds the untruncated originals of + // messages the live context truncated) would diverge. + const keptUserMessages = selectRecentUserMessages( + collectCompactableUserMessages(this._history), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + // Live compaction omits these so they are derived from the actual + // `_history`; restore passes the persisted record so its historical values + // are preserved verbatim. Older wire records did not have `contextSummary`, + // so their `summary` remains the model-context text during restore. + const contextSummary = input.contextSummary ?? input.summary; + const tokensAfter = + input.tokensAfter ?? + estimateTokens(contextSummary) + estimateTokensForMessages(keptUserMessages); + const keptUserMessageCount = input.keptUserMessageCount ?? 
keptUserMessages.length; + const result: CompactionResult = { + summary: input.summary, + contextSummary, + compactedCount: input.compactedCount, + tokensBefore: input.tokensBefore, + tokensAfter, + keptUserMessageCount, + droppedCount: input.droppedCount, + }; this.agent.records.logRecord({ type: 'context.apply_compaction', ...result, @@ -213,27 +249,48 @@ export class ContextMemory { this.agent.replayBuilder.patchLast('compaction', { result: { summary: result.summary, + contextSummary: result.contextSummary, compactedCount: result.compactedCount, tokensBefore: result.tokensBefore, tokensAfter: result.tokensAfter, + keptUserMessageCount: result.keptUserMessageCount, + droppedCount: result.droppedCount, }, }); - this._history = [ - { - role: 'assistant', - content: [{ type: 'text', text: result.summary }], - toolCalls: [], - origin: { kind: 'compaction_summary' }, - }, - ...this._history.slice(result.compactedCount), - ]; + const summaryMessage: ContextMessage = { + role: 'user', + content: [{ type: 'text', text: contextSummary }], + toolCalls: [], + origin: { kind: 'compaction_summary' }, + }; + // Wire backward-compat: a pre-rework `context.apply_compaction` record (which + // has no `keptUserMessageCount`) used `[summary, ...history.slice(compactedCount)]` + // semantics and kept a verbatim recent tail. Reproduce that exact shape on + // restore so resuming a session compacted by an older version does not + // silently drop the recent assistant/tool tail beyond `compactedCount`. Gated + // on `records.restoring`, so the live/forward path — which always sets + // `contextSummary` and `keptUserMessageCount` — is unaffected. The projector's + // tool-adjacency repair keeps the restored tail well-formed for strict + // providers; compaction only runs at a clean step boundary, so the tail has no + // open tool exchange to track. 
+ const isLegacyRestore = + this.agent.records.restoring !== null && + input.keptUserMessageCount === undefined && + input.compactedCount < this._history.length; + this._history = isLegacyRestore + ? [summaryMessage, ...this._history.slice(input.compactedCount)] + : [...keptUserMessages, summaryMessage]; this.openSteps.clear(); - this.flushDeferredMessagesIfToolExchangeClosed(); + this.pendingToolResultIds.clear(); + // Drop deferred messages (mostly injections/system reminders) instead of + // flushing them: initial context is rebuilt every turn. + this.deferredMessages = []; this._tokenCount = result.tokensAfter; this.tokenCountCoveredMessageCount = this._history.length; this.agent.microCompaction.reset(); - this.agent.injection.onContextCompacted(result.compactedCount); + this.agent.injection.onContextCompacted(); this.agent.emitStatusUpdated(); + return result; } data(): AgentContextData { @@ -256,8 +313,8 @@ export class ContextMemory { return this._history; } - project(messages: readonly ContextMessage[]): Message[] { - return project(this.agent.microCompaction.compact(messages)); + project(messages: readonly ContextMessage[], options?: ProjectOptions): Message[] { + return project(this.agent.microCompaction.compact(messages), options); } get messages(): Message[] { @@ -461,19 +518,6 @@ function isEmptyOutputText(output: string): boolean { return output.length === 0 || output.trim() === TOOL_OUTPUT_EMPTY_TEXT; } -function isRealUserPrompt(message: ContextMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - if (origin.kind === 'skill_activation') { - return origin.trigger === 'user-slash'; - } - if (origin.kind === 'plugin_command') { - return origin.trigger === 'user-slash'; - } - return false; -} - function formatUndoUnavailableMessage( requestedCount: number, undoableCount: number, diff --git a/packages/agent-core/src/agent/context/projector.ts 
b/packages/agent-core/src/agent/context/projector.ts index 02e574c3d..c10de2f9a 100644 --- a/packages/agent-core/src/agent/context/projector.ts +++ b/packages/agent-core/src/agent/context/projector.ts @@ -3,8 +3,96 @@ import type { ContentPart, Message, TextPart } from '@moonshot-ai/kosong'; import { ErrorCodes, KimiError } from '../../errors'; import type { ContextMessage } from './types'; -export function project(history: readonly ContextMessage[]): Message[] { - return mergeAdjacentUserMessages(history); +export interface ProjectOptions { + /** + * When `true`, emit a synthetic `tool_result` for any assistant `tool_use` + * whose result is not present in the provided messages. Used by full + * compaction, where the compacted prefix is a slice that may exclude a + * delayed result preserved in the retained tail; the synthetic result keeps + * the exchange closed so the summary request is not rejected. Leave `false` + * for normal turns, where a missing result means the call is still in-flight + * and must not be closed prematurely. + */ + readonly synthesizeMissing?: boolean; +} + +export function project(history: readonly ContextMessage[], options?: ProjectOptions): Message[] { + return repairToolExchangeAdjacency(mergeAdjacentUserMessages(history), options); +} + +// Strict providers (Anthropic) require every assistant `tool_use` to be answered +// by a matching `tool_result` in the immediately following message(s). A +// misordered history — where a `tool_result` is not adjacent to its `tool_use`, +// e.g. because a user message (background-task notification, flushed steer) +// landed in between, or because an interrupted / nested step delayed the result +// — is rejected with HTTP 400 ("`tool_use` without `tool_result` immediately +// after"). Micro compaction only exposed this latent misordering by busting the +// prompt cache and forcing a full revalidation. 
+// +// Repair the adjacency so every assistant `tool_use` is immediately followed by +// its matching `tool_result` message(s). Matching results are moved up from +// wherever they appear later in the history; any intervening messages keep their +// relative order and simply follow the repaired exchange. A tool call with no +// recorded result anywhere later in the history is left untouched by default — +// it is still in-flight (pending) rather than orphaned, and the +// trailing-open-exchange trim plus the interrupted-result synthesis during replay +// own those cases. With `synthesizeMissing`, a synthetic `tool_result` is emitted +// for such calls instead; full compaction uses this to keep a sliced prefix +// closed when a delayed result lives in the retained tail. This is purely a +// projection-time fix: the underlying history is left untouched, so replay and +// transcripts keep their original order, while the model always sees a +// well-formed tool exchange. +const SYNTHETIC_TOOL_RESULT_TEXT = + 'Tool result is not available in the current context. 
Do not assume the tool completed successfully.'; + +function repairToolExchangeAdjacency( + messages: readonly Message[], + options?: ProjectOptions, +): Message[] { + const out: Message[] = []; + const consumed = new Set(); + for (let i = 0; i < messages.length; i++) { + if (consumed.has(i)) continue; + const message = messages[i]!; + if (message.role !== 'assistant' || message.toolCalls.length === 0) { + out.push(message); + continue; + } + + out.push(message); + const pending = new Set(message.toolCalls.map((toolCall) => toolCall.id)); + for (let j = i + 1; j < messages.length && pending.size > 0; j++) { + if (consumed.has(j)) continue; + const next = messages[j]!; + const toolCallId = next.toolCallId; + if (next.role === 'tool' && toolCallId !== undefined && pending.has(toolCallId)) { + out.push(next); + consumed.add(j); + pending.delete(toolCallId); + } + } + if (options?.synthesizeMissing === true) { + // Close any tool call whose result is absent from the provided messages. + // Only used by full compaction, where the prefix is a slice that may + // exclude a delayed result preserved in the retained tail. For normal + // turns a missing result means the call is still in-flight, so it is left + // for the trailing-open-exchange trim and replay's interrupted-result + // synthesis instead of being closed here. 
+ for (const missingId of pending) { + out.push(makeSyntheticToolResult(missingId)); + } + } + } + return out; +} + +function makeSyntheticToolResult(toolCallId: string): Message { + return { + role: 'tool', + content: [{ type: 'text', text: SYNTHETIC_TOOL_RESULT_TEXT }], + toolCalls: [], + toolCallId, + }; } function mergeAdjacentUserMessages(history: readonly ContextMessage[]): Message[] { diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts index bead3466f..3e841ef6f 100644 --- a/packages/agent-core/src/agent/index.ts +++ b/packages/agent-core/src/agent/index.ts @@ -14,7 +14,11 @@ import type { PluginCommandOrigin } from './context'; import type { McpConnectionManager } from '../mcp'; import { FlagResolver, type ExperimentalFlagResolver } from '../flags'; -import type { PreparedSystemPromptContext, ResolvedAgentProfile } from '../profile'; +import { + prepareSystemPromptContext, + type PreparedSystemPromptContext, + type ResolvedAgentProfile, +} from '../profile'; import type { ModelProvider } from '../session/provider-manager'; import type { SessionSubagentHost } from '../session/subagent-host'; import { noopTelemetryClient, type TelemetryClient } from '../telemetry'; @@ -86,6 +90,7 @@ export interface AgentOptions { readonly experimentalFlags?: ExperimentalFlagResolver; readonly replay?: ReplayBuilderOptions; readonly additionalDirs?: readonly string[]; + readonly systemPromptContextProvider?: (() => Promise) | undefined; } export class Agent { @@ -132,6 +137,9 @@ export class Agent { readonly replayBuilder: ReplayBuilder; private additionalDirs: readonly string[]; + private activeProfile?: ResolvedAgentProfile; + private brandHome?: string; + private readonly systemPromptContextProvider?: (() => Promise) | undefined; constructor(options: AgentOptions) { this.type = options.type ?? 'main'; @@ -151,6 +159,7 @@ export class Agent { this.telemetry = options.telemetry ?? 
noopTelemetryClient; this.experimentalFlags = options.experimentalFlags ?? new FlagResolver(); this.additionalDirs = normalizeAdditionalDirs(options.additionalDirs ?? []); + this.systemPromptContextProvider = options.systemPromptContextProvider; this.llmRequestLogger = new LlmRequestLogger(this.log); this.blobStore = options.homedir @@ -254,7 +263,41 @@ export class Agent { }); } - useProfile(profile: ResolvedAgentProfile, context?: PreparedSystemPromptContext): void { + useProfile( + profile: ResolvedAgentProfile, + context?: PreparedSystemPromptContext, + brandHome?: string, + ): void { + this.setActiveProfile(profile, brandHome); + this.updateSystemPromptFromProfile(profile, context); + this.tools.setActiveTools(profile.tools); + } + + setActiveProfile(profile: ResolvedAgentProfile, brandHome?: string): void { + this.activeProfile = profile; + this.brandHome = brandHome; + } + + /** + * Re-render the system prompt with freshly gathered runtime context (cwd + * listing, AGENTS.md, additional-dirs info, skill list). Called after + * compaction so the post-compaction turns do not keep a snapshot captured + * at session bootstrap. Invalidates the prompt-cache prefix by design. + */ + async refreshSystemPrompt(): Promise { + if (this.activeProfile === undefined) return; + const context = this.systemPromptContextProvider === undefined + ? 
await prepareSystemPromptContext(this.kaos, this.brandHome, { + additionalDirs: this.additionalDirs, + }) + : await this.systemPromptContextProvider(); + this.updateSystemPromptFromProfile(this.activeProfile, context); + } + + private updateSystemPromptFromProfile( + profile: ResolvedAgentProfile, + context?: PreparedSystemPromptContext, + ): void { const systemPrompt = profile.systemPrompt({ osEnv: this.kaos.osEnv, cwd: this.config.cwd, @@ -264,7 +307,6 @@ export class Agent { additionalDirsInfo: context?.additionalDirsInfo, }); this.config.update({ profileName: profile.name, systemPrompt }); - this.tools.setActiveTools(profile.tools); } async resume(options?: AgentRecordsReplayOptions): Promise<{ warning?: string }> { diff --git a/packages/agent-core/src/agent/injection/injector.ts b/packages/agent-core/src/agent/injection/injector.ts index 504e412de..d13e18159 100644 --- a/packages/agent-core/src/agent/injection/injector.ts +++ b/packages/agent-core/src/agent/injection/injector.ts @@ -9,11 +9,8 @@ export abstract class DynamicInjector { this.injectedAt = null; } - onContextCompacted(compactedCount: number): void { - if (this.injectedAt !== null) { - const newInjectedAt = this.injectedAt - compactedCount + 1; - this.injectedAt = newInjectedAt >= 0 ? 
newInjectedAt : null; - } + onContextCompacted(): void { + this.injectedAt = null; } onContextMessageRemoved(index: number): void { diff --git a/packages/agent-core/src/agent/injection/manager.ts b/packages/agent-core/src/agent/injection/manager.ts index 99c9cd07e..812aa6188 100644 --- a/packages/agent-core/src/agent/injection/manager.ts +++ b/packages/agent-core/src/agent/injection/manager.ts @@ -1,3 +1,5 @@ +import { formatTaskList } from '#/tools/background/task-list'; + import type { Agent } from '..'; import { GoalInjector } from './goal'; import type { DynamicInjector } from './injector'; @@ -6,6 +8,9 @@ import { PluginSessionStartInjector } from './plugin-session-start'; import { PlanModeInjector } from './plan-mode'; import { TodoListReminderInjector } from './todo-list'; +const ACTIVE_BACKGROUND_TASK_GUIDANCE = + 'The conversation was compacted, so the earlier messages that started these background tasks are gone — but the tasks are still running from before. Do not start duplicates. Use TaskOutput to fetch a task’s result, TaskList to list them, and TaskStop to cancel one.'; + export class InjectionManager { private readonly injectors: DynamicInjector[]; // Goal context is injected at continuation boundaries (turn start, each @@ -40,16 +45,40 @@ export class InjectionManager { await this.activeGoalInjector()?.inject(); } + async injectAfterCompaction(): Promise { + await this.injectGoal(); + this.injectActiveBackgroundTasks(); + await this.inject(); + } + + /** + * Post-compaction only: re-surface still-running background tasks. Folding the + * live context to [recent user prompts, summary] drops the messages that + * started them and their status updates, so without this the model can forget + * a task is running and spawn a duplicate. Appended as an `injection`-origin + * reminder, so the next compaction drops and rebuilds it — kept fresh, never + * stacked. 
Runs only on the live path: restore replays the persisted reminder + * and `FullCompaction.begin` short-circuits before compaction there. + */ + private injectActiveBackgroundTasks(): void { + const tasks = this.agent.background.list(true); + if (tasks.length === 0) return; + this.agent.context.appendSystemReminder( + `${ACTIVE_BACKGROUND_TASK_GUIDANCE}\n\n${formatTaskList(tasks, true)}`, + { kind: 'injection', variant: 'background_task_status' }, + ); + } + onContextClear(): void { for (const injector of this.lifecycleInjectors()) { injector.onContextClear(); } } - onContextCompacted(compactedCount: number): void { + onContextCompacted(): void { for (const injector of this.lifecycleInjectors()) { try { - injector.onContextCompacted(compactedCount); + injector.onContextCompacted(); } catch { continue; } diff --git a/packages/agent-core/src/agent/injection/permission-mode.ts b/packages/agent-core/src/agent/injection/permission-mode.ts index 638ed6760..ffe5389ad 100644 --- a/packages/agent-core/src/agent/injection/permission-mode.ts +++ b/packages/agent-core/src/agent/injection/permission-mode.ts @@ -15,13 +15,20 @@ const AUTO_MODE_EXIT_REMINDER = [ export class PermissionModeInjector extends DynamicInjector { protected override readonly injectionVariant = 'permission_mode'; private lastMode: PermissionMode | undefined; + private refreshAfterCompaction = false; + + override onContextCompacted(): void { + this.injectedAt = null; + this.refreshAfterCompaction = true; + } getInjection(): string | undefined { const mode = this.agent.permission.mode; const previousMode = this.lastMode; - if (mode === previousMode) return undefined; + if (!this.refreshAfterCompaction && mode === previousMode) return undefined; + this.refreshAfterCompaction = false; this.lastMode = mode; if (mode === 'auto') return AUTO_MODE_ENTER_REMINDER; if (previousMode === 'auto') return AUTO_MODE_EXIT_REMINDER; diff --git a/packages/agent-core/src/agent/turn/index.ts 
b/packages/agent-core/src/agent/turn/index.ts index 0765e6ca4..08f2178d0 100644 --- a/packages/agent-core/src/agent/turn/index.ts +++ b/packages/agent-core/src/agent/turn/index.ts @@ -137,7 +137,11 @@ export class TurnFlow { input, origin, }); - if (this.activeTurn) { + // Buffer while a turn is active OR a manual compaction holds the context; + // `onCompactionFinished` replays the buffer once compaction's full lifecycle + // (summary + reinjection) is done. Returning null means "buffered" — which is + // exactly what fire-and-forget callers (background notifications, cron) assume. + if (this.activeTurn || this.agent.fullCompaction.isCompacting) { this.steerBuffer.push({ input, origin }); return null; } @@ -161,6 +165,18 @@ export class TurnFlow { return null; } + // While a manual/SDK compaction holds the context, defer the launch instead + // of rejecting it: buffer the input and replay it from `onCompactionFinished` + // once compaction's full lifecycle (summary + reinjection) completes. The + // deferred turn's eventual `turn.started` lets PromptService associate the + // pending prompt, so a prompt submitted mid-compaction completes normally + // rather than getting stuck "running". (Auto compaction runs inside an active + // turn, so the `activeTurn` check above already covers it.) + if (this.agent.fullCompaction.isCompacting) { + this.steerBuffer.push({ input, origin }); + return null; + } + // Per-turn setup (telemetry, usage window, `turn.started`, appending the // prompt) now lives in `runOneTurn`, so a goal-driven run emits a clean // start/end pair per continuation turn rather than one mega-turn. @@ -289,6 +305,25 @@ export class TurnFlow { return true; } + /** + * Replay inputs (prompts or steers) that were deferred while a manual compaction + * held the context. Called by `FullCompaction` once the compaction lifecycle + * (summary + reinjection) is done — and on cancel/failure — so deferred input is + * never lost or stuck. 
If a turn is somehow already active (e.g. one that raced + * and cancelled the compaction), let it consume the buffer like any other steer; + * otherwise launch a fresh turn from the first buffered item, with the rest + * draining into it via `flushSteerBuffer`. + */ + onCompactionFinished(): void { + if (this.steerBuffer.length === 0) return; + if (this.activeTurn !== null) { + this.flushSteerBuffer(); + return; + } + const next = this.steerBuffer.shift()!; + this.launch(next.input, next.origin); + } + finishResume(): void { if (this.activeTurn === 'resuming') { this.activeTurn = null; @@ -662,9 +697,15 @@ export class TurnFlow { }, hooks: { beforeStep: async ({ signal: stepSignal }) => { - this.flushSteerBuffer(); this.agent.microCompaction.detect(); await this.agent.fullCompaction.beforeStep(stepSignal); + // Flush steered messages (background-task / cron notifications, + // user interrupts) AFTER compaction so they land in the + // post-compaction context instead of being dropped by it. The + // keep/drop decision lives in + // `compactionUserMessageDisposition()`; these origins are not + // re-injected later, so append them only after compaction runs. 
+ this.flushSteerBuffer(); await this.agent.injection.inject(); deduper.beginStep(); return; diff --git a/packages/agent-core/src/flags/registry.ts b/packages/agent-core/src/flags/registry.ts index 16f88d592..fcce75ece 100644 --- a/packages/agent-core/src/flags/registry.ts +++ b/packages/agent-core/src/flags/registry.ts @@ -17,7 +17,7 @@ export const FLAG_DEFINITIONS = [ title: 'Micro compaction', description: 'Trim older large tool results from context while keeping recent conversation intact.', env: 'KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION', - default: true, + default: false, surface: 'core', }, ] as const satisfies readonly FlagDefinitionInput[]; diff --git a/packages/agent-core/src/index.ts b/packages/agent-core/src/index.ts index 14dcec22a..ae63a8604 100644 --- a/packages/agent-core/src/index.ts +++ b/packages/agent-core/src/index.ts @@ -62,6 +62,12 @@ export type { export { AGENT_WIRE_PROTOCOL_VERSION } from './agent/records'; export type { AgentConfigUpdateData } from './agent/config'; export type { CompactionBeginData, CompactionResult } from './agent/compaction'; +export { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, +} from './agent/compaction'; export type { PermissionApprovalResultRecord, PermissionMode, diff --git a/packages/agent-core/src/profile/default/system.md b/packages/agent-core/src/profile/default/system.md index d1102d395..9934290dc 100644 --- a/packages/agent-core/src/profile/default/system.md +++ b/packages/agent-core/src/profile/default/system.md @@ -143,4 +143,5 @@ At any time, you should be HELPFUL, CONCISE, ACCURATE, and CANDID. Be thorough i - Deliver the complete change. Never stub out code with placeholders like `// ... rest unchanged` or leave the user to fill in the gaps; write out every line you mean to change. - After a change, sweep for comments and docstrings that now describe the old behavior, and bring them in line with what the code actually does. 
- Before calling a task done, verify it: run the checks that cover your change and look at the result instead of assuming. Don't mark work complete while tests are red or the implementation is still partial — this holds whether or not you are tracking the work in a `TodoList`. +- When the context fills up it is compacted automatically, so you may suddenly see a summary of the work so far in place of the full thread. Assume compaction happened while you were working: continue naturally from the summary instead of restarting, and make reasonable assumptions about anything it omits rather than redoing settled work. Treat any "done" it reports as unverified until you re-check. - Before you finalize a reply, re-read the user's latest request and confirm you are answering that one — not an earlier ask left over from a resume, interruption, mid-task steer, or context compaction. diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts index 5003e39e2..a8563370e 100644 --- a/packages/agent-core/src/services/message/transcript.ts +++ b/packages/agent-core/src/services/message/transcript.ts @@ -3,8 +3,10 @@ * agent from its `wire.jsonl` record log. * * Why: `ContextMemory.applyCompaction` rewrites the in-memory history as - * `[compaction_summary, ...tail]`, so `getContext().history` only reflects the - * model's CURRENT context. The wire log, however, keeps every record. The TUI + * `[...keptUserMessages, compaction_summary]` (the most recent real user + * prompts, verbatim within a token budget, followed by a single user-role + * summary), so `getContext().history` only reflects the model's CURRENT + * context. The wire log, however, keeps every record. The TUI * shows the full transcript on resume because `ReplayBuilder` captures every * `pushHistory` during record replay and is never folded by compaction. 
This * module reproduces that exact view for daemon REST consumers (web), without @@ -19,8 +21,11 @@ * open assistant message; tool.result appends a * tool message with the same `` status * wrapping as `toolResultOutputForModel` - * - `context.apply_compaction` → keep the prefix, insert the summary message - * at the fold point (origin `compaction_summary`) + * - `context.apply_compaction` → keep the full history, append the + * user-role summary marker (origin + * `compaction_summary`), and recover + * `foldedLength` from the recorded + * `keptUserMessageCount` * - `context.undo` → remove tail messages exactly like * `ContextMemory.undo` (skip injections, stop at * compaction summaries / `context.clear` floors) @@ -45,6 +50,12 @@ import path from 'node:path'; import type { AgentRecord } from '../../agent/records'; import type { ContextMessage } from '../../agent/context'; import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop'; +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, +} from '../../agent/compaction'; type ContentPart = ContextMessage['content'][number]; @@ -212,7 +223,7 @@ export function reduceWireRecords(records: Iterable): { if (message.origin?.kind === 'compaction_summary') break; transcript.splice(i, 1); foldedLength = Math.max(0, foldedLength - 1); - if (isRealUserPrompt(message)) { + if (isRealUserInput(message)) { removedUserCount++; if (removedUserCount >= count) break; } @@ -238,22 +249,58 @@ export function reduceWireRecords(records: Iterable): { applyLoopEvent(record.event, record.time); break; case 'context.apply_compaction': { - // ContextMemory drops history[0..compactedCount] and prepends the - // summary; we keep the prefix and insert the summary at the fold - // point so the transcript shows both. 
- const tailLength = Math.max(0, foldedLength - record.compactedCount); - transcript.splice(Math.max(0, transcript.length - tailLength), 0, { + // Mirrors ContextMemory.applyCompaction: the live context becomes the + // most recent user messages followed by a user-role summary. The + // transcript keeps the full history and appends the summary marker; + // foldedLength tracks the post-compaction live context length. + transcript.push({ message: { - role: 'assistant', + role: 'user', content: [{ type: 'text', text: record.summary }], toolCalls: [], origin: { kind: 'compaction_summary' }, }, time: record.time, }); - foldedLength = tailLength + 1; - openSteps.clear(); - flushDeferredIfToolExchangeClosed(); + // Prefer the kept-user count recorded by the live + // ContextMemory.applyCompaction. Re-deriving it from the full + // transcript would diverge from the live context: the transcript still + // holds the untruncated originals of messages the live context may + // have truncated, and (after a clear) messages the live context no + // longer has. Only fall back to re-deriving for legacy wire records + // that predate the field. + if (record.keptUserMessageCount !== undefined) { + foldedLength = record.keptUserMessageCount + 1; + } else if (record.compactedCount < foldedLength) { + // Legacy record (predates keptUserMessageCount) that kept + // history.slice(compactedCount) verbatim. Mirror ContextMemory's + // legacy restore ([summary, ...tail]): `foldedLength` here still holds + // the pre-compaction live length, so the post-compaction length is the + // summary plus the tail kept after compactedCount. Re-deriving the + // kept-user count instead would diverge from the live context (and + // make MessageService mis-handle the messages endpoint for old sessions). 
+ foldedLength = 1 + (foldedLength - record.compactedCount); + } else { + // Legacy record whose compactedCount covered the whole live history (no + // tail, matching live restore's `compactedCount < length` guard): fall + // back to the new kept-user + summary derivation. Derive only from + // entries at or after `clearFloor` — the live ContextMemory rebuilds + // `_history` from the post-`/clear` messages only, so counting pre-clear + // prompts here would overstate foldedLength and make MessageService skip + // unflushed live tail messages for old sessions compacted after a clear. + const keptUserMessages = selectRecentUserMessages( + collectCompactableUserMessages( + transcript.slice(clearFloor).map((entry) => entry.message), + ), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + foldedLength = keptUserMessages.length + 1; + } + // Drop any open tool exchange and deferred messages exactly like + // ContextMemory.applyCompaction: late tool results become orphans and + // deferred injections are not rebuilt, so pending ids must not strand + // later appends in `deferred`. + resetOpenState(); break; } case 'context.undo': @@ -272,20 +319,6 @@ export function reduceWireRecords(records: Iterable): { return { entries: transcript as TranscriptEntry[], foldedLength }; } -/** Mirrors agent-core's `isRealUserPrompt` (context undo accounting). */ -function isRealUserPrompt(message: MutableMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - if (origin.kind === 'skill_activation') { - return origin.trigger === 'user-slash'; - } - if (origin.kind === 'plugin_command') { - return origin.trigger === 'user-slash'; - } - return false; -} - /** Mirrors agent-core's `toolResultOutputForModel` + `createToolMessage`. 
*/ function toolResultContent(result: ExecutableToolResult): ContentPart[] { const output = result.output; diff --git a/packages/agent-core/src/services/session/sessionService.ts b/packages/agent-core/src/services/session/sessionService.ts index da1eb4fa8..009a63b80 100644 --- a/packages/agent-core/src/services/session/sessionService.ts +++ b/packages/agent-core/src/services/session/sessionService.ts @@ -1,6 +1,7 @@ import { Disposable, IInstantiationService, InstantiationType, registerSingleton } from '../../di'; import { Emitter } from '../../base/common/event'; import { ErrorCodes, KimiError } from '../../errors'; +import { isRealUserInput } from '../../agent/compaction'; import type { AgentContextData, ContextMessage } from '../../agent/context'; import type { JsonObject, ListSessionsPayload, SessionSummary } from '../../rpc'; import type { SessionMeta } from '../../session'; @@ -59,7 +60,7 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool if (message === undefined) continue; if (message.origin?.kind === 'injection') continue; if (message.origin?.kind === 'compaction_summary') return false; - if (isRealUserPrompt(message)) { + if (isRealUserInput(message)) { found++; if (found >= count) return true; } @@ -67,19 +68,6 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool return false; } -function isRealUserPrompt(message: ContextMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - if (origin.kind === 'skill_activation') { - return origin.trigger === 'user-slash'; - } - if (origin.kind === 'plugin_command') { - return origin.trigger === 'user-slash'; - } - return false; -} - function pageContextMessages( sessionId: string, sessionCreatedAtMs: number, diff --git a/packages/agent-core/src/session/index.ts b/packages/agent-core/src/session/index.ts index ab68824f9..6b64dab19 100644 --- 
a/packages/agent-core/src/session/index.ts +++ b/packages/agent-core/src/session/index.ts @@ -473,7 +473,7 @@ export class Session { this.options.kimiHomeDir, { additionalDirs: this.additionalDirs }, ); - agent.useProfile(profile, context); + agent.useProfile(profile, context, this.options.kimiHomeDir); const { agentsMdWarning } = context; if (agentsMdWarning !== undefined) { this.agentsMdWarning = agentsMdWarning; @@ -725,7 +725,8 @@ export class Session { ): Agent { const parentAgent = parentAgentId !== null ? this.getReadyAgent(parentAgentId) : undefined; const cwd = parentAgent?.config.cwd ?? this.toolKaos.getcwd(); - return new Agent({ + let agent!: Agent; + agent = new Agent({ ...config, type, kaos: this.toolKaos.withCwd(cwd), @@ -745,7 +746,14 @@ export class Session { pluginCommands: type === 'main' ? this.options.pluginCommands : undefined, experimentalFlags: this.experimentalFlags, additionalDirs: parentAgent?.getAdditionalDirs() ?? this.additionalDirs, + systemPromptContextProvider: () => + prepareSystemPromptContext( + this.systemContextKaos(agent.kaos.getcwd()), + this.options.kimiHomeDir, + { additionalDirs: agent.getAdditionalDirs() }, + ), }); + return agent; } private permissionOptions( @@ -818,6 +826,7 @@ export class Session { try { const agent = this.instantiateAgent(id, meta.homedir, meta.type, {}, parentAgentId); const result = await agent.resume(); + this.restoreAgentProfileHandle(agent, meta, parent?.agent); this.agents.set(id, agent); return { agent, warning: parent?.warning ?? 
result.warning }; } catch (error) { @@ -829,6 +838,34 @@ export class Session { } } + private restoreAgentProfileHandle( + agent: Agent, + meta: AgentMeta, + parentAgent: Agent | undefined, + ): void { + if (agent.config.systemPrompt === '') return; + const profile = this.resolvePersistedProfile(agent, meta, parentAgent); + if (profile === undefined) return; + agent.setActiveProfile(profile, this.options.kimiHomeDir); + } + + private resolvePersistedProfile( + agent: Agent, + meta: AgentMeta, + parentAgent: Agent | undefined, + ): ResolvedAgentProfile | undefined { + const profileName = agent.config.profileName; + if (profileName === undefined) return undefined; + if (meta.type === 'sub') { + const parentProfileName = parentAgent?.config.profileName; + return ( + DEFAULT_AGENT_PROFILES[parentProfileName ?? 'agent']?.subagents?.[profileName] ?? + DEFAULT_AGENT_PROFILES['agent']?.subagents?.[profileName] + ); + } + return DEFAULT_AGENT_PROFILES[profileName]; + } + private nextGeneratedAgentId(): string { while (true) { const id = `agent-${this.agentIdCounter++}`; diff --git a/packages/agent-core/src/session/subagent-host.ts b/packages/agent-core/src/session/subagent-host.ts index 1e6e249cf..7aa81fdf3 100644 --- a/packages/agent-core/src/session/subagent-host.ts +++ b/packages/agent-core/src/session/subagent-host.ts @@ -374,7 +374,7 @@ export class SessionSubagentHost { this.session.options.kimiHomeDir, { additionalDirs: child.getAdditionalDirs() }, ); - child.useProfile(profile, context); + child.useProfile(profile, context, this.session.options.kimiHomeDir); child.tools.inheritUserTools(parent.tools); } diff --git a/packages/agent-core/src/tools/background/task-list.ts b/packages/agent-core/src/tools/background/task-list.ts index 2d39e7972..a1bdb1489 100644 --- a/packages/agent-core/src/tools/background/task-list.ts +++ b/packages/agent-core/src/tools/background/task-list.ts @@ -34,7 +34,7 @@ export type TaskListInput = z.Infer; // ── Implementation 
─────────────────────────────────────────────────── -function formatTaskList(tasks: BackgroundTaskInfo[], activeOnly: boolean): string { +export function formatTaskList(tasks: BackgroundTaskInfo[], activeOnly: boolean): string { // `active_only=false` mixes in terminal/lost tasks, so the count is no // longer purely "active" — use a neutral label to avoid mislabeling them. const label = activeOnly ? 'active_background_tasks' : 'background_tasks'; diff --git a/packages/agent-core/src/utils/tokens.ts b/packages/agent-core/src/utils/tokens.ts index fe567f732..845e2024b 100644 --- a/packages/agent-core/src/utils/tokens.ts +++ b/packages/agent-core/src/utils/tokens.ts @@ -1,6 +1,19 @@ import type { ContentPart, Message, Tool } from '@moonshot-ai/kosong'; -const messageTokenEstimateCache = new WeakMap<Message, number>(); +/** + * Structural subset of kosong's {@link Message} that token estimation reads. + * Accepting the subset (instead of the full `Message`) lets callers with + * message-shaped objects — such as the compaction helpers in `handoff.ts`, + * which carry only `role`/`content`/`origin` — estimate tokens without an + * unsafe cast, while full `Message` values still satisfy it. + */ +interface TokenEstimatableMessage { + readonly role: string; + readonly content: readonly ContentPart[]; + readonly toolCalls?: readonly { readonly name: string; readonly arguments: unknown }[]; +} + +const messageTokenEstimateCache = new WeakMap<TokenEstimatableMessage, number>(); /** * Estimate token count from text using a character-based heuristic.
@@ -41,7 +54,7 @@ export function estimateTokensForTools(tools: readonly Tool[]): number { return total; } -export function estimateTokensForMessage(message: Message): number { +export function estimateTokensForMessage(message: TokenEstimatableMessage): number { const cached = messageTokenEstimateCache.get(message); if (cached !== undefined) { return cached; @@ -67,11 +80,35 @@ export function estimateTokensForContentParts(parts: readonly ContentPart[]): nu return total; } +/** + * Transient per-part token floor for media (image/audio/video) whose real size + * cannot be cheaply derived from a data URL without decoding it. Mirrors the + * fixed ~2000-tokens-per-image estimate used elsewhere in the industry and, by + * the same reasoning, deliberately does NOT count the base64 payload as text — + * that would wildly over-count (a few MB of data URL would read as ~1M tokens). + * The value is transient: the next LLM round-trip returns the real usage and + * supersedes it. Its only job is to stop compaction triggers, the + * overflow-shrink budget, the kept-user budget, and `tokensAfter` from treating + * media parts as free. + */ +export const MEDIA_TOKEN_ESTIMATE = 2000; + export function estimateTokensForContentPart(part: ContentPart): number { - if (part.type === 'text') { - return estimateTokens(part.text); - } else if (part.type === 'think') { - return estimateTokens(part.think); + switch (part.type) { + case 'text': + return estimateTokens(part.text); + case 'think': + return estimateTokens(part.think); + case 'image_url': + case 'audio_url': + case 'video_url': + return MEDIA_TOKEN_ESTIMATE; + default: { + // Exhaustiveness guard: a new ContentPart kind must declare its estimate + // here rather than silently counting as 0 (the CMP-03 defect). 
+ const _exhaustive: never = part; + void _exhaustive; + return 0; + } } - return 0; } diff --git a/packages/agent-core/test/agent/basic.test.ts b/packages/agent-core/test/agent/basic.test.ts index 1c9bfec61..2ecaf615e 100644 --- a/packages/agent-core/test/agent/basic.test.ts +++ b/packages/agent-core/test/agent/basic.test.ts @@ -9,7 +9,7 @@ it('creates an independent agent with a scoped experimental flag resolver', () = experimentalFlags: new FlagResolver({}, FLAG_DEFINITIONS), }); - expect(ctx.agent.experimentalFlags.enabled('micro_compaction')).toBe(true); + expect(ctx.agent.experimentalFlags.enabled('micro_compaction')).toBe(false); }); it('runs a text-only agent turn from prompt to completion', async () => { diff --git a/packages/agent-core/test/agent/compaction/anthropic-compliance.test.ts b/packages/agent-core/test/agent/compaction/anthropic-compliance.test.ts new file mode 100644 index 000000000..521f0cbd3 --- /dev/null +++ b/packages/agent-core/test/agent/compaction/anthropic-compliance.test.ts @@ -0,0 +1,248 @@ +// Anthropic-compliance smoke tests for compaction. +// +// Anthropic (and strict Anthropic-compatible backends) reject a request unless +// roles strictly alternate user/assistant AND every assistant `tool_use` is +// answered by a matching `tool_result` in the immediately following message. +// Compaction's output and its summarizer request must satisfy both — but the +// guarantee spans two layers: the projector merges only `origin.kind === 'user'` +// messages, so the user-role summary, skill/plugin activations, and injected +// reminders stay as CONSECUTIVE user messages in the projected output, and it is +// the Anthropic provider's own consecutive-user merge that finally collapses +// them. Tool pairing likewise depends on the projector's adjacency repair and +// (for the summarizer request) synthetic results for still-open calls. 
+// +// These tests drive the real compaction/projection functions, run their output +// through the real AnthropicChatProvider conversion, and assert the wire request +// is well-formed — so a regression in any single layer turns red here. +import { createProvider } from '@moonshot-ai/kosong'; +import type { Message, Tool } from '@moonshot-ai/kosong'; +import { describe, expect, it, vi } from 'vitest'; + +import type { ContextMessage } from '../../../src/agent/context'; +import { testAgent } from '../harness/agent'; + +const PROVIDER = { type: 'kimi', apiKey: 'test-key', model: 'kimi-code' } as const; +const CAPS = { + image_in: true, + video_in: true, + audio_in: false, + thinking: true, + tool_use: true, + max_context_tokens: 256_000, +} as const; + +type WireBlock = { type: string; id?: string; tool_use_id?: string; text?: string }; +type WireMessage = { role: string; content: WireBlock[] }; + +function makeAnthropicResponse() { + return { + id: 'msg_test_smoke', + type: 'message', + role: 'assistant', + model: 'k25', + content: [{ type: 'text', text: 'ok' }], + stop_reason: 'end_turn', + usage: { input_tokens: 1, output_tokens: 1 }, + }; +} + +/** + * Convert a projected `Message[]` through the real Anthropic provider and return + * the wire `messages` it would POST — mirroring kosong's own captureRequestBody. 
+ */ +async function toAnthropicWire(history: Message[], tools: Tool[] = []): Promise<WireMessage[]> { + const provider = createProvider({ + type: 'anthropic', + model: 'k25', + apiKey: 'test-key', + defaultMaxTokens: 1024, + stream: false, + }); + let captured: { messages?: WireMessage[] } | undefined; + (provider as unknown as { _client: { messages: { create: unknown } } })._client.messages.create = + vi.fn().mockImplementation((params: unknown) => { + captured = params as { messages?: WireMessage[] }; + return Promise.resolve(makeAnthropicResponse()); + }); + + const stream = await provider.generate('', tools, history); + for await (const part of stream) { + void part; + } + if (captured?.messages === undefined) { + throw new Error('Expected provider.generate() to call messages.create with messages'); + } + return captured.messages; +} + +/** Assert the wire request satisfies Anthropic's alternation + tool-pairing rules. */ +function assertValidAnthropic(messages: WireMessage[]): void { + expect(messages.length).toBeGreaterThan(0); + expect(messages[0]!.role).toBe('user'); + + for (let i = 1; i < messages.length; i++) { + expect( + messages[i]!.role, + `roles must alternate, but messages[${String(i - 1)}] and [${String(i)}] are both ${messages[i]!.role}`, + ).not.toBe(messages[i - 1]!.role); + } + + for (let i = 0; i < messages.length; i++) { + const message = messages[i]!; + for (const block of message.content) { + if (block.type === 'tool_use') { + expect(message.role, 'tool_use must be on an assistant message').toBe('assistant'); + const next = messages[i + 1]; + const answered = + next?.content.some((b) => b.type === 'tool_result' && b.tool_use_id === block.id) ??
false; + expect(answered, `tool_use ${String(block.id)} must be answered in the next message`).toBe( + true, + ); + } + if (block.type === 'tool_result') { + expect(message.role, 'tool_result must be on a user message').toBe('user'); + const prev = messages[i - 1]; + const hasUse = + prev?.content.some((b) => b.type === 'tool_use' && b.id === block.tool_use_id) ?? false; + expect( + hasUse, + `tool_result ${String(block.tool_use_id)} must immediately follow its tool_use`, + ).toBe(true); + } + } + } +} + +const BASH_TOOL: Tool = { + name: 'Bash', + description: 'Run a shell command', + parameters: { type: 'object', properties: { command: { type: 'string' } } }, +}; + +describe('compaction — Anthropic wire compliance', () => { + it('post-compaction context plus a follow-up tool turn is a valid Anthropic request', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + // A couple of real user prompts so some survive compaction verbatim. + ctx.appendExchange(1, 'first request', 'assistant one', 40); + ctx.appendExchange(2, 'second request', 'assistant two', 40); + + ctx.agent.context.applyCompaction({ + summary: 'Working summary.', + compactedCount: ctx.agent.context.history.length, + tokensBefore: 100, + }); + // A follow-up turn that calls a tool, appended after the summary. + ctx.appendToolExchange(); + + const wire = await toAnthropicWire(ctx.agent.context.messages, [BASH_TOOL]); + // [merged kept users + summary + new user] -> one user; then assistant + // tool_use; then user tool_result. 
+ assertValidAnthropic(wire); + expect(wire.some((m) => m.content.some((b) => b.type === 'tool_use'))).toBe(true); + expect(wire.some((m) => m.content.some((b) => b.type === 'tool_result'))).toBe(true); + }); + + it('collapses mixed-origin kept users and the summary into a single Anthropic user turn', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + // Genuine user input the projector merges, plus a user-slash skill activation + // it does NOT merge (different origin) — both kept by compaction. + ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real prompt' }], { kind: 'user' }); + ctx.agent.context.appendUserMessage([{ type: 'text', text: '/do-thing' }], { + kind: 'skill_activation', + activationId: 'a1', + skillName: 'do-thing', + trigger: 'user-slash', + }); + + ctx.agent.context.applyCompaction({ + summary: 'Working summary.', + compactedCount: ctx.agent.context.history.length, + tokensBefore: 100, + }); + + // Projected output still has consecutive user messages (skill + summary are + // not merged by the projector); only the Anthropic merge collapses them. 
+ const projected = ctx.agent.context.messages; + expect(projected.filter((m) => m.role === 'user').length).toBeGreaterThan(1); + + const wire = await toAnthropicWire(projected); + assertValidAnthropic(wire); + expect(wire).toHaveLength(1); + expect(wire[0]!.role).toBe('user'); + }); + + it('keeps the request valid across repeated compactions', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendExchange(1, 'first request', 'assistant one', 40); + ctx.agent.context.applyCompaction({ + summary: 'First summary.', + compactedCount: ctx.agent.context.history.length, + tokensBefore: 100, + }); + ctx.appendExchange(2, 'second request', 'assistant two', 40); + ctx.agent.context.applyCompaction({ + summary: 'Second summary.', + compactedCount: ctx.agent.context.history.length, + tokensBefore: 100, + }); + ctx.appendToolExchange(); + + const wire = await toAnthropicWire(ctx.agent.context.messages, [BASH_TOOL]); + assertValidAnthropic(wire); + }); + + it('produces a valid summarizer request when a tool result is non-adjacent to its call', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + // A background-task notification (user role) landed between the tool call and + // its result, so they are non-adjacent in history. 
+ const messy: ContextMessage[] = [ + { role: 'user', content: [{ type: 'text', text: 'run it' }], toolCalls: [], origin: { kind: 'user' } }, + { + role: 'assistant', + content: [{ type: 'text', text: 'calling' }], + toolCalls: [{ type: 'function', id: 'call_1', name: 'Bash', arguments: '{"command":"ls"}' }], + }, + { + role: 'user', + content: [{ type: 'text', text: 'background task finished' }], + toolCalls: [], + origin: { kind: 'background_task', taskId: 't', status: 'completed', notificationId: 'n' }, + }, + { role: 'tool', content: [{ type: 'text', text: 'a.ts b.ts' }], toolCalls: [], toolCallId: 'call_1' }, + ]; + + // Mirrors FullCompaction's summarizer projection. + const projected = ctx.agent.context.project(messy, { synthesizeMissing: true }); + const wire = await toAnthropicWire(projected, [BASH_TOOL]); + assertValidAnthropic(wire); + }); + + it('closes a still-open tool call in the summarizer request with a synthetic result', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + // History ends on an assistant tool call whose result never arrived (sliced + // out by overflow shrink, or interrupted) — a dangling tool_use. + const dangling: ContextMessage[] = [ + { role: 'user', content: [{ type: 'text', text: 'do it' }], toolCalls: [], origin: { kind: 'user' } }, + { + role: 'assistant', + content: [{ type: 'text', text: 'calling' }], + toolCalls: [{ type: 'function', id: 'call_x', name: 'Bash', arguments: '{}' }], + }, + ]; + + const projected = ctx.agent.context.project(dangling, { synthesizeMissing: true }); + const wire = await toAnthropicWire(projected, [BASH_TOOL]); + assertValidAnthropic(wire); + // The dangling call is closed by a synthetic tool_result. 
+ const lastUser = wire.at(-1)!; + expect(lastUser.role).toBe('user'); + expect(lastUser.content.some((b) => b.type === 'tool_result' && b.tool_use_id === 'call_x')).toBe( + true, + ); + }); +}); diff --git a/packages/agent-core/test/agent/compaction/compaction-scenarios.test.ts b/packages/agent-core/test/agent/compaction/compaction-scenarios.test.ts new file mode 100644 index 000000000..dd22ec6c5 --- /dev/null +++ b/packages/agent-core/test/agent/compaction/compaction-scenarios.test.ts @@ -0,0 +1,433 @@ +// Compaction scenario + probe tests. +// +// Two kinds of tests live here: +// * GUARD tests lock in behavior we rely on (so future refactors can't +// silently regress it). +// * PROBE tests exercise the high-risk scenarios surfaced in review and in +// our own audit, asserting the DESIRED behavior. Where the current +// implementation does NOT meet that bar, the probe is marked `it.fails`: +// the suite stays green, but the test documents the exact defect and will +// start failing (forcing its removal) the day the behavior is fixed. +// +// Compaction is a hot path, so these intentionally drive the real +// Agent/ContextMemory/FullCompaction machinery through the test harness rather +// than mocking it. 
+import type { ContentPart, Message } from '@moonshot-ai/kosong'; +import { describe, expect, it } from 'vitest'; + +import type { AgentOptions } from '../../../src/agent'; +import { COMPACTION_SUMMARY_PREFIX } from '../../../src/agent/compaction'; +import type { ContextMessage } from '../../../src/agent/context'; +import { FLAG_DEFINITIONS, FlagResolver } from '../../../src/flags'; +import { testAgent, type TestAgentContext } from '../harness/agent'; + +type GenerateFn = NonNullable<AgentOptions['generate']>; + +const PROVIDER = { type: 'kimi', apiKey: 'test-key', model: 'kimi-code' } as const; +const CAPS = { + image_in: true, + video_in: true, + audio_in: false, + thinking: true, + tool_use: true, + max_context_tokens: 256_000, +} as const; + +function textResult(text: string): Awaited<ReturnType<GenerateFn>> { + return { + id: 'mock-compaction-summary', + message: { role: 'assistant', content: [{ type: 'text', text }], toolCalls: [] }, + usage: { inputOther: 1, output: 1, inputCacheRead: 0, inputCacheCreation: 0 }, + finishReason: 'completed', + rawFinishReason: 'stop', + }; +} + +function historyTexts(ctx: TestAgentContext): string[] { + return ctx.agent.context.history.map((message) => + message.content.map((part) => (part.type === 'text' ? part.text : `[${part.type}]`)).join(''), + ); +} + +function summaryMessageText(ctx: TestAgentContext): string { + const summary = ctx.agent.context.history.find( + (message) => message.origin?.kind === 'compaction_summary', + ); + return summary?.content.map((part) => (part.type === 'text' ? part.text : '')).join('') ?? ''; +} + +describe('compaction — guard tests', () => { + it('repeated compaction folds the prior summary into the new one, never stacking two summaries', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendExchange(1, 'user one', 'assistant one', 40); + + ctx.mockNextResponse({ type: 'text', text: 'First summary.'
}); + await ctx.rpc.beginCompaction({}); + await ctx.once('compaction.completed'); + + ctx.agent.context.appendUserMessage([{ type: 'text', text: 'user two' }]); + ctx.mockNextResponse({ type: 'text', text: 'Second summary.' }); + await ctx.rpc.beginCompaction({}); + await ctx.once('compaction.completed'); + + const summaries = ctx.agent.context.history.filter( + (message) => message.origin?.kind === 'compaction_summary', + ); + // Exactly one summary survives; the first was re-summarized, not carried. + expect(summaries).toHaveLength(1); + expect(summaryMessageText(ctx)).toContain('Second summary.'); + expect(historyTexts(ctx).join('\n')).not.toContain('First summary.'); + }); + + it('closes a dangling tool_use in the compaction summary request via synthesizeMissing', async () => { + // Full compaction projects its summarizer input with { synthesizeMissing: true } + // so an unresolved tool_use (whose result is sliced out / not yet recorded) + // is answered by a synthetic tool_result — keeping the summary request + // well-formed for strict providers instead of 400-ing on a dangling call. + let summarizerMessages: Message[] | undefined; + const capture: GenerateFn = async (_provider, _system, _tools, messages) => { + summarizerMessages = messages; + return textResult('Compacted summary.'); + }; + const ctx = testAgent({ generate: capture }); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendUnresolvedToolExchange(0); // assistant with 2 tool calls, no results + + await ctx.rpc.beginCompaction({}); + await ctx.once('compaction.completed'); + + const msgs = summarizerMessages ?? 
[]; + const assistantIndex = msgs.findIndex( + (message) => message.role === 'assistant' && message.toolCalls.length > 0, + ); + expect(assistantIndex).toBeGreaterThanOrEqual(0); + for (const toolCall of msgs[assistantIndex]!.toolCalls) { + const answered = msgs + .slice(assistantIndex + 1) + .some((message) => message.role === 'tool' && message.toolCallId === toolCall.id); + expect(answered).toBe(true); + } + }); + + // Mutual exclusion: compaction and turn processing must not run concurrently, + // or a turn mutating the context mid-summary loses output. Auto compaction is + // structurally safe (it runs while the turn blocks at a step boundary); the + // manual/SDK path is guarded explicitly here. + it('rejects a manual compaction while a turn is active', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.agent.context.appendUserMessage([{ type: 'text', text: 'seed' }], { kind: 'user' }); + ctx.mockNextResponse({ type: 'text', text: 'turn done' }); + + // launch() sets the active turn synchronously, so a turn is active before the + // worker yields — exactly the window an SDK beginCompaction could land in. + ctx.agent.turn.prompt([{ type: 'text', text: 'go' }]); + expect(ctx.agent.turn.hasActiveTurn).toBe(true); + + await expect(ctx.rpc.beginCompaction({})).rejects.toThrow(/turn/i); + + await ctx.agent.turn.waitForCurrentTurn(); + }); + + it('defers a prompt submitted during compaction and runs it afterward', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendExchange(1, 'user one', 'assistant one', 40); + ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' }); + ctx.mockNextResponse({ type: 'text', text: 'answer to the deferred prompt' }); + + // begin() sets the compacting flag synchronously before the summarizer yields. 
+ void ctx.rpc.beginCompaction({}); + expect(ctx.agent.fullCompaction.isCompacting).toBe(true); + + // A prompt arriving mid-compaction is buffered (deferred), not rejected: null + // means "not launched now", and it must run once compaction finishes. + const turnId = ctx.agent.turn.prompt([{ type: 'text', text: 'DEFERRED-PROMPT' }]); + expect(turnId).toBeNull(); + + await ctx.once('compaction.completed'); + await ctx.agent.turn.waitForCurrentTurn(); + + // Ran after compaction — neither lost nor stuck. + expect(historyTexts(ctx).join('\n')).toContain('DEFERRED-PROMPT'); + }); + + it('defers a steer arriving during compaction and delivers it afterward', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendExchange(1, 'user one', 'assistant one', 40); + ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' }); + ctx.mockNextResponse({ type: 'text', text: 'handled the steer' }); + + void ctx.rpc.beginCompaction({}); + expect(ctx.agent.fullCompaction.isCompacting).toBe(true); + + // A background-task/cron steer mid-compaction must be buffered (null = buffered, + // which is exactly what those fire-and-forget callers assume), not dropped. + const turnId = ctx.agent.turn.steer([{ type: 'text', text: 'DEFERRED-STEER' }], { + kind: 'background_task', + taskId: 't', + status: 'completed', + notificationId: 'n', + }); + expect(turnId).toBeNull(); + + await ctx.once('compaction.completed'); + await ctx.agent.turn.waitForCurrentTurn(); + + expect(historyTexts(ctx).join('\n')).toContain('DEFERRED-STEER'); + }); +}); + +describe('compaction — probe tests (high-risk scenarios)', () => { + // PROBE #1 / CMP-02 — messages appended while the summarizer request is in + // flight (a live step racing a manual/SDK compaction). 
The summary only covers + // the pre-compaction snapshot, and the all-user rebuild would drop the appended + // assistant/tool tail — so compaction detects the changed history and cancels, + // leaving the appended turn intact for a later clean-boundary compaction. + it('preserves an assistant turn appended while the summarizer call is in flight', async () => { + let ctx!: TestAgentContext; + const appendDuringGenerate: GenerateFn = async () => { + // Simulate the turn loop completing a step while compaction awaits. + ctx.agent.context.appendLoopEvent({ + type: 'step.begin', + uuid: 'race-step', + turnId: '', + step: 9, + }); + ctx.agent.context.appendLoopEvent({ + type: 'content.part', + uuid: 'race-part', + turnId: '', + step: 9, + stepUuid: 'race-step', + part: { type: 'text', text: 'RACE-ASSISTANT-OUTPUT' }, + }); + ctx.agent.context.appendLoopEvent({ + type: 'step.end', + uuid: 'race-step', + turnId: '', + step: 9, + finishReason: 'end_turn', + }); + return textResult('Compacted summary.'); + }; + ctx = testAgent({ generate: appendDuringGenerate }); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendExchange(1, 'user one', 'assistant one', 40); + + await ctx.rpc.beginCompaction({}); + await ctx.once('compaction.cancelled'); + + expect(historyTexts(ctx).join('\n')).toContain('RACE-ASSISTANT-OUTPUT'); + }); + + // PROBE #1b — a user-ROLE message that compaction would drop (background-task + // notification, hook/cron reminder, shell output) appended mid-summary. It is + // neither summarized (added after the snapshot) nor kept (applyCompaction keeps + // only real user input), so it would silently vanish; the race guard must cancel + // on any tail compaction would drop, not just non-user roles. 
+ it('cancels compaction when a droppable user-role tail is appended mid-summary', async () => { + let ctx!: TestAgentContext; + const appendDuringGenerate: GenerateFn = async () => { + ctx.agent.context.appendUserMessage([{ type: 'text', text: 'BG-NOTIFY-OUTPUT' }], { + kind: 'background_task', + taskId: 't', + status: 'completed', + notificationId: 'n', + }); + return textResult('Compacted summary.'); + }; + ctx = testAgent({ generate: appendDuringGenerate }); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendExchange(1, 'user one', 'assistant one', 40); + + await ctx.rpc.beginCompaction({}); + await Promise.race([ctx.once('compaction.completed'), ctx.once('compaction.cancelled')]); + + // Cancelled, so the notification survives in history rather than being dropped. + expect(historyTexts(ctx).join('\n')).toContain('BG-NOTIFY-OUTPUT'); + }); + + // PROBE #2 — empty/truncated summarizer responses drop one oldest message and + // retry. A dedicated shrink counter, bounded by MAX_COMPACTION_RETRY_ATTEMPTS, + // keeps a model that always returns empty from issuing ~one call per message. + it('bounds summarizer calls by the retry limit when the model keeps returning empty', async () => { + let calls = 0; + // Empty 7 times, then a valid summary. The bounded shrink counter gives up by + // ~call 6, so compaction errors out before ever reaching the 8th (valid) + // response; an unbounded impl would tolerate all 7 and complete on the 8th. + const flakyEmpty: GenerateFn = async () => { + calls += 1; + return calls <= 7 ? 
textResult('') : textResult('Compacted summary.'); + }; + const ctx = testAgent({ generate: flakyEmpty }); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + for (let i = 1; i <= 5; i++) { + ctx.appendExchange(i, `user ${String(i)}`, `assistant ${String(i)}`, 40); + } + + await ctx.rpc.beginCompaction({}); + await Promise.race([ctx.once('compaction.completed'), ctx.once('error')]); + + // A retry budget of MAX_COMPACTION_RETRY_ATTEMPTS(5) should bound calls. + expect(calls).toBeLessThanOrEqual(6); + }); + + // PROBE #3 / CMP-08 — the kept-user budget is a fixed 20k and ignores the + // model window, so on a small-window model the post-compaction context can + // still exceed the trigger, re-compacting every turn without converging. + it.fails('keeps the post-compaction context below the auto-compaction trigger on a small window', async () => { + const SMALL_WINDOW = 16_000; + const ctx = testAgent(); + ctx.configure({ + provider: PROVIDER, + modelCapabilities: { ...CAPS, max_context_tokens: SMALL_WINDOW }, + }); + // ~7.5k tokens of user text per message (30k ascii chars / 4). + for (let i = 1; i <= 3; i++) { + ctx.appendExchange(i, 'u'.repeat(30_000), `assistant ${String(i)}`, 40); + } + + ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' }); + await ctx.rpc.beginCompaction({}); + await ctx.once('compaction.completed'); + + // tokenCount after compaction should leave headroom below the 85% trigger, + // otherwise the next turn immediately re-compacts and never converges. + expect(ctx.agent.context.tokenCount).toBeLessThan(SMALL_WINDOW * 0.85); + }); + + // PROBE #4 / CMP-01 — compaction started while a tool exchange is still open + // (SDK/REST caller mid-tool) clears pendingToolResultIds, so the tool.result + // that arrives afterwards is treated as an orphan and silently dropped. 
+ it.fails('does not drop a tool result that arrives after a compaction started mid-exchange', async () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendUnresolvedToolExchange(0); // assistant with 2 tool calls, no results yet + + ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' }); + await ctx.rpc.beginCompaction({}); + await ctx.once('compaction.completed'); + + // The tool finishes after compaction; its result must not vanish. + ctx.agent.context.appendLoopEvent({ + type: 'tool.result', + parentUuid: 'call_unresolved_one', + toolCallId: 'call_unresolved_one', + result: { output: 'LATE-TOOL-RESULT' }, + }); + + expect(historyTexts(ctx).join('\n')).toContain('LATE-TOOL-RESULT'); + }); + + // CMP-12 fix — restoring a legacy `context.apply_compaction` record (pre-rework: + // no keptUserMessageCount; the old `[summary, ...history.slice(compactedCount)]` + // semantics kept a verbatim recent tail). On restore we reproduce that shape so + // an upgraded session does not lose its recent assistant/tool tail. + it('preserves the verbatim tail when restoring a legacy compaction record', () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + ctx.appendExchange(1, 'summarized user', 'TAIL-ASSISTANT', 40); + + // Goes through the real restore path so `records.restoring` gates the legacy + // reconstruction. No keptUserMessageCount + compactedCount < length marks the + // pre-rework record that kept history.slice(compactedCount) as a tail. 
+ ctx.agent.records.restore({ + type: 'context.apply_compaction', + summary: 'Legacy summary.', + compactedCount: 1, + tokensBefore: 100, + tokensAfter: 50, + }); + + expect(historyTexts(ctx).join('\n')).toContain('TAIL-ASSISTANT'); + }); + + // PROBE #6 — when the summarizer request overflows, historyForModel is shrunk + // to a recent suffix but still projected through MicroCompaction.compact() + // with the cutoff computed for the FULL history. The absolute cutoff applied + // to the shifted suffix can clear recent tool results the summary needs. + it.fails('does not clear recent tool results when projecting a shrunk suffix under an active micro-compaction cutoff', () => { + // This defect only exists when micro-compaction is active, so enable the + // flag explicitly rather than inheriting the ambient KIMI_CODE_EXPERIMENTAL + // master switch — otherwise the probe's pass/fail flips with the runner's + // environment (on locally with the master switch, off in CI by default). + const ctx = testAgent({ + experimentalFlags: new FlagResolver( + { KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION: '1' }, + FLAG_DEFINITIONS, + ), + }); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + + const bigToolOutput = 'TOOL-OUTPUT-CONTENT '.repeat(60); // > minContentTokens(100) + const full: ContextMessage[] = []; + for (let i = 0; i < 20; i++) { + if (i === 15) { + full.push({ + role: 'tool', + content: [{ type: 'text', text: bigToolOutput } satisfies ContentPart], + toolCalls: [], + toolCallId: `tool-${String(i)}`, + }); + } else { + full.push({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: [{ type: 'text', text: `m${String(i)}` }], + toolCalls: [], + origin: i % 2 === 0 ? { kind: 'user' } : undefined, + }); + } + } + + // Cutoff computed for the full history: keep the recent 10 (indices >= 10). + ctx.agent.microCompaction.apply(10); + + // In the full history the tool result is at index 15 (>= cutoff) -> kept. 
+ const projectedFull = ctx.agent.context.project(full); + const fullToolText = projectedFull + .map((m) => m.content.map((p) => (p.type === 'text' ? p.text : '')).join('')) + .join('\n'); + expect(fullToolText).toContain('TOOL-OUTPUT-CONTENT'); + + // After an overflow shrink drops the oldest 10, the SAME tool result sits at + // suffix index 5; the unchanged cutoff(10) now covers it. It must still be + // preserved (it is a recent result the summary depends on). + const shrunkSuffix = full.slice(10); + const projectedSuffix = ctx.agent.context.project(shrunkSuffix); + const suffixToolText = projectedSuffix + .map((m) => m.content.map((p) => (p.type === 'text' ? p.text : '')).join('')) + .join('\n'); + expect(suffixToolText).toContain('TOOL-OUTPUT-CONTENT'); + }); + + // PROBE #7 / CMP-07 — when the oldest kept user message overflows the budget it + // is truncated to text only, dropping any image/audio/video it carried: media + // can't be partially truncated, and keeping it whole would overshoot the + // budget. Recent messages that fit keep their media; only this boundary message + // loses its attachments. Documented as an accepted limitation rather than fixed. + it.fails('keeps media on the oldest kept user message instead of dropping it on truncation', () => { + const ctx = testAgent(); + ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS }); + // Oldest user message: an image + long text that will overflow the budget. 
+ ctx.agent.context.appendUserMessage( + [ + { type: 'image_url', imageUrl: { url: 'data:image/png;base64,AAAA' } }, + { type: 'text', text: 'x'.repeat(120_000) }, // ~30k tokens of text + ], + { kind: 'user' }, + ); + ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user' }], { kind: 'user' }); + + ctx.agent.context.applyCompaction({ + summary: 'Summary.', + compactedCount: 2, + tokensBefore: 100, + }); + + const keptParts = ctx.agent.context.history.flatMap((message) => message.content); + expect(keptParts.some((part) => part.type === 'image_url')).toBe(true); + }); +}); diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 4113de935..58ea539c8 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -18,10 +18,14 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { KimiConfig } from '../../../src/config'; import type { AgentOptions } from '../../../src/agent'; -import { DefaultCompactionStrategy, type CompactionStrategy } from '../../../src/agent/compaction'; +import { + COMPACTION_SUMMARY_PREFIX, + DefaultCompactionStrategy, + type CompactionStrategy, +} from '../../../src/agent/compaction'; import { FLAG_DEFINITIONS, MASTER_ENV } from '../../../src/flags'; import { HookEngine, type HookEngineTriggerArgs } from '../../../src/session/hooks'; -import { estimateTokensForMessages } from '../../../src/utils/tokens'; +import { estimateTokens, estimateTokensForMessages } from '../../../src/utils/tokens'; import { recordingTelemetry, type TelemetryRecord } from '../../fixtures/telemetry'; import type { TestAgentContext, TestAgentOptions } from '../harness/agent'; import { testAgent } from '../harness/agent'; @@ -44,138 +48,6 @@ const CATALOGUED_MODEL_CAPABILITIES = { const MICRO_COMPACTION_FLAG_ENV = getMicroCompactionFlagEnv(); describe('FullCompaction', () => { - it('keeps 
an oversized trailing user message as recent', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', `pending user ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('keeps consecutive trailing user messages as recent', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', `pending user one ${'x'.repeat(1_200)}`), - textMessage('user', `pending user two ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('compacts the prefix when the trailing exchange itself is oversized', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', 'recent user'), - textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('returns 0 when there is nothing to compact', () => { - const strategy = testCompactionStrategy(); - expect(strategy.computeCompactCount([], 'auto')).toBe(0); - expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0); - expect( - strategy.computeCompactCount( - [ - textMessage('user', 'a'), - textMessage('user', 'b'), - textMessage('user', 'c'), - ], - 'auto', - ), - ).toBe(0); - }); - - it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => { - const strategy = testCompactionStrategy(); - const messages: Message[] = [ - textMessage('user', 'inspect'), - { - role: 'assistant', - content: [], - toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }], - }, - ]; - - 
expect(strategy.computeCompactCount(messages, 'auto')).toBe(0); - }); - - it('does not split inside a parallel tool exchange', () => { - const strategy = testCompactionStrategy(); - const messages: Message[] = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', 'run both tools'), - { - role: 'assistant', - content: [], - toolCalls: [ - { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }, - { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' }, - ], - }, - { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' }, - { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' }, - textMessage('user', 'next prompt'), - ]; - - // The only valid split is before the parallel exchange (after 'old assistant'), - // never between tool_a and tool_b — that would leave tool_b as an orphan. - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('reserves response context by default before the ratio threshold is reached', () => { - const strategy = new DefaultCompactionStrategy(() => 256_000); - - expect(strategy.shouldCompact(210_000)).toBe(true); - expect(strategy.shouldBlock(210_000)).toBe(true); - }); - - it('backs off overflow compaction by at least five percent of the context window', () => { - const strategy = testCompactionStrategy(1_000); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - ...Array.from({ length: 20 }, () => [ - textMessage('user', 'continue'), - textMessage('assistant', ''), - ]).flat(), - ]; - - const reduced = strategy.reduceCompactOnOverflow(messages); - const removed = messages.slice(reduced); - - expect(reduced).toBeGreaterThan(0); - expect(estimateTokensForMessages(removed)).toBeGreaterThanOrEqual(50); - }); - - it('ignores reserved context when the reserve is not smaller than the model window', () => { - const strategy = new 
DefaultCompactionStrategy(() => 32_000, { - triggerRatio: 0.85, - blockRatio: 0.85, - reservedContextSize: 50_000, - maxCompactionPerTurn: 3, - maxRecentMessages: 3, - maxRecentUserMessages: Infinity, - maxRecentSizeRatio: 0.2, - minOverflowReductionRatio: 0.05, - }); - - expect(strategy.shouldCompact(1)).toBe(false); - expect(strategy.shouldBlock(1)).toBe(false); - expect(strategy.shouldCompact(28_000)).toBe(true); - expect(strategy.shouldBlock(28_000)).toBe(true); - }); - it('runs manual compaction and applies the compacted context', async () => { const records: TelemetryRecord[] = []; const ctx = testAgent({ telemetry: recordingTelemetry(records) }); @@ -204,12 +76,12 @@ describe('FullCompaction', () => { [wire] context.append_message { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "