diff --git a/.changeset/rework-compaction-strategy.md b/.changeset/rework-compaction-strategy.md
new file mode 100644
index 000000000..554ba6615
--- /dev/null
+++ b/.changeset/rework-compaction-strategy.md
@@ -0,0 +1,10 @@
+---
+"@moonshot-ai/kimi-code": minor
+---
+
+Rework conversation compaction:
+
+- Keep only recent user prompts plus a single user-role summary; drop assistant and tool messages.
+- Repair tool_use/tool_result adjacency before sending, fixing a strict-provider HTTP 400 when a tool call and its result became non-adjacent.
+- Merge consecutive user turns for strict providers (Gemini/Vertex), fixing an HTTP 400 ("roles must alternate") after compaction or when a turn is steered in right after a tool result.
+- Micro-compaction now defaults off.
diff --git a/apps/vis/server/src/lib/context-projector.ts b/apps/vis/server/src/lib/context-projector.ts
index 290ef0c7b..341556696 100644
--- a/apps/vis/server/src/lib/context-projector.ts
+++ b/apps/vis/server/src/lib/context-projector.ts
@@ -1,3 +1,9 @@
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from '@moonshot-ai/agent-core';
 import type {
   ContentPart,
   ContextMessage,
@@ -238,19 +244,21 @@ export function projectContext(
         break;
       case 'context.apply_compaction': {
         openSteps = new Map();
-        // Mirror agent-core's actual `applyCompaction` behaviour
-        // (`packages/agent-core/src/agent/context/index.ts`): history becomes
-        // `[summaryBubble, ...history.slice(compactedCount)]`. The summary is
-        // an *assistant* message tagged `origin.kind = 'compaction_summary'`
-        // (using 'system' would skew role counts and any downstream diff
-        // against agent-core history). The post-compaction tail is preserved
-        // rather than dropped, so messages still in context stay visible.
+        // Mirror agent-core's `applyCompaction`
+        // (`packages/agent-core/src/agent/context/index.ts`): the live history
+        // becomes the most recent real user messages (verbatim, within a token
+        // budget) followed by a single user-role summary tagged
+        // `origin.kind = 'compaction_summary'`. Assistant messages, tool calls,
+        // and tool results are dropped. The selection rule
+        // (`selectRecentUserMessages` / `collectCompactableUserMessages`) is the
+        // same helper agent-core's `ContextMemory` and the web transcript
+        // reducer apply, so all three views stay in sync.
         const summaryBubble: ProjectedMessage = {
           lineNo: entry.lineNo,
           time: rec.time,
           source: 'compaction_summary',
           message: {
-            role: 'assistant',
+            role: 'user',
             content: [{ type: 'text', text: rec.summary }],
             toolCalls: [],
             origin: { kind: 'compaction_summary' },
@@ -262,34 +270,62 @@ export function projectContext(
             tokensAfter: rec.tokensAfter,
           },
         };
+        const modelSummaryBubble: ProjectedMessage =
+          rec.contextSummary === undefined
+            ? summaryBubble
+            : {
+                ...summaryBubble,
+                message: {
+                  ...summaryBubble.message,
+                  content: [{ type: 'text', text: rec.contextSummary }],
+                } as ContextMessage,
+              };
         if (mode === 'model') {
-          // Drop the first `rec.compactedCount` HISTORY entries (NOT array
-          // entries): agent-core's `compactedCount` indexes into `_history`,
-          // which never contains our synthetic 'undo'/'clear' markers. Walk the
-          // array counting only history entries (`isHistoryEntry`) until
-          // `compactedCount` are passed, then slice there — any UI-only markers
-          // in the dropped region go with it (correct: they precede the
-          // compaction). With no markers this is exactly `slice(compactedCount)`.
-          let sliceAt = messages.length;
-          let passed = 0;
-          for (let i = 0; i < messages.length; i++) {
-            if (passed >= rec.compactedCount) {
-              sliceAt = i;
-              break;
-            }
-            if (isHistoryEntry(messages[i]!)) passed++;
+          // Rebuild the model's-eye view. New records carry `keptUserMessageCount`
+          // and use the kept-user selection below; legacy records fall back to the
+          // old verbatim-tail shape (handled first).
+          const historyEntries = messages.filter(isHistoryEntry);
+          if (rec.keptUserMessageCount === undefined && rec.compactedCount < historyEntries.length) {
+            // Legacy (pre-rework) record: it has no `keptUserMessageCount`, so
+            // agent-core's ContextMemory restore reproduces the old
+            // `[summary, ...history.slice(compactedCount)]` semantics — a verbatim
+            // recent tail (assistant/tool included), not the new kept-user
+            // selection. Mirror that exact shape so opening an older compacted
+            // session in model mode shows the same tail the resumed agent still
+            // holds, instead of hiding it behind the new selection.
+            messages = [modelSummaryBubble, ...historyEntries.slice(rec.compactedCount)];
+          } else {
+            // `realUserEntries` is filtered with the exact
+            // `collectCompactableUserMessages` predicate so it stays aligned with
+            // the selection below (genuine user input only — no injections, system
+            // triggers, or prior summaries). `selectRecentUserMessages` keeps a
+            // contiguous suffix of that subsequence, with only the oldest kept
+            // message possibly truncated, so each kept message maps back onto its
+            // original ProjectedMessage wrapper (preserving line/time); we swap in
+            // the (possibly truncated) message object.
+            const realUserEntries = historyEntries.filter(
+              (pm) => collectCompactableUserMessages([pm.message]).length === 1,
+            );
+            const keptUserMessages = selectRecentUserMessages(
+              realUserEntries.map((pm) => pm.message),
+              COMPACT_USER_MESSAGE_MAX_TOKENS,
+            );
+            const suffixStart = realUserEntries.length - keptUserMessages.length;
+            const keptEntries: ProjectedMessage[] = keptUserMessages.map((message, i) => {
+              const original = realUserEntries[suffixStart + i]!;
+              return original.message === message ? original : { ...original, message };
+            });
+            messages = [...keptEntries, modelSummaryBubble];
           }
-          if (passed < rec.compactedCount) sliceAt = messages.length;
-          messages = [summaryBubble, ...messages.slice(sliceAt)];
         } else {
           // Full history: keep ALL preceding messages, just append the summary
           // marker inline so the compacted prefix stays visible.
           messages.push(summaryBubble);
         }
         // Mirror agent-core applyCompaction() → microCompaction.reset() (cutoff
-        // → 0): the message list is rebuilt as [summary, ...tail], so the old
-        // index-based cutoff no longer points at the same messages. (In full
-        // mode the blanking pass does not run, so this is a no-op there.)
+        // → 0): the message list is rebuilt, so the old index-based cutoff no
+        // longer points at the same messages. (In full mode the blanking pass
+        // does not run, so this is a no-op there.)
         microCutoff = 0;
         // Mirror agent-core applyCompaction() → _tokenCount = result.tokensAfter:
         // the live context-window fill is now the post-compaction count. Derived
@@ -328,7 +364,7 @@ export function projectContext(
         // Mirror agent-core `undo` (`agent/context/index.ts`): walk from the
         // end, skip `origin.kind === 'injection'`, stop at
         // `origin.kind === 'compaction_summary'`, remove others, counting real
-        // user prompts via `isRealUserPrompt` until `count` is reached. Then
+        // user prompts via `isRealUserInput` until `count` is reached. Then
         // leave an undo marker.
         //
         // `computeUndoCutoff` is the single source of truth for that skip/stop
@@ -581,22 +617,11 @@ function isHistoryEntry(pm: ProjectedMessage): boolean {
   return pm.source !== 'undo' && pm.source !== 'clear';
 }
 
-/** Mirrors agent-core `isRealUserPrompt` (`agent/context/index.ts`): a message
- *  counts toward an undo only if it is a genuine user prompt. */
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') return origin.trigger === 'user-slash';
-  if (origin.kind === 'plugin_command') return origin.trigger === 'user-slash';
-  return false;
-}
-
 /** Single source of truth for the `context.undo` backward walk, shared by both
  *  projection modes. Mirrors agent-core `undo` (`agent/context/index.ts`): walk
  *  from the end, skip `origin.kind === 'injection'` (those are KEPT even when
  *  they sit inside the undo window), stop at `origin.kind === 'compaction_summary'`,
- *  and count real user prompts via `isRealUserPrompt` until `count` is reached.
+ *  and count real user prompts via `isRealUserInput` until `count` is reached.
  *
  *  Returns the `cutoff` (lowest index to remove from, inclusive) plus the
  *  `removedMessageCount` (number of non-skipped messages in the window). In
@@ -617,7 +642,7 @@ function computeUndoCutoff(
     if (origin?.kind === 'compaction_summary') break; // stop
     removedMessageCount++;
     cutoff = i;
-    if (isRealUserPrompt(messages[i]!.message) && ++removedUserCount >= count) break;
+    if (isRealUserInput(messages[i]!.message) && ++removedUserCount >= count) break;
   }
   return { cutoff, removedMessageCount };
 }
diff --git a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
index 317df60b2..9f44d9a7d 100644
--- a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
+++ b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
@@ -1,5 +1,6 @@
 {"type":"metadata","protocol_version":"1.1","created_at":1779256791085}
 {"type":"config.update","cwd":"/tmp/work","profileName":"agent","systemPrompt":"You are Kimi.","time":1779256791100}
 {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"before compaction"}],"toolCalls":[]},"time":1779256800001}
-{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":1,"tokensBefore":100,"tokensAfter":30,"time":1779256800500}
+{"type":"context.append_message","message":{"role":"assistant","content":[{"type":"text","text":"assistant reply"}],"toolCalls":[]},"time":1779256800200}
+{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":2,"tokensBefore":100,"tokensAfter":30,"time":1779256800500}
 {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"after compaction"}],"toolCalls":[]},"time":1779256801000}
diff --git a/apps/vis/server/test/lib/context-projector.test.ts b/apps/vis/server/test/lib/context-projector.test.ts
index e3eb40fc4..dbeaea844 100644
--- a/apps/vis/server/test/lib/context-projector.test.ts
+++ b/apps/vis/server/test/lib/context-projector.test.ts
@@ -275,33 +275,130 @@ describe('context-projector', () => {
       { lineNo: 4, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [] } }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
-    // Compaction summary is an assistant message (agent-core's own
+    // Model view: the kept user prompt + user-role summary + the new prompt.
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old' });
+    // The compaction summary is a user message (agent-core's own
     // representation), not a synthetic system message.
-    expect(proj.messages[0]!.message.role).toBe('assistant');
-    expect(proj.messages[0]!.message.origin).toEqual({ kind: 'compaction_summary' });
-    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old stuff' });
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'new' });
+    expect(proj.messages[1]!.message.role).toBe('user');
+    expect(proj.messages[1]!.message.origin).toEqual({ kind: 'compaction_summary' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'old stuff' });
+    expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'new' });
+  });
+
+  it('uses contextSummary only for the model view and raw summary for full history', () => {
+    const entries = [
+      { lineNo: 1, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'old' }], toolCalls: [] } }, raw: {} },
+      { lineNo: 2, data: { type: 'context.apply_compaction' as const,
+          summary: 'raw summary', contextSummary: 'prefixed summary', compactedCount: 1, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+    ];
+
+    const model = projectContext(entries as any);
+    expect(model.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'old' },
+      { text: 'prefixed summary' },
+    ]);
+
+    const full = projectContext(entries as any, 'full');
+    expect(full.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'old' },
+      { text: 'raw summary' },
+    ]);
   });
 
-  it('apply_compaction keeps the post-compaction tail (slice(compactedCount))', () => {
+  it('apply_compaction keeps the most recent user messages and drops the assistant/tool tail', () => {
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} },
       { lineNo: 2, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm1' }], toolCalls: [] } }, raw: {} },
       { lineNo: 3, data: { type: 'context.append_message' as const,
-          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (kept)' }], toolCalls: [] } }, raw: {} },
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (dropped)' }], toolCalls: [] } }, raw: {} },
       { lineNo: 4, data: { type: 'context.apply_compaction' as const,
-          summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+          summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    // [summary, m2] — m0 and m1 (the first compactedCount=2) are dropped, m2 kept.
-    expect(proj.messages).toHaveLength(2);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
-    expect(proj.messages[0]!.compaction).toEqual({ compactedCount: 2, tokensBefore: 100, tokensAfter: 10 });
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm2 (kept)' });
-    expect(proj.messages[1]!.lineNo).toBe(3);
+    // [m0, m1, summary] — real user prompts are kept verbatim, the assistant
+    // tail is dropped.
+    expect(proj.messages).toHaveLength(3);
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' });
+    expect(proj.messages[2]!.compaction).toEqual({ compactedCount: 3, tokensBefore: 100, tokensAfter: 10 });
+    expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'sum' });
+  });
+
+  it('apply_compaction mirrors the legacy verbatim tail for records without keptUserMessageCount (model)', () => {
+    // A pre-rework record has no keptUserMessageCount. agent-core's restore keeps
+    // the old `[summary, ...history.slice(compactedCount)]` tail (assistant/tool
+    // included), so the model view must do the same instead of applying the new
+    // kept-user selection — otherwise it would hide the assistant tail the resumed
+    // agent still has, and surface a pre-compaction user message the agent dropped.
+    const entries = [
+      { lineNo: 1, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'u0 (compacted away)' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+      { lineNo: 2, data: { type: 'context.append_message' as const,
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'a1' }], toolCalls: [] } }, raw: {} },
+      { lineNo: 3, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'u2 (tail)' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+      { lineNo: 4, data: { type: 'context.append_message' as const,
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'a3 (tail)' }], toolCalls: [] } }, raw: {} },
+      // Legacy record: no keptUserMessageCount, compactedCount(2) < history(4).
+      { lineNo: 5, data: { type: 'context.apply_compaction' as const,
+          summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+    ];
+
+    const model = projectContext(entries as any);
+    // [summary, u2, a3] — the verbatim tail beyond compactedCount, summary first.
+    expect(model.messages.map((m) => m.source)).toEqual([
+      'compaction_summary', 'append_message', 'append_message',
+    ]);
+    expect(model.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'sum' }, { text: 'u2 (tail)' }, { text: 'a3 (tail)' },
+    ]);
+  });
+
+  it('apply_compaction drops shell/local-command/background messages in model mode only', () => {
+    const entries = [
+      { lineNo: 1, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'real user' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+      { lineNo: 2, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: '! pwd' }], toolCalls: [], origin: { kind: 'shell_command' as const, phase: 'input' as const } } }, raw: {} },
+      { lineNo: 3, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'local output' }], toolCalls: [], origin: { kind: 'injection' as const, variant: 'local-command-stdout' } } }, raw: {} },
+      { lineNo: 4, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'background done' }], toolCalls: [], origin: { kind: 'background_task' as const, taskId: 'task', status: 'completed' as const, notificationId: 'notification' } } }, raw: {} },
+      { lineNo: 5, data: { type: 'context.append_message' as const,
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'assistant reply' }], toolCalls: [] } }, raw: {} },
+      { lineNo: 6, data: { type: 'context.apply_compaction' as const,
+          summary: 'sum', compactedCount: 5, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+      { lineNo: 7, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+    ];
+
+    const model = projectContext(entries as any);
+    expect(model.messages.map((m) => m.source)).toEqual([
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(model.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'real user' }, { text: 'sum' }, { text: 'new' },
+    ]);
+
+    const full = projectContext(entries as any, 'full');
+    expect(full.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'append_message', 'append_message',
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(full.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'real user' }, { text: '! pwd' }, { text: 'local output' },
+      { text: 'background done' }, { text: 'assistant reply' }, { text: 'sum' },
+      { text: 'new' },
+    ]);
   });
 
   // ---- Fix ④: UI-only markers must not offset agent-core history indices ------
@@ -311,7 +408,7 @@ describe('context-projector', () => {
   // real history entries (append_message + compaction_summary), skipping
   // 'undo'/'clear' markers.
 
-  it('apply_compaction slices by history index, skipping a preceding undo marker (model)', () => {
+  it('apply_compaction keeps user messages across a preceding undo marker (model)', () => {
     const userMsg = (text: string) => ({
       role: 'user' as const, content: [{ type: 'text' as const, text }], toolCalls: [],
       origin: { kind: 'user' as const },
@@ -319,14 +416,10 @@ describe('context-projector', () => {
     // Step 1: append u1, u2 then undo(1) → removes u2, leaves [u1, <undo marker>].
     // Step 2: append u3, u4 → array is [u1, <undo marker>, u3, u4].
     // History entries (agent-core _history, which has NO marker) are the three
-    // real messages [u1, u3, u4]. A compaction with compactedCount=2 drops the
-    // first 2 HISTORY entries (u1, u3) — and the undo marker that sits within
-    // that compacted prefix is dropped with it — keeping exactly [summary, u4].
-    //
-    // The naive `messages.slice(compactedCount=2)` would instead cut the ARRAY at
-    // index 2, yielding [summary, u3, u4] — it WRONGLY retains the already-
-    // compacted u3 because the undo marker offset the index by one. This test
-    // pins the correct history-aware behaviour and FAILS against the naive slice.
+    // real user prompts [u1, u3, u4]. Compaction keeps all of them (they fit the
+    // budget) and appends the summary, dropping only the synthetic undo marker.
+    // This pins that the marker does not offset the kept-user selection — a naive
+    // array-slice would have retained the wrong prompts.
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const, message: userMsg('u1') }, raw: {} },
       { lineNo: 2, data: { type: 'context.append_message' as const, message: userMsg('u2') }, raw: {} },
@@ -334,12 +427,16 @@ describe('context-projector', () => {
       { lineNo: 4, data: { type: 'context.append_message' as const, message: userMsg('u3') }, raw: {} },
       { lineNo: 5, data: { type: 'context.append_message' as const, message: userMsg('u4') }, raw: {} },
       { lineNo: 6, data: { type: 'context.apply_compaction' as const,
-          summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+          summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    // Correct: [summary, u4]. The marker and the first 2 history entries are gone.
-    expect(proj.messages.map((m) => m.source)).toEqual(['compaction_summary', 'append_message']);
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'u4' });
+    // Correct: [u1, u3, u4, summary]. The marker is gone, all real prompts kept.
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'u1' }, { text: 'u3' }, { text: 'u4' }, { text: 'sum' },
+    ]);
   });
 
   it('micro-blanking uses the history index, skipping a preceding undo marker (model)', () => {
@@ -688,7 +785,7 @@ describe('context-projector', () => {
   // marker but do NOT mutate/drop the surrounding message list. 'model' mode
   // (the default) keeps the existing model's-eye behaviour byte-identical.
 
-  it("defaults to 'model' mode when no 2nd arg is passed (compaction drops the prefix)", () => {
+  it("defaults to 'model' mode when no 2nd arg is passed (keeps recent user messages + summary)", () => {
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} },
@@ -697,10 +794,14 @@ describe('context-projector', () => {
       { lineNo: 3, data: { type: 'context.apply_compaction' as const,
           summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
-    // No 2nd arg → 'model' default: prefix dropped, only the summary remains.
+    // No 2nd arg → 'model' default: the real user prompts are kept verbatim and
+    // the summary is appended after them.
     const proj = projectContext(entries as any);
-    expect(proj.messages).toHaveLength(1);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' });
   });
 
   it("full mode keeps the pre-compaction messages plus the summary marker plus the tail", () => {
diff --git a/apps/vis/server/test/routes/context.test.ts b/apps/vis/server/test/routes/context.test.ts
index 486e6175d..6352747e9 100644
--- a/apps/vis/server/test/routes/context.test.ts
+++ b/apps/vis/server/test/routes/context.test.ts
@@ -69,28 +69,31 @@ describe('context route', () => {
     cleanup = c;
     const app = contextRoute(home);
 
-    // Default (model view): the pre-compaction message is dropped, leaving
-    // [summary, after-compaction].
+    // Default (model view): the real user prompt before compaction is KEPT, the
+    // assistant reply is dropped, then the summary, then the post-compaction tail.
     const modelRes = await app.request('/session_fixture/context?agent=main');
     expect(modelRes.status).toBe(200);
     const modelBody = (await modelRes.json()) as {
       messages: { source: string; message: { content: { type: string; text?: string }[] } }[];
     };
     expect(modelBody.messages.map((m) => m.source)).toEqual([
-      'compaction_summary', 'append_message',
+      'append_message', 'compaction_summary', 'append_message',
     ]);
+    expect(modelBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' });
+    expect(modelBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' });
 
-    // Full history: the pre-compaction message is KEPT, then the summary marker,
-    // then the post-compaction tail.
+    // Full history: every pre-compaction message (user prompt + assistant reply)
+    // is KEPT, then the summary marker, then the post-compaction tail.
     const fullRes = await app.request('/session_fixture/context?agent=main&history=full');
     expect(fullRes.status).toBe(200);
     const fullBody = (await fullRes.json()) as {
       messages: { source: string; message: { content: { type: string; text?: string }[] } }[];
     };
     expect(fullBody.messages.map((m) => m.source)).toEqual([
-      'append_message', 'compaction_summary', 'append_message',
+      'append_message', 'append_message', 'compaction_summary', 'append_message',
     ]);
     expect(fullBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' });
-    expect(fullBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' });
+    expect(fullBody.messages[1]!.message.content[0]).toMatchObject({ text: 'assistant reply' });
+    expect(fullBody.messages[3]!.message.content[0]).toMatchObject({ text: 'after compaction' });
   });
 });
diff --git a/docs/en/configuration/config-files.md b/docs/en/configuration/config-files.md
index 17a00a379..1f2187063 100644
--- a/docs/en/configuration/config-files.md
+++ b/docs/en/configuration/config-files.md
@@ -52,7 +52,7 @@ max_running_tasks = 4
 keep_alive_on_exit = false
 
 [experimental]
-micro_compaction = true
+micro_compaction = false
 
 [[permission.rules]]
 decision = "allow"
@@ -181,11 +181,11 @@ You can also switch models temporarily without touching the config file — by s
 
 ## `experimental`
 
-`experimental` stores persistent overrides for experimental-feature flags. Currently, `micro_compaction` is the only user-facing entry and defaults to `true`; set it to `false` only when you need to disable automatic trimming of older large tool results.
+`experimental` stores persistent overrides for experimental-feature flags. Currently, `micro_compaction` is the only user-facing entry and defaults to `false`; set it to `true` to enable automatic trimming of older large tool results.
 
 | Field | Type | Default | Description |
 | --- | --- | --- | --- |
-| `micro_compaction` | `boolean` | `true` | Trim older large tool results from context while preserving recent conversation |
+| `micro_compaction` | `boolean` | `false` | Trim older large tool results from context while preserving recent conversation |
 
 ## `services`
 
diff --git a/docs/en/configuration/env-vars.md b/docs/en/configuration/env-vars.md
index 10518832e..3d57e29a7 100644
--- a/docs/en/configuration/env-vars.md
+++ b/docs/en/configuration/env-vars.md
@@ -124,7 +124,7 @@ Switches that control the behavior of subsystems such as telemetry, background t
 | `KIMI_CODE_BACKGROUND_KEEP_ALIVE_ON_EXIT` | Whether to keep background tasks when the session closes; takes higher priority than `config.toml`. The default is to stop them on exit | Truthy: `1`/`true`/`yes`/`on`; falsy: `0`/`false`/`no`/`off` |
 | `KIMI_CODE_PLUGIN_MARKETPLACE_URL` | Override the plugin marketplace JSON loaded by `/plugins`; useful for dev loopback servers, staging CDN files, or alternate marketplace directories | `https://code.kimi.com/kimi-code/plugins/marketplace.json`; also accepts `http://`, `file://` URLs, and local paths |
 | `KIMI_CODE_AGENT_SWARM_MAX_CONCURRENCY` | Cap how many AgentSwarm subagents run concurrently during the initial ramp; leave unset for no cap | Positive integer; invalid values fail fast |
-| `KIMI_CODE_EXPERIMENTAL_FLAG` | Enable all registered experimental features for this process; `micro_compaction` is already enabled by default | `1`, `true`, `yes`, `on` |
+| `KIMI_CODE_EXPERIMENTAL_FLAG` | Enable all registered experimental features for this process | `1`, `true`, `yes`, `on` |
 | `KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION` | Override [`[experimental].micro_compaction`](./config-files.md#experimental) for this process | Truthy or falsy |
 | `KIMI_SHELL_PATH` | Override the Git Bash path on Windows (used when auto-detection fails) | Absolute path |
 | `KIMI_MODEL_MAX_COMPLETION_TOKENS` | Hard cap on `max_completion_tokens` per LLM step; applies to the `kimi` provider only | Positive integer; `0` or negative disables clamping |
diff --git a/docs/zh/configuration/config-files.md b/docs/zh/configuration/config-files.md
index ffab2e001..c214ce76b 100644
--- a/docs/zh/configuration/config-files.md
+++ b/docs/zh/configuration/config-files.md
@@ -52,7 +52,7 @@ max_running_tasks = 4
 keep_alive_on_exit = false
 
 [experimental]
-micro_compaction = true
+micro_compaction = false
 
 [[permission.rules]]
 decision = "allow"
@@ -181,11 +181,11 @@ max_context_size = 1047576
 
 ## `experimental`
 
-`experimental` 存放实验功能 flag 的持久化覆盖。目前 `micro_compaction` 是唯一用户可见的字段，默认值为 `true`；只有在需要关闭自动清理较旧的大型工具结果时，才需要把它设为 `false`。
+`experimental` 存放实验功能 flag 的持久化覆盖。目前 `micro_compaction` 是唯一用户可见的字段，默认值为 `false`；如需自动清理较旧的大型工具结果，把它设为 `true`。
 
 | 字段 | 类型 | 默认值 | 说明 |
 | --- | --- | --- | --- |
-| `micro_compaction` | `boolean` | `true` | 清理较旧的大型工具结果内容，同时保留最近对话 |
+| `micro_compaction` | `boolean` | `false` | 清理较旧的大型工具结果内容，同时保留最近对话 |
 
 ## `services`
 
diff --git a/docs/zh/configuration/env-vars.md b/docs/zh/configuration/env-vars.md
index ddf76795a..130010a6a 100644
--- a/docs/zh/configuration/env-vars.md
+++ b/docs/zh/configuration/env-vars.md
@@ -124,7 +124,7 @@ kimi
 | `KIMI_CODE_BACKGROUND_KEEP_ALIVE_ON_EXIT` | 会话关闭时是否保留后台任务，优先级高于 `config.toml`。默认会在退出时停止后台任务 | 真值：`1`/`true`/`yes`/`on`；假值：`0`/`false`/`no`/`off` |
 | `KIMI_CODE_PLUGIN_MARKETPLACE_URL` | 覆盖 `/plugins` 加载的 plugin marketplace JSON，适合 dev loopback server、测试 CDN 文件或替换 marketplace 目录 | `https://code.kimi.com/kimi-code/plugins/marketplace.json`；也接受 `http://`、`file://` URL 和本地路径 |
 | `KIMI_CODE_AGENT_SWARM_MAX_CONCURRENCY` | 限制 AgentSwarm 初始提升并发阶段可同时运行的子 Agent 数量；不设置表示不限制 | 正整数；非法值会立即失败 |
-| `KIMI_CODE_EXPERIMENTAL_FLAG` | 在当前进程启用所有已注册的实验功能；`micro_compaction` 已默认开启 | `1`、`true`、`yes`、`on` |
+| `KIMI_CODE_EXPERIMENTAL_FLAG` | 在当前进程启用所有已注册的实验功能 | `1`、`true`、`yes`、`on` |
 | `KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION` | 覆盖当前进程的 [`[experimental].micro_compaction`](./config-files.md#experimental) | 真值或假值 |
 | `KIMI_SHELL_PATH` | Windows 上覆盖 Git Bash 路径（自动探测失败时使用） | 绝对路径 |
 | `KIMI_MODEL_MAX_COMPLETION_TOKENS` | 单步 LLM 请求的 `max_completion_tokens` 硬上限，仅对 `kimi` 供应商生效 | 正整数；`0` 或负数禁用 clamp |
diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md
index 49b0d80b4..921068742 100644
--- a/packages/agent-core/src/agent/compaction/compaction-instruction.md
+++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md
@@ -1,69 +1,42 @@
+You are about to run out of context. Write a first-person handoff note to
+yourself so you can seamlessly continue this task after the earlier
+conversation is cleared.
 
 --- This message is a direct task, not part of the above conversation ---
 
-You are now given a task to compact this conversation context according to specific priorities and output requirements.
-
-Output text only. DO NOT CALL ANY TOOLS. Calling tools will be rejected and fails the task. You already have all the information you need in the conversation history. You have only one chance.
-
-The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared work.
-
+Write the note as your own continuing train of thought — first person, present
+tense, the way you would reason through the next move. Do not write a
+third-party report about someone else's work, and do not impose rigid section
+headings; let the shape follow the task.
+
+Make the note self-sufficient: the next turn will see only your most recent user
+messages and this note — every assistant message, tool call, and tool result
+above will be gone. In your own words, preserve what you genuinely need to
+continue:
+
+- The latest user request, quoted verbatim, and what it is actually asking for.
+- The instructions and constraints currently in force (user preferences,
+  project rules, environment and tooling limits) — condensed to what still
+  matters.
+- What has actually been done, at high fidelity: keep the exact commands that
+  were run, the exact file paths touched, and whether each succeeded or failed.
+  Keep only the final working version of any code; drop intermediate attempts
+  and already-resolved errors.
+- The precise next action — including the exact next command or tool call you
+  intend to make — and any required format for the final answer.
+
+Be honest about uncertainty. If an earlier step claimed something was done but
+was never verified (tests "passing", a fix "working", a file "created"), say so
+plainly and treat it as unverified rather than fact — re-check before relying
+on it.
+
+Be concise. Include the critical data, identifiers, and references needed to
+continue, and omit anything that does not change the next move.
+
+Respond with text only. Do not call any tools — you already have everything you
+need in the conversation history.
+
+{% if customInstruction %}
+Optional user instruction:
 {{ customInstruction }}
-
-<!-- Compression Priorities (in order) -->
-
-1. **Current Task State**: What is being worked on RIGHT NOW
-2. **Errors & Solutions**: All encountered errors and their resolutions
-3. **Code Evolution**: Final working versions only (remove intermediate attempts)
-4. **System Context**: Project structure, dependencies, environment setup
-5. **Design Decisions**: Architectural choices and their rationale
-6. **TODO Items**: Unfinished tasks and known issues
-
-<!-- Required Output Structure -->
-
-## Current Focus
-
-[What we're working on now]
-
-## Environment
-
-- [Key setup/config points]
-- ...
-
-## Completed Tasks
-
-- [Task]: [Brief outcome]
-- ...
-
-## Active Issues
-
-- [Issue]: [Status/Next steps]
-- ...
-
-## Code State
-
-### [Critical file name]
-
-[Brief description of the file's purpose and current state]
-
-```
-[The latest version of critical code snippets in this file, <20 lines]
-```
-
-### [Critical file name]
-
-- [Useful classes/methods/functions]: [Brief description/usage]
-- ...
-
-<!-- Omit non-critical code, intermediate attempts, and resolved errors -->
-
-## Important Context
-
-- [Any crucial information not covered above]
-- ...
-
-## All User Messages
-
-- [Detailed non tool use user message]
-- ...
-
-<!-- Must output a summary matching the above template in the **final answer**, not in thinking. -->
+{% endif %}
diff --git a/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md
new file mode 100644
index 000000000..157724c84
--- /dev/null
+++ b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md
@@ -0,0 +1 @@
+The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 36be40575..752de8b24 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -22,9 +22,14 @@ import {
   retryBackoffDelays,
   sleepForRetry,
 } from '../../loop/retry';
-import { renderPrompt } from '../../utils/render-prompt';
+import {
+  renderTodoList,
+  TODO_STORE_KEY,
+  type TodoItem,
+} from '../../tools/builtin/state/todo-list';
 import {
   estimateTokens,
+  estimateTokensForMessage,
   estimateTokensForMessages,
   estimateTokensForTools,
 } from '../../utils/tokens';
@@ -32,14 +37,15 @@ import {
   applyCompletionBudget,
   resolveCompletionBudget,
 } from '../../utils/completion-budget';
+import { renderPrompt } from '../../utils/render-prompt';
 import compactionInstructionTemplate from './compaction-instruction.md?raw';
-import { renderTodoList, type TodoItem } from '../../tools/builtin/state/todo-list';
 import type { CompactionBeginData, CompactionResult } from './types';
 import {
   DEFAULT_COMPACTION_CONFIG,
   DefaultCompactionStrategy,
   type CompactionStrategy,
 } from './strategy';
+import { buildCompactionSummaryText, isRealUserInput } from './handoff';
 
 export const MAX_COMPACTION_RETRY_ATTEMPTS = 5;
 
@@ -62,6 +68,18 @@ export class FullCompaction {
     blockedByTurn: boolean;
   } | null = null;
   private readonly observedMaxContextTokensByModel = new Map<string, number>();
+  // Token count right after the last successful compaction. While no new
+  // content has been appended (tokenCountWithPending <= this value), the
+  // history is already in its minimal compacted form ([kept user prompts,
+  // summary]); re-compacting would only nest summaries, so
+  // checkAutoCompaction skips in that case even if an observed overflow
+  // limit still flags the context as oversized.
+  private lastCompactedTokenCount: number | null = null;
+  // Counts provider-overflow recoveries in this turn that have not yet been
+  // followed by a successful step. Trips MAX_OVERFLOW_COMPACTION_ATTEMPTS to
+  // stop an overflow -> compact -> overflow loop when compaction can no
+  // longer shrink the request below the model window.
+  private consecutiveOverflowCompactions = 0;
   protected readonly strategy: CompactionStrategy;
 
   constructor(
@@ -77,7 +95,7 @@ export class FullCompaction {
           reservedContextSize:
             agent.kimiConfig?.loopControl?.reservedContextSize ??
             DEFAULT_COMPACTION_CONFIG.reservedContextSize,
-        }
+        },
       );
   }
 
@@ -139,9 +157,21 @@ export class FullCompaction {
       });
       return;
     }
-    const compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source);
-    if (compactedCount === 0) {
-      throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No prefix that can be compacted in current history.');
+    if (this.agent.context.history.length === 0) {
+      throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No messages to compact in current history.');
+    }
+    // Manual (SDK/REST) compaction must not start while a turn is running: the
+    // turn keeps mutating the context (streaming content, appending messages)
+    // while the summarizer is in flight, and that output is then neither
+    // summarized nor preserved by the rebuild. Auto compaction is exempt — it is
+    // triggered from within the turn at a step boundary, which blocks the turn
+    // for the duration. Refuse manual compaction here so it only runs at a clean
+    // boundary; the caller can retry once the turn finishes.
+    if (data.source === 'manual' && this.agent.turn.hasActiveTurn) {
+      throw new KimiError(
+        ErrorCodes.COMPACTION_UNABLE,
+        'Cannot compact while a turn is active. Wait for it to finish, then retry.',
+      );
     }
     this.agent.records.logRecord({
       type: 'full_compaction.begin',
@@ -155,7 +185,7 @@ export class FullCompaction {
     const abortController = new AbortController();
     this.compacting = {
       abortController,
-      promise: this.compactionWorker(abortController.signal, data, compactedCount),
+      promise: this.compactionWorker(abortController.signal, data),
       blockedByTurn: false,
     };
   }
@@ -194,9 +224,20 @@ export class FullCompaction {
 
   resetForTurn(): void {
     this.compactionCountInTurn = 0;
+    this.lastCompactedTokenCount = null;
+    this.consecutiveOverflowCompactions = 0;
   }
 
   async handleOverflowError(signal: AbortSignal, error: unknown) {
+    this.consecutiveOverflowCompactions += 1;
+    const maxAttempts = this.strategy.maxOverflowCompactionAttempts;
+    if (this.consecutiveOverflowCompactions > maxAttempts) {
+      throw new KimiError(
+        ErrorCodes.CONTEXT_OVERFLOW,
+        `Compaction failed to bring the context under the model window after ${String(maxAttempts)} attempts.`,
+        { cause: error instanceof Error ? error : undefined },
+      );
+    }
     const didStartCompaction = this.beginAutoCompaction();
     if (!didStartCompaction && !this.compacting) throw error;
     // Always block on overflow errors
@@ -211,6 +252,10 @@ export class FullCompaction {
   }
 
   async afterStep(): Promise<void> {
+    // A completed step means a generate() succeeded, so any prior
+    // overflow -> compact cycle produced a request that now fits; clear the
+    // loop guard.
+    this.consecutiveOverflowCompactions = 0;
     if (this.strategy.checkAfterStep) {
       this.checkAutoCompaction(false);
     }
@@ -219,6 +264,12 @@ export class FullCompaction {
 
   private checkAutoCompaction(throwOnLimit: boolean = true): boolean {
     if (this.compacting) return true;
+    if (
+      this.lastCompactedTokenCount !== null &&
+      this.tokenCountWithPending <= this.lastCompactedTokenCount
+    ) {
+      return false;
+    }
     if (!this.strategy.shouldCompact(this.tokenCountWithPending)) return false;
     return this.beginAutoCompaction(throwOnLimit);
   }
@@ -258,34 +309,26 @@ export class FullCompaction {
   private async compactionWorker(
     signal: AbortSignal,
     data: Readonly<CompactionBeginData>,
-    compactedCount: number,
   ): Promise<void> {
     try {
-      const finalResult = {
-        summary: '',
-        compactedCount: 1,
-        tokensBefore: 0,
-        tokensAfter: 0,
-      };
-
-      for (let round = 1; ; round++) {
-        const result = await this.compactionRound(round, signal, data, compactedCount);
-        if (!result) return;
-
-        finalResult.summary = result.summary;
-        finalResult.compactedCount += result.compactedCount - 1;
-        finalResult.tokensBefore += result.tokensBefore - finalResult.tokensAfter;
-        finalResult.tokensAfter = result.tokensAfter;
-
-        if (result.tokensBefore - result.tokensAfter < 1024) break;
-        if (!this.strategy.shouldBlock(result.tokensAfter)) break;
-        compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source);
-        if (compactedCount === 0) break;
+      const result = await this.compactionRound(signal, data);
+      if (!result) return;
+      // Stay "compacting" through reinjection: a follow-up prompt/steer that lands
+      // now is buffered (TurnFlow defers on `isCompacting`) until the
+      // post-compaction reminders are back, so the first post-compaction turn
+      // never builds a request before they are reinjected. Only after reinjection
+      // do we clear the flag, announce completion, and replay deferred input.
+      try {
+        await this.agent.refreshSystemPrompt();
+      } catch (error) {
+        this.agent.log.error('failed to refresh system prompt after compaction', { error });
       }
+      await this.agent.injection.injectAfterCompaction();
       this.markCompleted();
-      this.agent.emitEvent({ type: 'compaction.completed', result: finalResult });
-      await this.agent.injection.injectGoal();
-      this.triggerPostCompactHook(data, finalResult);
+      const { contextSummary: _contextSummary, ...eventResult } = result;
+      void _contextSummary;
+      this.agent.emitEvent({ type: 'compaction.completed', result: eventResult });
+      this.triggerPostCompactHook(data, result);
     } catch (error) {
       if (isAbortError(error)) return;
       const blockedByTurn = this.compacting?.blockedByTurn === true;
@@ -298,22 +341,40 @@ export class FullCompaction {
         type: 'error',
         ...toKimiErrorPayload(error),
       });
+    } finally {
+      // Replay prompts/steers deferred while compaction held the context — on the
+      // success path (after reinjection above), on an A1 prefix/tail cancel
+      // (`!result`), and on failure/abort. `compacting` is null by now in every
+      // path, so the replay's launch actually starts a turn instead of re-buffering.
+      this.agent.turn.onCompactionFinished();
+    }
+  }
+
+  private buildInstruction(customInstruction: string | undefined): string {
+    return renderPrompt(compactionInstructionTemplate, {
+      customInstruction: customInstruction?.trim() ?? '',
+    }).trimEnd();
+  }
+
+  private postProcessSummary(summary: string): string {
+    const storeData = this.agent.tools.storeData();
+    const todos = (storeData[TODO_STORE_KEY] as readonly TodoItem[] | undefined) ?? [];
+    if (todos.length === 0) {
+      return summary;
     }
+    const todoMarkdown = renderTodoList(todos, '## TODO List');
+    return `${summary.trim()}\n\n${todoMarkdown}`;
   }
 
   private async compactionRound(
-    round: number,
     signal: AbortSignal,
     data: Readonly<CompactionBeginData>,
-    initialCompactedCount: number,
-  ) {
+  ): Promise<CompactionResult | undefined> {
     const startedAt = Date.now();
     const originalHistory = [...this.agent.context.history];
     const tokensBefore = estimateTokensForMessages(originalHistory);
     let retryCount = 0;
     try {
-      let compactedCount = initialCompactedCount;
-
       await this.triggerPreCompactHook(data, tokensBefore, signal);
 
       const model = this.agent.config.model;
@@ -337,15 +398,22 @@ export class FullCompaction {
         }),
         capability,
       });
+      const instruction = this.buildInstruction(data.instruction);
 
       const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS);
-      let usage: TokenUsage | null;
-      let summary: string;
+      let usage: TokenUsage | null = null;
+      let summary: string | undefined;
+      // Compact the whole history, trimming old messages only when the
+      // summarizer request itself cannot fit. Any trimmed messages are not
+      // covered by the produced summary; `droppedCount` reports that blind spot.
+      let historyForModel = originalHistory;
+      let droppedCount = 0;
+      let overflowShrinkCount = 0;
+      let emptyOrTruncatedShrinkCount = 0;
       while (true) {
-        const messagesToCompact = originalHistory.slice(0, compactedCount);
         const messages = [
-          ...this.agent.context.project(messagesToCompact),
-          createUserMessage(renderPrompt(compactionInstructionTemplate, { customInstruction: data.instruction ?? '' })),
+          ...this.agent.context.project(historyForModel, { synthesizeMissing: true }),
+          createUserMessage(instruction),
         ];
         const estimatedCompactionRequestTokens = this.estimateRequestTokens(messages);
         try {
@@ -371,14 +439,40 @@ export class FullCompaction {
           if (isContextOverflow) {
             this.observeContextOverflow(estimatedCompactionRequestTokens);
           }
-          if (
-            isContextOverflow ||
+          if (isContextOverflow && historyForModel.length > 1) {
+            overflowShrinkCount += 1;
+            if (overflowShrinkCount > MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS) {
+              throw error;
+            }
+            const before = historyForModel.length;
+            historyForModel = shrinkCompactionHistoryAfterOverflow(
+              historyForModel,
+              overflowShrinkCount,
+            );
+            droppedCount += before - historyForModel.length;
+            retryCount = 0;
+            continue;
+          }
+          const shouldShrinkAfterEmptyOrTruncated =
             error instanceof CompactionTruncatedError ||
-            error instanceof APIEmptyResponseError // e.g. think-only
-          ) {
-            compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact);
+            error instanceof APIEmptyResponseError;
+          if (shouldShrinkAfterEmptyOrTruncated && historyForModel.length > 1) {
+            // Each empty/truncated summary drops the oldest message and retries,
+            // but without its own bound this would issue ~one request per message
+            // (resetting retryCount sidesteps the transient-error budget). Cap the
+            // shrink attempts by the same retry budget so a model that keeps
+            // returning empty cannot fan out into a request per history entry.
+            emptyOrTruncatedShrinkCount += 1;
+            if (emptyOrTruncatedShrinkCount > MAX_COMPACTION_RETRY_ATTEMPTS) {
+              throw error;
+            }
+            const before = historyForModel.length;
+            historyForModel = dropOldestMessageAndLeadingToolResults(historyForModel);
+            droppedCount += before - historyForModel.length;
+            retryCount = 0;
+            continue;
           }
-          else if (!isRetryableGenerateError(error)) {
+          if (!isRetryableGenerateError(error)) {
             throw error;
           }
           if (retryCount + 1 >= MAX_COMPACTION_RETRY_ATTEMPTS) {
@@ -396,23 +490,33 @@ export class FullCompaction {
       const newHistory = this.agent.context.history;
       for (let i = 0; i < originalHistory.length; i++) {
         if (newHistory[i] !== originalHistory[i]) {
-          // History changed during compaction, likely due to undo
+          // The compacted prefix changed under us (e.g. undo). Bail.
           this.cancel();
           return undefined;
         }
       }
+      // The prefix is intact, but the tail grew while the summarizer was in
+      // flight (a live step racing a manual/SDK compaction). A real user message
+      // is safe — the all-user rebuild picks recent user input back up from the
+      // grown history — but anything compaction would drop (an assistant/tool
+      // turn, or a user-role message like a background-task notification, hook/
+      // cron reminder, or shell output) was neither summarized (the summary only
+      // covers originalHistory) nor kept, so it would silently vanish. Cancel and
+      // let a later clean-boundary compaction handle it.
+      if (newHistory.slice(originalHistory.length).some((message) => !isRealUserInput(message))) {
+        this.cancel();
+        return undefined;
+      }
 
-      summary = this.postProcessSummary(summary);
-
-      const recent = originalHistory.slice(compactedCount);
-      const tokensAfter = estimateTokens(summary) + estimateTokensForMessages(recent);
-
-      const result: CompactionResult = {
-        summary,
-        compactedCount,
+      const rawSummary = this.postProcessSummary(summary ?? '');
+      const contextSummary = buildCompactionSummaryText(rawSummary);
+      const result = this.agent.context.applyCompaction({
+        summary: rawSummary,
+        contextSummary,
+        compactedCount: originalHistory.length,
         tokensBefore,
-        tokensAfter,
-      };
+        droppedCount: droppedCount === 0 ? undefined : droppedCount,
+      });
 
       // Telemetry keys are snake_case, but the `context.apply_compaction`
       // record written below keeps its persisted camelCase field names
@@ -424,22 +528,23 @@ export class FullCompaction {
         tokens_after: result.tokensAfter,
         duration_ms: Date.now() - startedAt,
         compacted_count: result.compactedCount,
+        dropped_count: result.droppedCount,
         retry_count: retryCount,
-        round,
+        round: 1,
         thinking_effort: this.agent.config.thinkingEffort,
         ...(usage === null
           ? {}
           : { input_tokens: inputTotal(usage), output_tokens: usage.output }),
       });
-      this.agent.context.applyCompaction(result);
+      this.lastCompactedTokenCount = result.tokensAfter;
       return result;
     } catch (error) {
-      if (isAbortError(error)) return;
+      if (isAbortError(error)) return undefined;
       this.agent.telemetry.track('compaction_failed', {
         source: data.source,
         tokens_before: tokensBefore,
         duration_ms: Date.now() - startedAt,
-        round,
+        round: 1,
         retry_count: retryCount,
         thinking_effort: this.agent.config.thinkingEffort,
         error_type: error instanceof Error ? error.name : 'Unknown',
@@ -478,16 +583,52 @@ export class FullCompaction {
       },
     });
   }
+}
 
-  private postProcessSummary(summary: string): string {
-    const storeData = this.agent.tools.storeData();
-    const todos = (storeData['todo'] as readonly TodoItem[] | undefined) ?? [];
-    if (todos.length === 0) {
-      return summary;
-    }
-    const todoMarkdown = renderTodoList(todos, '## TODO List');
-    return `${summary.trim()}\n\n${todoMarkdown}`;
+const MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS = 3;
+const COMPACTION_OVERFLOW_SHRINK_RATIOS = [0.7, 0.5, 0.35] as const;
+
+function shrinkCompactionHistoryAfterOverflow<T extends Message>(
+  messages: readonly T[],
+  attempt: number,
+): T[] {
+  if (messages.length <= 1) return messages.slice();
+  const ratio = COMPACTION_OVERFLOW_SHRINK_RATIOS[
+    Math.min(attempt - 1, COMPACTION_OVERFLOW_SHRINK_RATIOS.length - 1)
+  ]!;
+  const tokenBudget = Math.floor(estimateTokensForMessages(messages) * ratio);
+  return takeRecentMessagesWithinTokenBudget(messages, tokenBudget);
+}
+
+function takeRecentMessagesWithinTokenBudget<T extends Message>(
+  messages: readonly T[],
+  tokenBudget: number,
+): T[] {
+  let start = messages.length;
+  let tokens = 0;
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const messageTokens = estimateTokensForMessage(messages[i]!);
+    if (tokens + messageTokens > tokenBudget) break;
+    tokens += messageTokens;
+    start = i;
+  }
+  if (start === 0) start = 1;
+  return dropLeadingToolResults(messages.slice(start));
+}
+
+function dropOldestMessageAndLeadingToolResults<T extends { readonly role: string }>(
+  messages: readonly T[],
+): T[] {
+  if (messages.length <= 1) return messages.slice();
+  return dropLeadingToolResults(messages.slice(1));
+}
+
+function dropLeadingToolResults<T extends { readonly role: string }>(messages: readonly T[]): T[] {
+  let start = 0;
+  while (start < messages.length && messages[start]!.role === 'tool') {
+    start += 1;
   }
+  return messages.slice(start);
 }
 
 function extractCompactionSummary(response: GenerateResult): string {
diff --git a/packages/agent-core/src/agent/compaction/handoff.ts b/packages/agent-core/src/agent/compaction/handoff.ts
new file mode 100644
index 000000000..0ee8bd0f4
--- /dev/null
+++ b/packages/agent-core/src/agent/compaction/handoff.ts
@@ -0,0 +1,166 @@
+import type { ContentPart } from '@moonshot-ai/kosong';
+import { estimateTokensForMessage } from '../../utils/tokens';
+import type { PromptOrigin } from '../context/types';
+import summaryPrefixTemplate from './compaction-summary-prefix.md?raw';
+
+/**
+ * Compaction handoff helpers.
+ *
+ * Compaction rewrites the model context as: the most recent user messages
+ * (verbatim, within a token budget) followed by a single user-role summary
+ * that is prefixed with `COMPACTION_SUMMARY_PREFIX`. Assistant messages,
+ * tool calls, and tool results are dropped. These helpers apply the exact
+ * same rule for both the live context rewrite and the transcript reducer.
+ */
+
+export const COMPACTION_SUMMARY_PREFIX = summaryPrefixTemplate.trimEnd();
+export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000;
+
+/**
+ * Structural subset of kosong's `Message` that the handoff helpers inspect.
+ * Both `ContextMessage` (the live context) and the wire-transcript reducer's
+ * mutable message satisfy this shape, so one set of helpers serves both
+ * layers without introducing a shared nominal type. `origin` is what tells
+ * real user input apart from injections and compaction summaries.
+ */
+interface MessageLike {
+  readonly role: string;
+  readonly content: readonly ContentPart[];
+  readonly origin?: PromptOrigin | undefined;
+}
+
+export type CompactionUserDisposition = 'keep' | 'drop';
+
+/**
+ * Single source of truth for whether a user-role message survives compaction as
+ * genuine user input. Only real user prompts and user-slash skill
+ * activations are kept verbatim. Everything else user-role is
+ * either rebuilt by injectors after compaction or intentionally ephemeral, so
+ * it is dropped from the live context even when transcript/replay retains it
+ * for UI rendering. New `PromptOrigin` kinds must update this switch.
+ */
+export function compactionUserMessageDisposition(
+  origin: PromptOrigin | undefined,
+): CompactionUserDisposition {
+  if (origin === undefined) return 'keep';
+  switch (origin.kind) {
+    case 'user':
+      return 'keep';
+    case 'skill_activation':
+    case 'plugin_command':
+      return origin.trigger === 'user-slash' ? 'keep' : 'drop';
+    case 'injection':
+    case 'shell_command':
+    case 'compaction_summary':
+    case 'system_trigger':
+    case 'background_task':
+    case 'cron_job':
+    case 'cron_missed':
+    case 'hook_result':
+    case 'retry':
+      return 'drop';
+    default: {
+      const _exhaustive: never = origin;
+      void _exhaustive;
+      return 'drop';
+    }
+  }
+}
+
+function extractText(content: readonly ContentPart[]): string {
+  let text = '';
+  for (const part of content) {
+    if (part.type === 'text') {
+      text += part.text;
+    }
+  }
+  return text;
+}
+
+export function isCompactionSummaryMessage(message: MessageLike): boolean {
+  return message.origin?.kind === 'compaction_summary';
+}
+
+/**
+ * Keep only genuine user input (real user prompts and user-slash skill
+ * activations). See `compactionUserMessageDisposition` for the full keep/drop
+ * policy and the rationale for each origin.
+ */
+export function isRealUserInput(message: MessageLike): boolean {
+  return message.role === 'user' && compactionUserMessageDisposition(message.origin) === 'keep';
+}
+
+export function collectCompactableUserMessages<T extends MessageLike>(messages: readonly T[]): T[] {
+  return messages.filter(
+    (message) => isRealUserInput(message) && !isCompactionSummaryMessage(message),
+  );
+}
+
+function truncateTextToTokens(text: string, maxTokens: number): string {
+  if (maxTokens <= 0) return '';
+  // Single pass: walk the string once, mirroring estimateTokens' heuristic
+  // (ASCII ~4 chars/token, non-ASCII ~1 char/token) and stop at the first
+  // code point that would push the running total over the budget. This keeps
+  // CJK-heavy inputs from the O(n^2) cost of re-estimating shrinking prefixes.
+  let asciiCount = 0;
+  let nonAsciiCount = 0;
+  let end = 0;
+  for (const char of text) {
+    if (char.codePointAt(0)! <= 127) {
+      asciiCount++;
+    } else {
+      nonAsciiCount++;
+    }
+    if (Math.ceil(asciiCount / 4) + nonAsciiCount > maxTokens) break;
+    end += char.length;
+  }
+  return text.slice(0, end);
+}
+
+function truncateUserMessage<T extends MessageLike>(message: T, maxTokens: number): T {
+  const text = truncateTextToTokens(extractText(message.content), maxTokens);
+  // Truncating to text only drops any image/audio/video the oldest kept message
+  // carried: media cannot be partially truncated, and keeping it whole would
+  // overshoot the budget, so the boundary message loses its attachments. Recent
+  // messages that fit the budget are kept verbatim (media included); only this
+  // boundary message is affected. Spread the original to preserve every field
+  // (notably `origin`); clearing tool calls is safe (real user input never
+  // carries them). The cast back to `T` is unavoidable: TypeScript cannot prove
+  // the spread-then-override still equals T.
+  return {
+    ...message,
+    content: [{ type: 'text', text }],
+    toolCalls: [],
+  } as unknown as T;
+}
+
+/**
+ * Keep the most recent user messages whose cumulative estimated size fits
+ * `maxTokens`. The oldest kept message is truncated to the remaining budget
+ * when it would otherwise overflow; older messages are dropped.
+ */
+export function selectRecentUserMessages<T extends MessageLike>(
+  messages: readonly T[],
+  maxTokens: number = COMPACT_USER_MESSAGE_MAX_TOKENS,
+): T[] {
+  const selected: T[] = [];
+  let remaining = maxTokens;
+  for (let i = messages.length - 1; i >= 0 && remaining > 0; i--) {
+    const message = messages[i]!;
+    const tokens = estimateTokensForMessage(message);
+    if (tokens <= remaining) {
+      selected.push(message);
+      remaining -= tokens;
+    } else {
+      selected.push(truncateUserMessage(message, remaining));
+      break;
+    }
+  }
+  selected.reverse();
+  return selected;
+}
+
+export function buildCompactionSummaryText(summary: string): string {
+  const suffix = summary.trim();
+  return `${COMPACTION_SUMMARY_PREFIX}\n${suffix.length > 0 ? suffix : '(no summary available)'}`;
+}
diff --git a/packages/agent-core/src/agent/compaction/index.ts b/packages/agent-core/src/agent/compaction/index.ts
index 4f92ac9fe..49978abf1 100644
--- a/packages/agent-core/src/agent/compaction/index.ts
+++ b/packages/agent-core/src/agent/compaction/index.ts
@@ -2,3 +2,4 @@ export * from './full';
 export * from './micro';
 export * from './strategy';
 export * from './types';
+export * from './handoff';
diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts
index edf9132e0..d409d6e8d 100644
--- a/packages/agent-core/src/agent/compaction/strategy.ts
+++ b/packages/agent-core/src/agent/compaction/strategy.ts
@@ -1,43 +1,48 @@
-import type { Message } from "@moonshot-ai/kosong";
-import { estimateTokensForMessage } from "../../utils/tokens";
-import type { CompactionSource } from "./types";
+import type { CompactionSource } from './types';
 
 export interface CompactionConfig {
+  /** Fraction of the model context window that triggers auto-compaction. */
   triggerRatio: number;
+  /** Fraction of the model context window that blocks the turn on compaction. */
   blockRatio: number;
+  /** Reserved output budget; compaction triggers early to leave this much room. */
   reservedContextSize: number;
+  /** Maximum number of auto-compactions allowed in a single turn. */
   maxCompactionPerTurn: number;
-  maxRecentMessages: number;
-  maxRecentUserMessages: number;
-  maxRecentSizeRatio: number;
-  minOverflowReductionRatio: number;
+  /**
+   * Consecutive provider-overflow recoveries (overflow -> compact -> overflow
+   * again) allowed in a single turn before giving up. Caps the loop when
+   * compaction can no longer shrink the request below the model window.
+   */
+  maxOverflowCompactionAttempts: number;
 }
 
+/**
+ * Auto-compact at 85% of the resolved context window. `blockRatio` matches
+ * `triggerRatio` so compaction runs synchronously with no background
+ * compaction.
+ */
 export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
   triggerRatio: 0.85,
-  blockRatio: 0.85, // Same as triggerRatio to disable async compaction
+  blockRatio: 0.85,
   reservedContextSize: 50_000,
   maxCompactionPerTurn: Infinity,
-  maxRecentMessages: 4,
-  maxRecentUserMessages: Infinity,
-  maxRecentSizeRatio: 0.2,
-  minOverflowReductionRatio: 0.05,
+  maxOverflowCompactionAttempts: 3,
 };
 
 export interface CompactionStrategy {
   shouldCompact(usedSize: number): boolean;
   shouldBlock(usedSize: number): boolean;
-  computeCompactCount(messages: readonly Message[], source: CompactionSource): number;
-  reduceCompactOnOverflow(messages: readonly Message[]): number;
   readonly checkAfterStep: boolean;
   readonly maxCompactionPerTurn: number;
+  readonly maxOverflowCompactionAttempts: number;
 }
 
 export class DefaultCompactionStrategy implements CompactionStrategy {
   constructor(
     protected readonly maxSizeProvider: () => number,
-    protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG
-  ) { }
+    protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG,
+  ) {}
 
   protected get maxSize(): number {
     return this.maxSizeProvider();
@@ -64,111 +69,6 @@ export class DefaultCompactionStrategy implements CompactionStrategy {
     return reservedSize > 0 && reservedSize < this.maxSize && usedSize + reservedSize >= this.maxSize;
   }
 
-  computeCompactCount(messages: readonly Message[], source: CompactionSource): number {
-    // Return value: N messages to be compacted (0 means no compaction possible)
-    // LLM Input: messages.slice(0, N) + [user:instruction]
-    // Preserved recent messages: messages.slice(N)
-
-    // Manual compaction
-    if (source === 'manual') {
-      for (let i = messages.length - 1; i > 0; i--) {
-        if (canSplitAfter(messages, i)) {
-          return this.fitCompactCountToWindow(messages, i + 1);
-        }
-      }
-      return 0;
-    }
-
-    // Auto compaction rules (in order of precedence):
-    // 1. The split after messages[N-1] must be safe per `canSplitAfter`:
-    //    messages[N-1] is not a user or asst-with-tool-calls, and the retained
-    //    suffix messages.slice(N) has no orphan tool result.
-    // 2. At least one recent message must be preserved
-    // 3. At most maxRecentMessages recent messages should be preserved
-    // 4. At most maxRecentUserMessages recent user messages should be preserved
-    // 5. At most maxRecentSizeRatio * maxSize recent messages should be preserved
-    // 6. N should be as small as possible
-
-    let recentMessages = 1;
-    let recentUserMessages = 0;
-    let recentSize = 0;
-    let bestN: number | undefined;
-
-    for (; recentMessages < messages.length; recentMessages++) {
-      const splitIndex = messages.length - recentMessages - 1;
-      const m2 = messages[messages.length - recentMessages]!;
-
-      if (m2.role === 'user') {
-        recentUserMessages++;
-      }
-      recentSize += estimateTokensForMessage(m2);
-
-      if (canSplitAfter(messages, splitIndex)) {
-        bestN = splitIndex + 1;
-      }
-
-      const reachesMax = recentMessages >= this.config.maxRecentMessages
-        || recentUserMessages >= this.config.maxRecentUserMessages
-        || recentSize >= this.maxSize * this.config.maxRecentSizeRatio;
-      if (reachesMax && bestN !== undefined) {
-        break;
-      }
-    }
-
-    return this.fitCompactCountToWindow(messages, bestN ?? 0);
-  }
-
-  reduceCompactOnOverflow(messages: readonly Message[]): number {
-    const minReducedSize = Math.max(
-      1,
-      Math.ceil(this.maxSize * this.config.minOverflowReductionRatio),
-    );
-    let reducedSize = 0;
-    let bestN: number | undefined;
-
-    for (let i = messages.length - 2; i > 0; i--) {
-      reducedSize += estimateTokensForMessage(messages[i + 1]!);
-      if (canSplitAfter(messages, i)) {
-        bestN = i + 1;
-        if (reducedSize >= minReducedSize) {
-          return i + 1;
-        }
-      }
-    }
-    return bestN ?? messages.length;
-  }
-
-  private fitCompactCountToWindow(
-    messages: readonly Message[],
-    compactedCount: number,
-  ): number {
-    if (this.maxSize <= 0 || compactedCount <= 0) {
-      return compactedCount;
-    }
-
-    let compactedSize = 0;
-    for (let i = 0; i < compactedCount; i++) {
-      compactedSize += estimateTokensForMessage(messages[i]!);
-    }
-    if (compactedSize <= this.maxSize) {
-      return compactedCount;
-    }
-
-    let bestN: number | undefined;
-    for (let n = compactedCount - 1; n > 0; n--) {
-      compactedSize -= estimateTokensForMessage(messages[n]!);
-      if (!canSplitAfter(messages, n - 1)) {
-        continue;
-      }
-      bestN = n;
-      if (compactedSize <= this.maxSize) {
-        return n;
-      }
-    }
-
-    return bestN ?? compactedCount;
-  }
-
   get checkAfterStep(): boolean {
     return this.config.triggerRatio !== this.config.blockRatio;
   }
@@ -176,45 +76,10 @@ export class DefaultCompactionStrategy implements CompactionStrategy {
   get maxCompactionPerTurn(): number {
     return this.config.maxCompactionPerTurn;
   }
-}
 
-/**
- * Decide whether a compaction split is safe to place immediately after
- * `messages[index]`. A split is safe only when:
- *   - `messages[index]` itself is not a user message or an assistant message
- *     with pending tool calls (cutting either of those off from what follows
- *     would break the conversation), AND
- *   - the next message is not a tool result. The history is well-formed:
- *     tool results only appear after their owning `asst_w_tc` and all tool
- *     results for one exchange land consecutively before the next non-tool
- *     message. So if the suffix starts with a tool result, its `asst_w_tc`
- *     must be in the compacted prefix, which would orphan that result
- *     (e.g. splitting between tool_a and tool_b of a parallel call), AND
- *   - the compacted prefix itself does not end with an unresolved tool
- *     exchange, because pending tool results must remain in the retained tail.
- */
-function canSplitAfter(messages: readonly Message[], index: number): boolean {
-  const m = messages[index];
-  if (m === undefined) return false;
-  if (m.role === 'user') return false;
-  if (m.role === 'assistant' && m.toolCalls.length > 0) return false;
-  if (messages[index + 1]?.role === 'tool') return false;
-  if (prefixEndsWithOpenToolExchange(messages, index)) return false;
-  return true;
-}
-
-function prefixEndsWithOpenToolExchange(messages: readonly Message[], index: number): boolean {
-  if (messages[index]?.role !== 'tool') return false;
-
-  let toolResultCount = 0;
-  for (let i = index; i >= 0; i--) {
-    const message = messages[i];
-    if (message === undefined) return false;
-    if (message.role === 'tool') {
-      toolResultCount++;
-      continue;
-    }
-    return message.role === 'assistant' && message.toolCalls.length > toolResultCount;
+  get maxOverflowCompactionAttempts(): number {
+    return this.config.maxOverflowCompactionAttempts;
   }
-  return false;
 }
+
+export type { CompactionSource };
diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts
index 820365cdc..cef3c5308 100644
--- a/packages/agent-core/src/agent/compaction/types.ts
+++ b/packages/agent-core/src/agent/compaction/types.ts
@@ -1,10 +1,46 @@
 export interface CompactionResult {
+  /** Human-facing summary text produced by the compaction model. */
   summary: string;
+  /**
+   * Exact summary message stored in the live model context. It includes the
+   * compaction prefix that tells the next model this is handoff context rather
+   * than a real user prompt. Optional for backward compatibility with older
+   * wire records, where `summary` was also the model-context text.
+   */
+  contextSummary?: string;
   compactedCount: number;
   tokensBefore: number;
   tokensAfter: number;
+  /**
+   * Number of real user messages kept verbatim ahead of the summary in the
+   * post-compaction live context. Written by `ContextMemory.applyCompaction`
+   * (the single derivation point for the post-compaction shape) so the
+   * wire-transcript reducer can reproduce the live folded length without
+   * re-deriving it from the full transcript. Optional for backward
+   * compatibility with older wire records.
+   */
+  keptUserMessageCount?: number;
+  /**
+   * Number of oldest messages trimmed from the summarizer input when the
+   * compaction request itself overflowed the model window. These messages are
+   * not covered by the produced summary — a real-user message among them may
+   * still be retained verbatim in the live context via `keptUserMessageCount`,
+   * but assistant/tool messages are lost. Surfacing the count lets records and
+   * telemetry report the summary's blind spot honestly. Optional for backward
+   * compatibility with older wire records.
+   */
+  droppedCount?: number;
 }
 
+/**
+ * Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`.
+ * `tokensAfter` / `keptUserMessageCount` / `droppedCount` are optional: the live
+ * path fills in what it knows, while restore passes the persisted record so its
+ * historical values are preserved verbatim.
+ */
+export type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'> &
+  Partial<Pick<CompactionResult, 'contextSummary' | 'tokensAfter' | 'keptUserMessageCount' | 'droppedCount'>>;
+
 export type CompactionSource = 'manual' | 'auto';
 
 export interface CompactionBeginData {
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index 8e2c699fb..15c2c64d3 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -3,10 +3,17 @@ import { createToolMessage, type ContentPart, type Message } from '@moonshot-ai/
 import type { Agent } from '..';
 import { ErrorCodes, KimiError } from '../../errors';
 import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
-import { estimateTokensForMessages } from '../../utils/tokens';
+import { estimateTokens, estimateTokensForMessages } from '../../utils/tokens';
 import { escapeXml } from '../../utils/xml-escape';
-import type { CompactionResult } from '../compaction';
-import { project, trimTrailingOpenToolExchange } from './projector';
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+  type CompactionInput,
+  type CompactionResult,
+} from '../compaction';
+import { project, type ProjectOptions, trimTrailingOpenToolExchange } from './projector';
 import {
   USER_PROMPT_ORIGIN,
   type AgentContextData,
@@ -172,7 +179,7 @@ export class ContextMemory {
         this._tokenCount -= estimateTokensForMessages([message]);
       }
 
-      if (isRealUserPrompt(message)) {
+      if (isRealUserInput(message)) {
         removedUserCount++;
         if (removedUserCount >= count) break;
       }
@@ -205,7 +212,36 @@ export class ContextMemory {
     }
   }
 
-  applyCompaction(result: CompactionResult): void {
+  applyCompaction(input: CompactionInput): CompactionResult {
+    // Single derivation point for the post-compaction shape: the most recent
+    // real user messages (verbatim, within the token budget) followed by a
+    // user-role summary. `tokensAfter` and `keptUserMessageCount` are derived
+    // here from the actual `_history` so the live context, the wire record,
+    // and the transcript reducer all agree — re-deriving them elsewhere (e.g.
+    // from the full transcript, which still holds the untruncated originals of
+    // messages the live context truncated) would diverge.
+    const keptUserMessages = selectRecentUserMessages(
+      collectCompactableUserMessages(this._history),
+      COMPACT_USER_MESSAGE_MAX_TOKENS,
+    );
+    // Live compaction omits these so they are derived from the actual
+    // `_history`; restore passes the persisted record so its historical values
+    // are preserved verbatim. Older wire records did not have `contextSummary`,
+    // so their `summary` remains the model-context text during restore.
+    const contextSummary = input.contextSummary ?? input.summary;
+    const tokensAfter =
+      input.tokensAfter ??
+      estimateTokens(contextSummary) + estimateTokensForMessages(keptUserMessages);
+    const keptUserMessageCount = input.keptUserMessageCount ?? keptUserMessages.length;
+    const result: CompactionResult = {
+      summary: input.summary,
+      contextSummary,
+      compactedCount: input.compactedCount,
+      tokensBefore: input.tokensBefore,
+      tokensAfter,
+      keptUserMessageCount,
+      droppedCount: input.droppedCount,
+    };
     this.agent.records.logRecord({
       type: 'context.apply_compaction',
       ...result,
@@ -213,27 +249,48 @@ export class ContextMemory {
     this.agent.replayBuilder.patchLast('compaction', {
       result: {
         summary: result.summary,
+        contextSummary: result.contextSummary,
         compactedCount: result.compactedCount,
         tokensBefore: result.tokensBefore,
         tokensAfter: result.tokensAfter,
+        keptUserMessageCount: result.keptUserMessageCount,
+        droppedCount: result.droppedCount,
       },
     });
-    this._history = [
-      {
-        role: 'assistant',
-        content: [{ type: 'text', text: result.summary }],
-        toolCalls: [],
-        origin: { kind: 'compaction_summary' },
-      },
-      ...this._history.slice(result.compactedCount),
-    ];
+    const summaryMessage: ContextMessage = {
+      role: 'user',
+      content: [{ type: 'text', text: contextSummary }],
+      toolCalls: [],
+      origin: { kind: 'compaction_summary' },
+    };
+    // Wire backward-compat: a pre-rework `context.apply_compaction` record (which
+    // has no `keptUserMessageCount`) used `[summary, ...history.slice(compactedCount)]`
+    // semantics and kept a verbatim recent tail. Reproduce that exact shape on
+    // restore so resuming a session compacted by an older version does not
+    // silently drop the recent assistant/tool tail beyond `compactedCount`. Gated
+    // on `records.restoring`, so the live/forward path — which always sets
+    // `contextSummary` and `keptUserMessageCount` — is unaffected. The projector's
+    // tool-adjacency repair keeps the restored tail well-formed for strict
+    // providers; compaction only runs at a clean step boundary, so the tail has no
+    // open tool exchange to track.
+    const isLegacyRestore =
+      this.agent.records.restoring !== null &&
+      input.keptUserMessageCount === undefined &&
+      input.compactedCount < this._history.length;
+    this._history = isLegacyRestore
+      ? [summaryMessage, ...this._history.slice(input.compactedCount)]
+      : [...keptUserMessages, summaryMessage];
     this.openSteps.clear();
-    this.flushDeferredMessagesIfToolExchangeClosed();
+    this.pendingToolResultIds.clear();
+    // Drop deferred messages (mostly injections/system reminders) instead of
+    // flushing them: initial context is rebuilt every turn.
+    this.deferredMessages = [];
     this._tokenCount = result.tokensAfter;
     this.tokenCountCoveredMessageCount = this._history.length;
     this.agent.microCompaction.reset();
-    this.agent.injection.onContextCompacted(result.compactedCount);
+    this.agent.injection.onContextCompacted();
     this.agent.emitStatusUpdated();
+    return result;
   }
 
   data(): AgentContextData {
@@ -256,8 +313,8 @@ export class ContextMemory {
     return this._history;
   }
 
-  project(messages: readonly ContextMessage[]): Message[] {
-    return project(this.agent.microCompaction.compact(messages));
+  project(messages: readonly ContextMessage[], options?: ProjectOptions): Message[] {
+    return project(this.agent.microCompaction.compact(messages), options);
   }
 
   get messages(): Message[] {
@@ -461,19 +518,6 @@ function isEmptyOutputText(output: string): boolean {
   return output.length === 0 || output.trim() === TOOL_OUTPUT_EMPTY_TEXT;
 }
 
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') {
-    return origin.trigger === 'user-slash';
-  }
-  if (origin.kind === 'plugin_command') {
-    return origin.trigger === 'user-slash';
-  }
-  return false;
-}
-
 function formatUndoUnavailableMessage(
   requestedCount: number,
   undoableCount: number,
diff --git a/packages/agent-core/src/agent/context/projector.ts b/packages/agent-core/src/agent/context/projector.ts
index 02e574c3d..c10de2f9a 100644
--- a/packages/agent-core/src/agent/context/projector.ts
+++ b/packages/agent-core/src/agent/context/projector.ts
@@ -3,8 +3,96 @@ import type { ContentPart, Message, TextPart } from '@moonshot-ai/kosong';
 import { ErrorCodes, KimiError } from '../../errors';
 import type { ContextMessage } from './types';
 
-export function project(history: readonly ContextMessage[]): Message[] {
-  return mergeAdjacentUserMessages(history);
+export interface ProjectOptions {
+  /**
+   * When `true`, emit a synthetic `tool_result` for any assistant `tool_use`
+   * whose result is not present in the provided messages. Used by full
+   * compaction, where the compacted prefix is a slice that may exclude a
+   * delayed result preserved in the retained tail; the synthetic result keeps
+   * the exchange closed so the summary request is not rejected. Leave `false`
+   * for normal turns, where a missing result means the call is still in-flight
+   * and must not be closed prematurely.
+   */
+  readonly synthesizeMissing?: boolean;
+}
+
+export function project(history: readonly ContextMessage[], options?: ProjectOptions): Message[] {
+  return repairToolExchangeAdjacency(mergeAdjacentUserMessages(history), options);
+}
+
+// Strict providers (Anthropic) require every assistant `tool_use` to be answered
+// by a matching `tool_result` in the immediately following message(s). A
+// misordered history — where a `tool_result` is not adjacent to its `tool_use`,
+// e.g. because a user message (background-task notification, flushed steer)
+// landed in between, or because an interrupted / nested step delayed the result
+// — is rejected with HTTP 400 ("`tool_use` without `tool_result` immediately
+// after"). Micro compaction only exposed this latent misordering by busting the
+// prompt cache and forcing a full revalidation.
+//
+// Repair the adjacency so every assistant `tool_use` is immediately followed by
+// its matching `tool_result` message(s). Matching results are moved up from
+// wherever they appear later in the history; any intervening messages keep their
+// relative order and simply follow the repaired exchange. A tool call with no
+// recorded result anywhere later in the history is left untouched by default —
+// it is still in-flight (pending) rather than orphaned, and the
+// trailing-open-exchange trim plus the interrupted-result synthesis during replay
+// own those cases. With `synthesizeMissing`, a synthetic `tool_result` is emitted
+// for such calls instead; full compaction uses this to keep a sliced prefix
+// closed when a delayed result lives in the retained tail. This is purely a
+// projection-time fix: the underlying history is left untouched, so replay and
+// transcripts keep their original order, while the model always sees a
+// well-formed tool exchange.
+const SYNTHETIC_TOOL_RESULT_TEXT =
+  'Tool result is not available in the current context. Do not assume the tool completed successfully.';
+
+function repairToolExchangeAdjacency(
+  messages: readonly Message[],
+  options?: ProjectOptions,
+): Message[] {
+  const out: Message[] = [];
+  const consumed = new Set<number>();
+  for (let i = 0; i < messages.length; i++) {
+    if (consumed.has(i)) continue;
+    const message = messages[i]!;
+    if (message.role !== 'assistant' || message.toolCalls.length === 0) {
+      out.push(message);
+      continue;
+    }
+
+    out.push(message);
+    const pending = new Set(message.toolCalls.map((toolCall) => toolCall.id));
+    for (let j = i + 1; j < messages.length && pending.size > 0; j++) {
+      if (consumed.has(j)) continue;
+      const next = messages[j]!;
+      const toolCallId = next.toolCallId;
+      if (next.role === 'tool' && toolCallId !== undefined && pending.has(toolCallId)) {
+        out.push(next);
+        consumed.add(j);
+        pending.delete(toolCallId);
+      }
+    }
+    if (options?.synthesizeMissing === true) {
+      // Close any tool call whose result is absent from the provided messages.
+      // Only used by full compaction, where the prefix is a slice that may
+      // exclude a delayed result preserved in the retained tail. For normal
+      // turns a missing result means the call is still in-flight, so it is left
+      // for the trailing-open-exchange trim and replay's interrupted-result
+      // synthesis instead of being closed here.
+      for (const missingId of pending) {
+        out.push(makeSyntheticToolResult(missingId));
+      }
+    }
+  }
+  return out;
+}
+
+function makeSyntheticToolResult(toolCallId: string): Message {
+  return {
+    role: 'tool',
+    content: [{ type: 'text', text: SYNTHETIC_TOOL_RESULT_TEXT }],
+    toolCalls: [],
+    toolCallId,
+  };
 }
 
 function mergeAdjacentUserMessages(history: readonly ContextMessage[]): Message[] {
diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts
index bead3466f..3e841ef6f 100644
--- a/packages/agent-core/src/agent/index.ts
+++ b/packages/agent-core/src/agent/index.ts
@@ -14,7 +14,11 @@ import type { PluginCommandOrigin } from './context';
 
 import type { McpConnectionManager } from '../mcp';
 import { FlagResolver, type ExperimentalFlagResolver } from '../flags';
-import type { PreparedSystemPromptContext, ResolvedAgentProfile } from '../profile';
+import {
+  prepareSystemPromptContext,
+  type PreparedSystemPromptContext,
+  type ResolvedAgentProfile,
+} from '../profile';
 import type { ModelProvider } from '../session/provider-manager';
 import type { SessionSubagentHost } from '../session/subagent-host';
 import { noopTelemetryClient, type TelemetryClient } from '../telemetry';
@@ -86,6 +90,7 @@ export interface AgentOptions {
   readonly experimentalFlags?: ExperimentalFlagResolver;
   readonly replay?: ReplayBuilderOptions;
   readonly additionalDirs?: readonly string[];
+  readonly systemPromptContextProvider?: (() => Promise<PreparedSystemPromptContext>) | undefined;
 }
 
 export class Agent {
@@ -132,6 +137,9 @@ export class Agent {
   readonly replayBuilder: ReplayBuilder;
 
   private additionalDirs: readonly string[];
+  private activeProfile?: ResolvedAgentProfile;
+  private brandHome?: string;
+  private readonly systemPromptContextProvider?: (() => Promise<PreparedSystemPromptContext>) | undefined;
 
   constructor(options: AgentOptions) {
     this.type = options.type ?? 'main';
@@ -151,6 +159,7 @@ export class Agent {
     this.telemetry = options.telemetry ?? noopTelemetryClient;
     this.experimentalFlags = options.experimentalFlags ?? new FlagResolver();
     this.additionalDirs = normalizeAdditionalDirs(options.additionalDirs ?? []);
+    this.systemPromptContextProvider = options.systemPromptContextProvider;
 
     this.llmRequestLogger = new LlmRequestLogger(this.log);
     this.blobStore = options.homedir
@@ -254,7 +263,41 @@ export class Agent {
     });
   }
 
-  useProfile(profile: ResolvedAgentProfile, context?: PreparedSystemPromptContext): void {
+  useProfile(
+    profile: ResolvedAgentProfile,
+    context?: PreparedSystemPromptContext,
+    brandHome?: string,
+  ): void {
+    this.setActiveProfile(profile, brandHome);
+    this.updateSystemPromptFromProfile(profile, context);
+    this.tools.setActiveTools(profile.tools);
+  }
+
+  setActiveProfile(profile: ResolvedAgentProfile, brandHome?: string): void {
+    this.activeProfile = profile;
+    this.brandHome = brandHome;
+  }
+
+  /**
+   * Re-render the system prompt with freshly gathered runtime context (cwd
+   * listing, AGENTS.md, additional-dirs info, skill list). Called after
+   * compaction so the post-compaction turns do not keep a snapshot captured
+   * at session bootstrap. Invalidates the prompt-cache prefix by design.
+   */
+  async refreshSystemPrompt(): Promise<void> {
+    if (this.activeProfile === undefined) return;
+    const context = this.systemPromptContextProvider === undefined
+      ? await prepareSystemPromptContext(this.kaos, this.brandHome, {
+          additionalDirs: this.additionalDirs,
+        })
+      : await this.systemPromptContextProvider();
+    this.updateSystemPromptFromProfile(this.activeProfile, context);
+  }
+
+  private updateSystemPromptFromProfile(
+    profile: ResolvedAgentProfile,
+    context?: PreparedSystemPromptContext,
+  ): void {
     const systemPrompt = profile.systemPrompt({
       osEnv: this.kaos.osEnv,
       cwd: this.config.cwd,
@@ -264,7 +307,6 @@ export class Agent {
       additionalDirsInfo: context?.additionalDirsInfo,
     });
     this.config.update({ profileName: profile.name, systemPrompt });
-    this.tools.setActiveTools(profile.tools);
   }
 
   async resume(options?: AgentRecordsReplayOptions): Promise<{ warning?: string }> {
diff --git a/packages/agent-core/src/agent/injection/injector.ts b/packages/agent-core/src/agent/injection/injector.ts
index 504e412de..d13e18159 100644
--- a/packages/agent-core/src/agent/injection/injector.ts
+++ b/packages/agent-core/src/agent/injection/injector.ts
@@ -9,11 +9,8 @@ export abstract class DynamicInjector {
     this.injectedAt = null;
   }
 
-  onContextCompacted(compactedCount: number): void {
-    if (this.injectedAt !== null) {
-      const newInjectedAt = this.injectedAt - compactedCount + 1;
-      this.injectedAt = newInjectedAt >= 0 ? newInjectedAt : null;
-    }
+  onContextCompacted(): void {
+    this.injectedAt = null;
   }
 
   onContextMessageRemoved(index: number): void {
diff --git a/packages/agent-core/src/agent/injection/manager.ts b/packages/agent-core/src/agent/injection/manager.ts
index 99c9cd07e..812aa6188 100644
--- a/packages/agent-core/src/agent/injection/manager.ts
+++ b/packages/agent-core/src/agent/injection/manager.ts
@@ -1,3 +1,5 @@
+import { formatTaskList } from '#/tools/background/task-list';
+
 import type { Agent } from '..';
 import { GoalInjector } from './goal';
 import type { DynamicInjector } from './injector';
@@ -6,6 +8,9 @@ import { PluginSessionStartInjector } from './plugin-session-start';
 import { PlanModeInjector } from './plan-mode';
 import { TodoListReminderInjector } from './todo-list';
 
+const ACTIVE_BACKGROUND_TASK_GUIDANCE =
+  'The conversation was compacted, so the earlier messages that started these background tasks are gone — but the tasks are still running from before. Do not start duplicates. Use TaskOutput to fetch a task’s result, TaskList to list them, and TaskStop to cancel one.';
+
 export class InjectionManager {
   private readonly injectors: DynamicInjector[];
   // Goal context is injected at continuation boundaries (turn start, each
@@ -40,16 +45,40 @@ export class InjectionManager {
     await this.activeGoalInjector()?.inject();
   }
 
+  async injectAfterCompaction(): Promise<void> {
+    await this.injectGoal();
+    this.injectActiveBackgroundTasks();
+    await this.inject();
+  }
+
+  /**
+   * Post-compaction only: re-surface still-running background tasks. Folding the
+   * live context to [recent user prompts, summary] drops the messages that
+   * started them and their status updates, so without this the model can forget
+   * a task is running and spawn a duplicate. Appended as an `injection`-origin
+   * reminder, so the next compaction drops and rebuilds it — kept fresh, never
+   * stacked. Runs only on the live path: restore replays the persisted reminder
+   * and `FullCompaction.begin` short-circuits before compaction there.
+   */
+  private injectActiveBackgroundTasks(): void {
+    const tasks = this.agent.background.list(true);
+    if (tasks.length === 0) return;
+    this.agent.context.appendSystemReminder(
+      `${ACTIVE_BACKGROUND_TASK_GUIDANCE}\n\n${formatTaskList(tasks, true)}`,
+      { kind: 'injection', variant: 'background_task_status' },
+    );
+  }
+
   onContextClear(): void {
     for (const injector of this.lifecycleInjectors()) {
       injector.onContextClear();
     }
   }
 
-  onContextCompacted(compactedCount: number): void {
+  onContextCompacted(): void {
     for (const injector of this.lifecycleInjectors()) {
       try {
-        injector.onContextCompacted(compactedCount);
+        injector.onContextCompacted();
       } catch {
         continue;
       }
diff --git a/packages/agent-core/src/agent/injection/permission-mode.ts b/packages/agent-core/src/agent/injection/permission-mode.ts
index 638ed6760..ffe5389ad 100644
--- a/packages/agent-core/src/agent/injection/permission-mode.ts
+++ b/packages/agent-core/src/agent/injection/permission-mode.ts
@@ -15,13 +15,20 @@ const AUTO_MODE_EXIT_REMINDER = [
 export class PermissionModeInjector extends DynamicInjector {
   protected override readonly injectionVariant = 'permission_mode';
   private lastMode: PermissionMode | undefined;
+  private refreshAfterCompaction = false;
+
+  override onContextCompacted(): void {
+    this.injectedAt = null;
+    this.refreshAfterCompaction = true;
+  }
 
   getInjection(): string | undefined {
     const mode = this.agent.permission.mode;
     const previousMode = this.lastMode;
 
-    if (mode === previousMode) return undefined;
+    if (!this.refreshAfterCompaction && mode === previousMode) return undefined;
 
+    this.refreshAfterCompaction = false;
     this.lastMode = mode;
     if (mode === 'auto') return AUTO_MODE_ENTER_REMINDER;
     if (previousMode === 'auto') return AUTO_MODE_EXIT_REMINDER;
diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts
index 0765e6ca4..08f2178d0 100644
--- a/packages/agent-core/src/agent/turn/index.ts
+++ b/packages/agent-core/src/agent/turn/index.ts
@@ -137,7 +137,11 @@ export class TurnFlow {
       input,
       origin,
     });
-    if (this.activeTurn) {
+    // Buffer while a turn is active OR a manual compaction holds the context;
+    // `onCompactionFinished` replays the buffer once compaction's full lifecycle
+    // (summary + reinjection) is done. Returning null means "buffered" — which is
+    // exactly what fire-and-forget callers (background notifications, cron) assume.
+    if (this.activeTurn || this.agent.fullCompaction.isCompacting) {
       this.steerBuffer.push({ input, origin });
       return null;
     }
@@ -161,6 +165,18 @@ export class TurnFlow {
       return null;
     }
 
+    // While a manual/SDK compaction holds the context, defer the launch instead
+    // of rejecting it: buffer the input and replay it from `onCompactionFinished`
+    // once compaction's full lifecycle (summary + reinjection) completes. The
+    // deferred turn's eventual `turn.started` lets PromptService associate the
+    // pending prompt, so a prompt submitted mid-compaction completes normally
+    // rather than getting stuck "running". (Auto compaction runs inside an active
+    // turn, so the `activeTurn` check above already covers it.)
+    if (this.agent.fullCompaction.isCompacting) {
+      this.steerBuffer.push({ input, origin });
+      return null;
+    }
+
     // Per-turn setup (telemetry, usage window, `turn.started`, appending the
     // prompt) now lives in `runOneTurn`, so a goal-driven run emits a clean
     // start/end pair per continuation turn rather than one mega-turn.
@@ -289,6 +305,25 @@ export class TurnFlow {
     return true;
   }
 
+  /**
+   * Replay inputs (prompts or steers) that were deferred while a manual compaction
+   * held the context. Called by `FullCompaction` once the compaction lifecycle
+   * (summary + reinjection) is done — and on cancel/failure — so deferred input is
+   * never lost or stuck. If a turn is somehow already active (e.g. one that raced
+   * and cancelled the compaction), let it consume the buffer like any other steer;
+   * otherwise launch a fresh turn from the first buffered item, with the rest
+   * draining into it via `flushSteerBuffer`.
+   */
+  onCompactionFinished(): void {
+    if (this.steerBuffer.length === 0) return;
+    if (this.activeTurn !== null) {
+      this.flushSteerBuffer();
+      return;
+    }
+    const next = this.steerBuffer.shift()!;
+    this.launch(next.input, next.origin);
+  }
+
   finishResume(): void {
     if (this.activeTurn === 'resuming') {
       this.activeTurn = null;
@@ -662,9 +697,15 @@ export class TurnFlow {
           },
           hooks: {
             beforeStep: async ({ signal: stepSignal }) => {
-              this.flushSteerBuffer();
               this.agent.microCompaction.detect();
               await this.agent.fullCompaction.beforeStep(stepSignal);
+              // Flush steered messages (background-task / cron notifications,
+              // user interrupts) AFTER compaction so they land in the
+              // post-compaction context instead of being dropped by it. The
+              // keep/drop decision lives in
+              // `compactionUserMessageDisposition()`; these origins are not
+              // re-injected later, so append them only after compaction runs.
+              this.flushSteerBuffer();
               await this.agent.injection.inject();
               deduper.beginStep();
               return;
diff --git a/packages/agent-core/src/flags/registry.ts b/packages/agent-core/src/flags/registry.ts
index 16f88d592..fcce75ece 100644
--- a/packages/agent-core/src/flags/registry.ts
+++ b/packages/agent-core/src/flags/registry.ts
@@ -17,7 +17,7 @@ export const FLAG_DEFINITIONS = [
     title: 'Micro compaction',
     description: 'Trim older large tool results from context while keeping recent conversation intact.',
     env: 'KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION',
-    default: true,
+    default: false,
     surface: 'core',
   },
 ] as const satisfies readonly FlagDefinitionInput[];
diff --git a/packages/agent-core/src/index.ts b/packages/agent-core/src/index.ts
index 14dcec22a..ae63a8604 100644
--- a/packages/agent-core/src/index.ts
+++ b/packages/agent-core/src/index.ts
@@ -62,6 +62,12 @@ export type {
 export { AGENT_WIRE_PROTOCOL_VERSION } from './agent/records';
 export type { AgentConfigUpdateData } from './agent/config';
 export type { CompactionBeginData, CompactionResult } from './agent/compaction';
+export {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from './agent/compaction';
 export type {
   PermissionApprovalResultRecord,
   PermissionMode,
diff --git a/packages/agent-core/src/profile/default/system.md b/packages/agent-core/src/profile/default/system.md
index d1102d395..9934290dc 100644
--- a/packages/agent-core/src/profile/default/system.md
+++ b/packages/agent-core/src/profile/default/system.md
@@ -143,4 +143,5 @@ At any time, you should be HELPFUL, CONCISE, ACCURATE, and CANDID. Be thorough i
 - Deliver the complete change. Never stub out code with placeholders like `// ... rest unchanged` or leave the user to fill in the gaps; write out every line you mean to change.
 - After a change, sweep for comments and docstrings that now describe the old behavior, and bring them in line with what the code actually does.
 - Before calling a task done, verify it: run the checks that cover your change and look at the result instead of assuming. Don't mark work complete while tests are red or the implementation is still partial — this holds whether or not you are tracking the work in a `TodoList`.
+- When the context fills up it is compacted automatically, so you may suddenly see a summary of the work so far in place of the full thread. Assume compaction happened while you were working: continue naturally from the summary instead of restarting, and make reasonable assumptions about anything it omits rather than redoing settled work. Treat any "done" it reports as unverified until you re-check.
 - Before you finalize a reply, re-read the user's latest request and confirm you are answering that one — not an earlier ask left over from a resume, interruption, mid-task steer, or context compaction.
diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts
index 5003e39e2..a8563370e 100644
--- a/packages/agent-core/src/services/message/transcript.ts
+++ b/packages/agent-core/src/services/message/transcript.ts
@@ -3,8 +3,10 @@
  * agent from its `wire.jsonl` record log.
  *
  * Why: `ContextMemory.applyCompaction` rewrites the in-memory history as
- * `[compaction_summary, ...tail]`, so `getContext().history` only reflects the
- * model's CURRENT context. The wire log, however, keeps every record. The TUI
+ * `[...keptUserMessages, compaction_summary]` (the most recent real user
+ * prompts, verbatim within a token budget, followed by a single user-role
+ * summary), so `getContext().history` only reflects the model's CURRENT
+ * context. The wire log, however, keeps every record. The TUI
  * shows the full transcript on resume because `ReplayBuilder` captures every
  * `pushHistory` during record replay and is never folded by compaction. This
  * module reproduces that exact view for daemon REST consumers (web), without
@@ -19,8 +21,11 @@
  *                                     open assistant message; tool.result appends a
  *                                     tool message with the same `<system>` status
  *                                     wrapping as `toolResultOutputForModel`
- *   - `context.apply_compaction`    → keep the prefix, insert the summary message
- *                                     at the fold point (origin `compaction_summary`)
+ *   - `context.apply_compaction`    → keep the full history, append the
+ *                                     user-role summary marker (origin
+ *                                     `compaction_summary`), and recover
+ *                                     `foldedLength` from the recorded
+ *                                     `keptUserMessageCount`
  *   - `context.undo`                → remove tail messages exactly like
  *                                     `ContextMemory.undo` (skip injections, stop at
  *                                     compaction summaries / `context.clear` floors)
@@ -45,6 +50,12 @@ import path from 'node:path';
 import type { AgentRecord } from '../../agent/records';
 import type { ContextMessage } from '../../agent/context';
 import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from '../../agent/compaction';
 
 type ContentPart = ContextMessage['content'][number];
 
@@ -212,7 +223,7 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
       if (message.origin?.kind === 'compaction_summary') break;
       transcript.splice(i, 1);
       foldedLength = Math.max(0, foldedLength - 1);
-      if (isRealUserPrompt(message)) {
+      if (isRealUserInput(message)) {
         removedUserCount++;
         if (removedUserCount >= count) break;
       }
@@ -238,22 +249,58 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
         applyLoopEvent(record.event, record.time);
         break;
       case 'context.apply_compaction': {
-        // ContextMemory drops history[0..compactedCount] and prepends the
-        // summary; we keep the prefix and insert the summary at the fold
-        // point so the transcript shows both.
-        const tailLength = Math.max(0, foldedLength - record.compactedCount);
-        transcript.splice(Math.max(0, transcript.length - tailLength), 0, {
+        // Mirrors ContextMemory.applyCompaction: the live context becomes the
+        // most recent user messages followed by a user-role summary. The
+        // transcript keeps the full history and appends the summary marker;
+        // foldedLength tracks the post-compaction live context length.
+        transcript.push({
           message: {
-            role: 'assistant',
+            role: 'user',
             content: [{ type: 'text', text: record.summary }],
             toolCalls: [],
             origin: { kind: 'compaction_summary' },
           },
           time: record.time,
         });
-        foldedLength = tailLength + 1;
-        openSteps.clear();
-        flushDeferredIfToolExchangeClosed();
+        // Prefer the kept-user count recorded by the live
+        // ContextMemory.applyCompaction. Re-deriving it from the full
+        // transcript would diverge from the live context: the transcript still
+        // holds the untruncated originals of messages the live context may
+        // have truncated, and (after a clear) messages the live context no
+        // longer has. Only fall back to re-deriving for legacy wire records
+        // that predate the field.
+        if (record.keptUserMessageCount !== undefined) {
+          foldedLength = record.keptUserMessageCount + 1;
+        } else if (record.compactedCount < foldedLength) {
+          // Legacy record (predates keptUserMessageCount) that kept
+          // history.slice(compactedCount) verbatim. Mirror ContextMemory's
+          // legacy restore ([summary, ...tail]): `foldedLength` here still holds
+          // the pre-compaction live length, so the post-compaction length is the
+          // summary plus the tail kept after compactedCount. Re-deriving the
+          // kept-user count instead would diverge from the live context (and
+          // make MessageService mis-handle the messages endpoint for old sessions).
+          foldedLength = 1 + (foldedLength - record.compactedCount);
+        } else {
+          // Legacy record whose compactedCount covered the whole live history (no
+          // tail, matching live restore's `compactedCount < length` guard): fall
+          // back to the new kept-user + summary derivation. Derive only from
+          // entries at or after `clearFloor` — the live ContextMemory rebuilds
+          // `_history` from the post-`/clear` messages only, so counting pre-clear
+          // prompts here would overstate foldedLength and make MessageService skip
+          // unflushed live tail messages for old sessions compacted after a clear.
+          const keptUserMessages = selectRecentUserMessages(
+            collectCompactableUserMessages(
+              transcript.slice(clearFloor).map((entry) => entry.message),
+            ),
+            COMPACT_USER_MESSAGE_MAX_TOKENS,
+          );
+          foldedLength = keptUserMessages.length + 1;
+        }
+        // Drop any open tool exchange and deferred messages exactly like
+        // ContextMemory.applyCompaction: late tool results become orphans and
+        // deferred injections are not rebuilt, so pending ids must not strand
+        // later appends in `deferred`.
+        resetOpenState();
         break;
       }
       case 'context.undo':
@@ -272,20 +319,6 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
   return { entries: transcript as TranscriptEntry[], foldedLength };
 }
 
-/** Mirrors agent-core's `isRealUserPrompt` (context undo accounting). */
-function isRealUserPrompt(message: MutableMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') {
-    return origin.trigger === 'user-slash';
-  }
-  if (origin.kind === 'plugin_command') {
-    return origin.trigger === 'user-slash';
-  }
-  return false;
-}
-
 /** Mirrors agent-core's `toolResultOutputForModel` + `createToolMessage`. */
 function toolResultContent(result: ExecutableToolResult): ContentPart[] {
   const output = result.output;
diff --git a/packages/agent-core/src/services/session/sessionService.ts b/packages/agent-core/src/services/session/sessionService.ts
index da1eb4fa8..009a63b80 100644
--- a/packages/agent-core/src/services/session/sessionService.ts
+++ b/packages/agent-core/src/services/session/sessionService.ts
@@ -1,6 +1,7 @@
 import { Disposable, IInstantiationService, InstantiationType, registerSingleton } from '../../di';
 import { Emitter } from '../../base/common/event';
 import { ErrorCodes, KimiError } from '../../errors';
+import { isRealUserInput } from '../../agent/compaction';
 import type { AgentContextData, ContextMessage } from '../../agent/context';
 import type { JsonObject, ListSessionsPayload, SessionSummary } from '../../rpc';
 import type { SessionMeta } from '../../session';
@@ -59,7 +60,7 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool
     if (message === undefined) continue;
     if (message.origin?.kind === 'injection') continue;
     if (message.origin?.kind === 'compaction_summary') return false;
-    if (isRealUserPrompt(message)) {
+    if (isRealUserInput(message)) {
       found++;
       if (found >= count) return true;
     }
@@ -67,19 +68,6 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool
   return false;
 }
 
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') {
-    return origin.trigger === 'user-slash';
-  }
-  if (origin.kind === 'plugin_command') {
-    return origin.trigger === 'user-slash';
-  }
-  return false;
-}
-
 function pageContextMessages(
   sessionId: string,
   sessionCreatedAtMs: number,
diff --git a/packages/agent-core/src/session/index.ts b/packages/agent-core/src/session/index.ts
index ab68824f9..6b64dab19 100644
--- a/packages/agent-core/src/session/index.ts
+++ b/packages/agent-core/src/session/index.ts
@@ -473,7 +473,7 @@ export class Session {
       this.options.kimiHomeDir,
       { additionalDirs: this.additionalDirs },
     );
-    agent.useProfile(profile, context);
+    agent.useProfile(profile, context, this.options.kimiHomeDir);
     const { agentsMdWarning } = context;
     if (agentsMdWarning !== undefined) {
       this.agentsMdWarning = agentsMdWarning;
@@ -725,7 +725,8 @@ export class Session {
   ): Agent {
     const parentAgent = parentAgentId !== null ? this.getReadyAgent(parentAgentId) : undefined;
     const cwd = parentAgent?.config.cwd ?? this.toolKaos.getcwd();
-    return new Agent({
+    let agent!: Agent;
+    agent = new Agent({
       ...config,
       type,
       kaos: this.toolKaos.withCwd(cwd),
@@ -745,7 +746,14 @@ export class Session {
       pluginCommands: type === 'main' ? this.options.pluginCommands : undefined,
       experimentalFlags: this.experimentalFlags,
       additionalDirs: parentAgent?.getAdditionalDirs() ?? this.additionalDirs,
+      systemPromptContextProvider: () =>
+        prepareSystemPromptContext(
+          this.systemContextKaos(agent.kaos.getcwd()),
+          this.options.kimiHomeDir,
+          { additionalDirs: agent.getAdditionalDirs() },
+        ),
     });
+    return agent;
   }
 
   private permissionOptions(
@@ -818,6 +826,7 @@ export class Session {
     try {
       const agent = this.instantiateAgent(id, meta.homedir, meta.type, {}, parentAgentId);
       const result = await agent.resume();
+      this.restoreAgentProfileHandle(agent, meta, parent?.agent);
       this.agents.set(id, agent);
       return { agent, warning: parent?.warning ?? result.warning };
     } catch (error) {
@@ -829,6 +838,34 @@ export class Session {
     }
   }
 
+  private restoreAgentProfileHandle(
+    agent: Agent,
+    meta: AgentMeta,
+    parentAgent: Agent | undefined,
+  ): void {
+    if (agent.config.systemPrompt === '') return;
+    const profile = this.resolvePersistedProfile(agent, meta, parentAgent);
+    if (profile === undefined) return;
+    agent.setActiveProfile(profile, this.options.kimiHomeDir);
+  }
+
+  private resolvePersistedProfile(
+    agent: Agent,
+    meta: AgentMeta,
+    parentAgent: Agent | undefined,
+  ): ResolvedAgentProfile | undefined {
+    const profileName = agent.config.profileName;
+    if (profileName === undefined) return undefined;
+    if (meta.type === 'sub') {
+      const parentProfileName = parentAgent?.config.profileName;
+      return (
+        DEFAULT_AGENT_PROFILES[parentProfileName ?? 'agent']?.subagents?.[profileName] ??
+        DEFAULT_AGENT_PROFILES['agent']?.subagents?.[profileName]
+      );
+    }
+    return DEFAULT_AGENT_PROFILES[profileName];
+  }
+
   private nextGeneratedAgentId(): string {
     while (true) {
       const id = `agent-${this.agentIdCounter++}`;
diff --git a/packages/agent-core/src/session/subagent-host.ts b/packages/agent-core/src/session/subagent-host.ts
index 1e6e249cf..7aa81fdf3 100644
--- a/packages/agent-core/src/session/subagent-host.ts
+++ b/packages/agent-core/src/session/subagent-host.ts
@@ -374,7 +374,7 @@ export class SessionSubagentHost {
       this.session.options.kimiHomeDir,
       { additionalDirs: child.getAdditionalDirs() },
     );
-    child.useProfile(profile, context);
+    child.useProfile(profile, context, this.session.options.kimiHomeDir);
     child.tools.inheritUserTools(parent.tools);
   }
 
diff --git a/packages/agent-core/src/tools/background/task-list.ts b/packages/agent-core/src/tools/background/task-list.ts
index 2d39e7972..a1bdb1489 100644
--- a/packages/agent-core/src/tools/background/task-list.ts
+++ b/packages/agent-core/src/tools/background/task-list.ts
@@ -34,7 +34,7 @@ export type TaskListInput = z.Infer<typeof TaskListInputSchema>;
 
 // ── Implementation ───────────────────────────────────────────────────
 
-function formatTaskList(tasks: BackgroundTaskInfo[], activeOnly: boolean): string {
+export function formatTaskList(tasks: BackgroundTaskInfo[], activeOnly: boolean): string {
   // `active_only=false` mixes in terminal/lost tasks, so the count is no
   // longer purely "active" — use a neutral label to avoid mislabeling them.
   const label = activeOnly ? 'active_background_tasks' : 'background_tasks';
diff --git a/packages/agent-core/src/utils/tokens.ts b/packages/agent-core/src/utils/tokens.ts
index fe567f732..845e2024b 100644
--- a/packages/agent-core/src/utils/tokens.ts
+++ b/packages/agent-core/src/utils/tokens.ts
@@ -1,6 +1,19 @@
 import type { ContentPart, Message, Tool } from '@moonshot-ai/kosong';
 
-const messageTokenEstimateCache = new WeakMap<Message, number>();
+/**
+ * Structural subset of kosong's {@link Message} that token estimation reads.
+ * Accepting the subset (instead of the full `Message`) lets callers with
+ * message-shaped objects — such as the compaction helpers in `handoff.ts`,
+ * which carry only `role`/`content`/`origin` — estimate tokens without an
+ * unsafe cast, while full `Message` values still satisfy it.
+ */
+interface TokenEstimatableMessage {
+  readonly role: string;
+  readonly content: readonly ContentPart[];
+  readonly toolCalls?: readonly { readonly name: string; readonly arguments: unknown }[];
+}
+
+const messageTokenEstimateCache = new WeakMap<TokenEstimatableMessage, number>();
 
 /**
  * Estimate token count from text using a character-based heuristic.
@@ -41,7 +54,7 @@ export function estimateTokensForTools(tools: readonly Tool[]): number {
   return total;
 }
 
-export function estimateTokensForMessage(message: Message): number {
+export function estimateTokensForMessage(message: TokenEstimatableMessage): number {
   const cached = messageTokenEstimateCache.get(message);
   if (cached !== undefined) {
     return cached;
@@ -67,11 +80,35 @@ export function estimateTokensForContentParts(parts: readonly ContentPart[]): nu
   return total;
 }
 
+/**
+ * Transient per-part token floor for media (image/audio/video) whose real size
+ * cannot be cheaply derived from a data URL without decoding it. Mirrors the
+ * fixed ~2000-tokens-per-image estimate used elsewhere in the industry and, by
+ * the same reasoning, deliberately does NOT count the base64 payload as text —
+ * that would wildly over-count (a few MB of data URL would read as ~1M tokens).
+ * The value is transient: the next LLM round-trip returns the real usage and
+ * supersedes it. Its only job is to stop compaction triggers, the
+ * overflow-shrink budget, the kept-user budget, and `tokensAfter` from treating
+ * media parts as free.
+ */
+export const MEDIA_TOKEN_ESTIMATE = 2000;
+
 export function estimateTokensForContentPart(part: ContentPart): number {
-  if (part.type === 'text') {
-    return estimateTokens(part.text);
-  } else if (part.type === 'think') {
-    return estimateTokens(part.think);
+  switch (part.type) {
+    case 'text':
+      return estimateTokens(part.text);
+    case 'think':
+      return estimateTokens(part.think);
+    case 'image_url':
+    case 'audio_url':
+    case 'video_url':
+      return MEDIA_TOKEN_ESTIMATE;
+    default: {
+      // Exhaustiveness guard: a new ContentPart kind must declare its estimate
+      // here rather than silently counting as 0 (the CMP-03 defect).
+      const _exhaustive: never = part;
+      void _exhaustive;
+      return 0;
+    }
   }
-  return 0;
 }
diff --git a/packages/agent-core/test/agent/basic.test.ts b/packages/agent-core/test/agent/basic.test.ts
index 1c9bfec61..2ecaf615e 100644
--- a/packages/agent-core/test/agent/basic.test.ts
+++ b/packages/agent-core/test/agent/basic.test.ts
@@ -9,7 +9,7 @@ it('creates an independent agent with a scoped experimental flag resolver', () =
     experimentalFlags: new FlagResolver({}, FLAG_DEFINITIONS),
   });
 
-  expect(ctx.agent.experimentalFlags.enabled('micro_compaction')).toBe(true);
+  expect(ctx.agent.experimentalFlags.enabled('micro_compaction')).toBe(false);
 });
 
 it('runs a text-only agent turn from prompt to completion', async () => {
diff --git a/packages/agent-core/test/agent/compaction/anthropic-compliance.test.ts b/packages/agent-core/test/agent/compaction/anthropic-compliance.test.ts
new file mode 100644
index 000000000..521f0cbd3
--- /dev/null
+++ b/packages/agent-core/test/agent/compaction/anthropic-compliance.test.ts
@@ -0,0 +1,248 @@
+// Anthropic-compliance smoke tests for compaction.
+//
+// Anthropic (and strict Anthropic-compatible backends) reject a request unless
+// roles strictly alternate user/assistant AND every assistant `tool_use` is
+// answered by a matching `tool_result` in the immediately following message.
+// Compaction's output and its summarizer request must satisfy both — but the
+// guarantee spans two layers: the projector merges only `origin.kind === 'user'`
+// messages, so the user-role summary, skill/plugin activations, and injected
+// reminders stay as CONSECUTIVE user messages in the projected output, and it is
+// the Anthropic provider's own consecutive-user merge that finally collapses
+// them. Tool pairing likewise depends on the projector's adjacency repair and
+// (for the summarizer request) synthetic results for still-open calls.
+//
+// These tests drive the real compaction/projection functions, run their output
+// through the real AnthropicChatProvider conversion, and assert the wire request
+// is well-formed — so a regression in any single layer turns red here.
+import { createProvider } from '@moonshot-ai/kosong';
+import type { Message, Tool } from '@moonshot-ai/kosong';
+import { describe, expect, it, vi } from 'vitest';
+
+import type { ContextMessage } from '../../../src/agent/context';
+import { testAgent } from '../harness/agent';
+
+const PROVIDER = { type: 'kimi', apiKey: 'test-key', model: 'kimi-code' } as const;
+const CAPS = {
+  image_in: true,
+  video_in: true,
+  audio_in: false,
+  thinking: true,
+  tool_use: true,
+  max_context_tokens: 256_000,
+} as const;
+
+type WireBlock = { type: string; id?: string; tool_use_id?: string; text?: string };
+type WireMessage = { role: string; content: WireBlock[] };
+
+function makeAnthropicResponse() {
+  return {
+    id: 'msg_test_smoke',
+    type: 'message',
+    role: 'assistant',
+    model: 'k25',
+    content: [{ type: 'text', text: 'ok' }],
+    stop_reason: 'end_turn',
+    usage: { input_tokens: 1, output_tokens: 1 },
+  };
+}
+
+/**
+ * Convert a projected `Message[]` through the real Anthropic provider and return
+ * the wire `messages` it would POST — mirroring kosong's own captureRequestBody.
+ */
+async function toAnthropicWire(history: Message[], tools: Tool[] = []): Promise<WireMessage[]> {
+  const provider = createProvider({
+    type: 'anthropic',
+    model: 'k25',
+    apiKey: 'test-key',
+    defaultMaxTokens: 1024,
+    stream: false,
+  });
+  let captured: { messages?: WireMessage[] } | undefined;
+  (provider as unknown as { _client: { messages: { create: unknown } } })._client.messages.create =
+    vi.fn().mockImplementation((params: unknown) => {
+      captured = params as { messages?: WireMessage[] };
+      return Promise.resolve(makeAnthropicResponse());
+    });
+
+  const stream = await provider.generate('', tools, history);
+  for await (const part of stream) {
+    void part;
+  }
+  if (captured?.messages === undefined) {
+    throw new Error('Expected provider.generate() to call messages.create with messages');
+  }
+  return captured.messages;
+}
+
+/** Assert the wire request satisfies Anthropic's alternation + tool-pairing rules. */
+function assertValidAnthropic(messages: WireMessage[]): void {
+  expect(messages.length).toBeGreaterThan(0);
+  expect(messages[0]!.role).toBe('user');
+
+  for (let i = 1; i < messages.length; i++) {
+    expect(
+      messages[i]!.role,
+      `roles must alternate, but messages[${String(i - 1)}] and [${String(i)}] are both ${messages[i]!.role}`,
+    ).not.toBe(messages[i - 1]!.role);
+  }
+
+  for (let i = 0; i < messages.length; i++) {
+    const message = messages[i]!;
+    for (const block of message.content) {
+      if (block.type === 'tool_use') {
+        expect(message.role, 'tool_use must be on an assistant message').toBe('assistant');
+        const next = messages[i + 1];
+        const answered =
+          next?.content.some((b) => b.type === 'tool_result' && b.tool_use_id === block.id) ?? false;
+        expect(answered, `tool_use ${String(block.id)} must be answered in the next message`).toBe(
+          true,
+        );
+      }
+      if (block.type === 'tool_result') {
+        expect(message.role, 'tool_result must be on a user message').toBe('user');
+        const prev = messages[i - 1];
+        const hasUse =
+          prev?.content.some((b) => b.type === 'tool_use' && b.id === block.tool_use_id) ?? false;
+        expect(
+          hasUse,
+          `tool_result ${String(block.tool_use_id)} must immediately follow its tool_use`,
+        ).toBe(true);
+      }
+    }
+  }
+}
+
+const BASH_TOOL: Tool = {
+  name: 'Bash',
+  description: 'Run a shell command',
+  parameters: { type: 'object', properties: { command: { type: 'string' } } },
+};
+
+describe('compaction — Anthropic wire compliance', () => {
+  it('post-compaction context plus a follow-up tool turn is a valid Anthropic request', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    // A couple of real user prompts so some survive compaction verbatim.
+    ctx.appendExchange(1, 'first request', 'assistant one', 40);
+    ctx.appendExchange(2, 'second request', 'assistant two', 40);
+
+    ctx.agent.context.applyCompaction({
+      summary: 'Working summary.',
+      compactedCount: ctx.agent.context.history.length,
+      tokensBefore: 100,
+    });
+    // A follow-up turn that calls a tool, appended after the summary.
+    ctx.appendToolExchange();
+
+    const wire = await toAnthropicWire(ctx.agent.context.messages, [BASH_TOOL]);
+    // [merged kept users + summary + new user] -> one user; then assistant
+    // tool_use; then user tool_result.
+    assertValidAnthropic(wire);
+    expect(wire.some((m) => m.content.some((b) => b.type === 'tool_use'))).toBe(true);
+    expect(wire.some((m) => m.content.some((b) => b.type === 'tool_result'))).toBe(true);
+  });
+
+  it('collapses mixed-origin kept users and the summary into a single Anthropic user turn', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    // Genuine user input the projector merges, plus a user-slash skill activation
+    // it does NOT merge (different origin) — both kept by compaction.
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real prompt' }], { kind: 'user' });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: '/do-thing' }], {
+      kind: 'skill_activation',
+      activationId: 'a1',
+      skillName: 'do-thing',
+      trigger: 'user-slash',
+    });
+
+    ctx.agent.context.applyCompaction({
+      summary: 'Working summary.',
+      compactedCount: ctx.agent.context.history.length,
+      tokensBefore: 100,
+    });
+
+    // Projected output still has consecutive user messages (skill + summary are
+    // not merged by the projector); only the Anthropic merge collapses them.
+    const projected = ctx.agent.context.messages;
+    expect(projected.filter((m) => m.role === 'user').length).toBeGreaterThan(1);
+
+    const wire = await toAnthropicWire(projected);
+    assertValidAnthropic(wire);
+    expect(wire).toHaveLength(1);
+    expect(wire[0]!.role).toBe('user');
+  });
+
+  it('keeps the request valid across repeated compactions', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendExchange(1, 'first request', 'assistant one', 40);
+    ctx.agent.context.applyCompaction({
+      summary: 'First summary.',
+      compactedCount: ctx.agent.context.history.length,
+      tokensBefore: 100,
+    });
+    ctx.appendExchange(2, 'second request', 'assistant two', 40);
+    ctx.agent.context.applyCompaction({
+      summary: 'Second summary.',
+      compactedCount: ctx.agent.context.history.length,
+      tokensBefore: 100,
+    });
+    ctx.appendToolExchange();
+
+    const wire = await toAnthropicWire(ctx.agent.context.messages, [BASH_TOOL]);
+    assertValidAnthropic(wire);
+  });
+
+  it('produces a valid summarizer request when a tool result is non-adjacent to its call', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    // A background-task notification (user role) landed between the tool call and
+    // its result, so they are non-adjacent in history.
+    const messy: ContextMessage[] = [
+      { role: 'user', content: [{ type: 'text', text: 'run it' }], toolCalls: [], origin: { kind: 'user' } },
+      {
+        role: 'assistant',
+        content: [{ type: 'text', text: 'calling' }],
+        toolCalls: [{ type: 'function', id: 'call_1', name: 'Bash', arguments: '{"command":"ls"}' }],
+      },
+      {
+        role: 'user',
+        content: [{ type: 'text', text: 'background task finished' }],
+        toolCalls: [],
+        origin: { kind: 'background_task', taskId: 't', status: 'completed', notificationId: 'n' },
+      },
+      { role: 'tool', content: [{ type: 'text', text: 'a.ts b.ts' }], toolCalls: [], toolCallId: 'call_1' },
+    ];
+
+    // Mirrors FullCompaction's summarizer projection.
+    const projected = ctx.agent.context.project(messy, { synthesizeMissing: true });
+    const wire = await toAnthropicWire(projected, [BASH_TOOL]);
+    assertValidAnthropic(wire);
+  });
+
+  it('closes a still-open tool call in the summarizer request with a synthetic result', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    // History ends on an assistant tool call whose result never arrived (sliced
+    // out by overflow shrink, or interrupted) — a dangling tool_use.
+    const dangling: ContextMessage[] = [
+      { role: 'user', content: [{ type: 'text', text: 'do it' }], toolCalls: [], origin: { kind: 'user' } },
+      {
+        role: 'assistant',
+        content: [{ type: 'text', text: 'calling' }],
+        toolCalls: [{ type: 'function', id: 'call_x', name: 'Bash', arguments: '{}' }],
+      },
+    ];
+
+    const projected = ctx.agent.context.project(dangling, { synthesizeMissing: true });
+    const wire = await toAnthropicWire(projected, [BASH_TOOL]);
+    assertValidAnthropic(wire);
+    // The dangling call is closed by a synthetic tool_result.
+    const lastUser = wire.at(-1)!;
+    expect(lastUser.role).toBe('user');
+    expect(lastUser.content.some((b) => b.type === 'tool_result' && b.tool_use_id === 'call_x')).toBe(
+      true,
+    );
+  });
+});
diff --git a/packages/agent-core/test/agent/compaction/compaction-scenarios.test.ts b/packages/agent-core/test/agent/compaction/compaction-scenarios.test.ts
new file mode 100644
index 000000000..dd22ec6c5
--- /dev/null
+++ b/packages/agent-core/test/agent/compaction/compaction-scenarios.test.ts
@@ -0,0 +1,433 @@
+// Compaction scenario + probe tests.
+//
+// Two kinds of tests live here:
+//   * GUARD tests lock in behavior we rely on (so future refactors can't
+//     silently regress it).
+//   * PROBE tests exercise the high-risk scenarios surfaced in review and in
+//     our own audit, asserting the DESIRED behavior. Where the current
+//     implementation does NOT meet that bar, the probe is marked `it.fails`:
+//     the suite stays green, but the test documents the exact defect and will
+//     start failing (forcing its removal) the day the behavior is fixed.
+//
+// Compaction is a hot path, so these intentionally drive the real
+// Agent/ContextMemory/FullCompaction machinery through the test harness rather
+// than mocking it.
+import type { ContentPart, Message } from '@moonshot-ai/kosong';
+import { describe, expect, it } from 'vitest';
+
+import type { AgentOptions } from '../../../src/agent';
+import { COMPACTION_SUMMARY_PREFIX } from '../../../src/agent/compaction';
+import type { ContextMessage } from '../../../src/agent/context';
+import { FLAG_DEFINITIONS, FlagResolver } from '../../../src/flags';
+import { testAgent, type TestAgentContext } from '../harness/agent';
+
+type GenerateFn = NonNullable<AgentOptions['generate']>;
+
+const PROVIDER = { type: 'kimi', apiKey: 'test-key', model: 'kimi-code' } as const;
+const CAPS = {
+  image_in: true,
+  video_in: true,
+  audio_in: false,
+  thinking: true,
+  tool_use: true,
+  max_context_tokens: 256_000,
+} as const;
+
+function textResult(text: string): Awaited<ReturnType<GenerateFn>> {
+  return {
+    id: 'mock-compaction-summary',
+    message: { role: 'assistant', content: [{ type: 'text', text }], toolCalls: [] },
+    usage: { inputOther: 1, output: 1, inputCacheRead: 0, inputCacheCreation: 0 },
+    finishReason: 'completed',
+    rawFinishReason: 'stop',
+  };
+}
+
+function historyTexts(ctx: TestAgentContext): string[] {
+  return ctx.agent.context.history.map((message) =>
+    message.content.map((part) => (part.type === 'text' ? part.text : `[${part.type}]`)).join(''),
+  );
+}
+
+function summaryMessageText(ctx: TestAgentContext): string {
+  const summary = ctx.agent.context.history.find(
+    (message) => message.origin?.kind === 'compaction_summary',
+  );
+  return summary?.content.map((part) => (part.type === 'text' ? part.text : '')).join('') ?? '';
+}
+
+describe('compaction — guard tests', () => {
+  it('repeated compaction folds the prior summary into the new one, never stacking two summaries', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendExchange(1, 'user one', 'assistant one', 40);
+
+    ctx.mockNextResponse({ type: 'text', text: 'First summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'user two' }]);
+    ctx.mockNextResponse({ type: 'text', text: 'Second summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    const summaries = ctx.agent.context.history.filter(
+      (message) => message.origin?.kind === 'compaction_summary',
+    );
+    // Exactly one summary survives; the first was re-summarized, not carried.
+    expect(summaries).toHaveLength(1);
+    expect(summaryMessageText(ctx)).toContain('Second summary.');
+    expect(historyTexts(ctx).join('\n')).not.toContain('First summary.');
+  });
+
+  it('closes a dangling tool_use in the compaction summary request via synthesizeMissing', async () => {
+    // Full compaction projects its summarizer input with { synthesizeMissing: true }
+    // so an unresolved tool_use (whose result is sliced out / not yet recorded)
+    // is answered by a synthetic tool_result — keeping the summary request
+    // well-formed for strict providers instead of 400-ing on a dangling call.
+    let summarizerMessages: Message[] | undefined;
+    const capture: GenerateFn = async (_provider, _system, _tools, messages) => {
+      summarizerMessages = messages;
+      return textResult('Compacted summary.');
+    };
+    const ctx = testAgent({ generate: capture });
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendUnresolvedToolExchange(0); // assistant with 2 tool calls, no results
+
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    const msgs = summarizerMessages ?? [];
+    const assistantIndex = msgs.findIndex(
+      (message) => message.role === 'assistant' && message.toolCalls.length > 0,
+    );
+    expect(assistantIndex).toBeGreaterThanOrEqual(0);
+    for (const toolCall of msgs[assistantIndex]!.toolCalls) {
+      const answered = msgs
+        .slice(assistantIndex + 1)
+        .some((message) => message.role === 'tool' && message.toolCallId === toolCall.id);
+      expect(answered).toBe(true);
+    }
+  });
+
+  // Mutual exclusion: compaction and turn processing must not run concurrently,
+  // or a turn mutating the context mid-summary loses output. Auto compaction is
+  // structurally safe (it runs while the turn blocks at a step boundary); the
+  // manual/SDK path is guarded explicitly here.
+  it('rejects a manual compaction while a turn is active', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'seed' }], { kind: 'user' });
+    ctx.mockNextResponse({ type: 'text', text: 'turn done' });
+
+    // launch() sets the active turn synchronously, so a turn is active before the
+    // worker yields — exactly the window an SDK beginCompaction could land in.
+    ctx.agent.turn.prompt([{ type: 'text', text: 'go' }]);
+    expect(ctx.agent.turn.hasActiveTurn).toBe(true);
+
+    await expect(ctx.rpc.beginCompaction({})).rejects.toThrow(/turn/i);
+
+    await ctx.agent.turn.waitForCurrentTurn();
+  });
+
+  it('defers a prompt submitted during compaction and runs it afterward', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendExchange(1, 'user one', 'assistant one', 40);
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    ctx.mockNextResponse({ type: 'text', text: 'answer to the deferred prompt' });
+
+    // begin() sets the compacting flag synchronously before the summarizer yields.
+    void ctx.rpc.beginCompaction({});
+    expect(ctx.agent.fullCompaction.isCompacting).toBe(true);
+
+    // A prompt arriving mid-compaction is buffered (deferred), not rejected: null
+    // means "not launched now", and it must run once compaction finishes.
+    const turnId = ctx.agent.turn.prompt([{ type: 'text', text: 'DEFERRED-PROMPT' }]);
+    expect(turnId).toBeNull();
+
+    await ctx.once('compaction.completed');
+    await ctx.agent.turn.waitForCurrentTurn();
+
+    // Ran after compaction — neither lost nor stuck.
+    expect(historyTexts(ctx).join('\n')).toContain('DEFERRED-PROMPT');
+  });
+
+  it('defers a steer arriving during compaction and delivers it afterward', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendExchange(1, 'user one', 'assistant one', 40);
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    ctx.mockNextResponse({ type: 'text', text: 'handled the steer' });
+
+    void ctx.rpc.beginCompaction({});
+    expect(ctx.agent.fullCompaction.isCompacting).toBe(true);
+
+    // A background-task/cron steer mid-compaction must be buffered (null = buffered,
+    // which is exactly what those fire-and-forget callers assume), not dropped.
+    const turnId = ctx.agent.turn.steer([{ type: 'text', text: 'DEFERRED-STEER' }], {
+      kind: 'background_task',
+      taskId: 't',
+      status: 'completed',
+      notificationId: 'n',
+    });
+    expect(turnId).toBeNull();
+
+    await ctx.once('compaction.completed');
+    await ctx.agent.turn.waitForCurrentTurn();
+
+    expect(historyTexts(ctx).join('\n')).toContain('DEFERRED-STEER');
+  });
+});
+
+describe('compaction — probe tests (high-risk scenarios)', () => {
+  // PROBE #1 / CMP-02 — messages appended while the summarizer request is in
+  // flight (a live step racing a manual/SDK compaction). The summary only covers
+  // the pre-compaction snapshot, and the all-user rebuild would drop the appended
+  // assistant/tool tail — so compaction detects the changed history and cancels,
+  // leaving the appended turn intact for a later clean-boundary compaction.
+  it('preserves an assistant turn appended while the summarizer call is in flight', async () => {
+    let ctx!: TestAgentContext;
+    const appendDuringGenerate: GenerateFn = async () => {
+      // Simulate the turn loop completing a step while compaction awaits.
+      ctx.agent.context.appendLoopEvent({
+        type: 'step.begin',
+        uuid: 'race-step',
+        turnId: '',
+        step: 9,
+      });
+      ctx.agent.context.appendLoopEvent({
+        type: 'content.part',
+        uuid: 'race-part',
+        turnId: '',
+        step: 9,
+        stepUuid: 'race-step',
+        part: { type: 'text', text: 'RACE-ASSISTANT-OUTPUT' },
+      });
+      ctx.agent.context.appendLoopEvent({
+        type: 'step.end',
+        uuid: 'race-step',
+        turnId: '',
+        step: 9,
+        finishReason: 'end_turn',
+      });
+      return textResult('Compacted summary.');
+    };
+    ctx = testAgent({ generate: appendDuringGenerate });
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendExchange(1, 'user one', 'assistant one', 40);
+
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.cancelled');
+
+    expect(historyTexts(ctx).join('\n')).toContain('RACE-ASSISTANT-OUTPUT');
+  });
+
+  // PROBE #1b — a user-ROLE message that compaction would drop (background-task
+  // notification, hook/cron reminder, shell output) appended mid-summary. It is
+  // neither summarized (added after the snapshot) nor kept (applyCompaction keeps
+  // only real user input), so it would silently vanish; the race guard must cancel
+  // on any tail compaction would drop, not just non-user roles.
+  it('cancels compaction when a droppable user-role tail is appended mid-summary', async () => {
+    let ctx!: TestAgentContext;
+    const appendDuringGenerate: GenerateFn = async () => {
+      ctx.agent.context.appendUserMessage([{ type: 'text', text: 'BG-NOTIFY-OUTPUT' }], {
+        kind: 'background_task',
+        taskId: 't',
+        status: 'completed',
+        notificationId: 'n',
+      });
+      return textResult('Compacted summary.');
+    };
+    ctx = testAgent({ generate: appendDuringGenerate });
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendExchange(1, 'user one', 'assistant one', 40);
+
+    await ctx.rpc.beginCompaction({});
+    await Promise.race([ctx.once('compaction.completed'), ctx.once('compaction.cancelled')]);
+
+    // Cancelled, so the notification survives in history rather than being dropped.
+    expect(historyTexts(ctx).join('\n')).toContain('BG-NOTIFY-OUTPUT');
+  });
+
+  // PROBE #2 — empty/truncated summarizer responses drop one oldest message and
+  // retry. A dedicated shrink counter, bounded by MAX_COMPACTION_RETRY_ATTEMPTS,
+  // keeps a model that always returns empty from issuing ~one call per message.
+  it('bounds summarizer calls by the retry limit when the model keeps returning empty', async () => {
+    let calls = 0;
+    // Empty 7 times, then a valid summary. The bounded shrink counter gives up by
+    // ~call 6, so compaction errors out before ever reaching the 8th (valid)
+    // response; an unbounded impl would tolerate all 7 and complete on the 8th.
+    const flakyEmpty: GenerateFn = async () => {
+      calls += 1;
+      return calls <= 7 ? textResult('') : textResult('Compacted summary.');
+    };
+    const ctx = testAgent({ generate: flakyEmpty });
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    for (let i = 1; i <= 5; i++) {
+      ctx.appendExchange(i, `user ${String(i)}`, `assistant ${String(i)}`, 40);
+    }
+
+    await ctx.rpc.beginCompaction({});
+    await Promise.race([ctx.once('compaction.completed'), ctx.once('error')]);
+
+    // A retry budget of MAX_COMPACTION_RETRY_ATTEMPTS(5) should bound calls.
+    expect(calls).toBeLessThanOrEqual(6);
+  });
+
+  // PROBE #3 / CMP-08 — the kept-user budget is a fixed 20k and ignores the
+  // model window, so on a small-window model the post-compaction context can
+  // still exceed the trigger, re-compacting every turn without converging.
+  it.fails('keeps the post-compaction context below the auto-compaction trigger on a small window', async () => {
+    const SMALL_WINDOW = 16_000;
+    const ctx = testAgent();
+    ctx.configure({
+      provider: PROVIDER,
+      modelCapabilities: { ...CAPS, max_context_tokens: SMALL_WINDOW },
+    });
+    // ~7.5k tokens of user text per message (30k ascii chars / 4).
+    for (let i = 1; i <= 3; i++) {
+      ctx.appendExchange(i, 'u'.repeat(30_000), `assistant ${String(i)}`, 40);
+    }
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    // tokenCount after compaction should leave headroom below the 85% trigger,
+    // otherwise the next turn immediately re-compacts and never converges.
+    expect(ctx.agent.context.tokenCount).toBeLessThan(SMALL_WINDOW * 0.85);
+  });
+
+  // PROBE #4 / CMP-01 — compaction started while a tool exchange is still open
+  // (SDK/REST caller mid-tool) clears pendingToolResultIds, so the tool.result
+  // that arrives afterwards is treated as an orphan and silently dropped.
+  it.fails('does not drop a tool result that arrives after a compaction started mid-exchange', async () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendUnresolvedToolExchange(0); // assistant with 2 tool calls, no results yet
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    // The tool finishes after compaction; its result must not vanish.
+    ctx.agent.context.appendLoopEvent({
+      type: 'tool.result',
+      parentUuid: 'call_unresolved_one',
+      toolCallId: 'call_unresolved_one',
+      result: { output: 'LATE-TOOL-RESULT' },
+    });
+
+    expect(historyTexts(ctx).join('\n')).toContain('LATE-TOOL-RESULT');
+  });
+
+  // CMP-12 fix — restoring a legacy `context.apply_compaction` record (pre-rework:
+  // no keptUserMessageCount; the old `[summary, ...history.slice(compactedCount)]`
+  // semantics kept a verbatim recent tail). On restore we reproduce that shape so
+  // an upgraded session does not lose its recent assistant/tool tail.
+  it('preserves the verbatim tail when restoring a legacy compaction record', () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    ctx.appendExchange(1, 'summarized user', 'TAIL-ASSISTANT', 40);
+
+    // Goes through the real restore path so `records.restoring` gates the legacy
+    // reconstruction. No keptUserMessageCount + compactedCount < length marks the
+    // pre-rework record that kept history.slice(compactedCount) as a tail.
+    ctx.agent.records.restore({
+      type: 'context.apply_compaction',
+      summary: 'Legacy summary.',
+      compactedCount: 1,
+      tokensBefore: 100,
+      tokensAfter: 50,
+    });
+
+    expect(historyTexts(ctx).join('\n')).toContain('TAIL-ASSISTANT');
+  });
+
+  // PROBE #6 — when the summarizer request overflows, historyForModel is shrunk
+  // to a recent suffix but still projected through MicroCompaction.compact()
+  // with the cutoff computed for the FULL history. The absolute cutoff applied
+  // to the shifted suffix can clear recent tool results the summary needs.
+  it.fails('does not clear recent tool results when projecting a shrunk suffix under an active micro-compaction cutoff', () => {
+    // This defect only exists when micro-compaction is active, so enable the
+    // flag explicitly rather than inheriting the ambient KIMI_CODE_EXPERIMENTAL
+    // master switch — otherwise the probe's pass/fail flips with the runner's
+    // environment (on locally with the master switch, off in CI by default).
+    const ctx = testAgent({
+      experimentalFlags: new FlagResolver(
+        { KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION: '1' },
+        FLAG_DEFINITIONS,
+      ),
+    });
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+
+    const bigToolOutput = 'TOOL-OUTPUT-CONTENT '.repeat(60); // > minContentTokens(100)
+    const full: ContextMessage[] = [];
+    for (let i = 0; i < 20; i++) {
+      if (i === 15) {
+        full.push({
+          role: 'tool',
+          content: [{ type: 'text', text: bigToolOutput } satisfies ContentPart],
+          toolCalls: [],
+          toolCallId: `tool-${String(i)}`,
+        });
+      } else {
+        full.push({
+          role: i % 2 === 0 ? 'user' : 'assistant',
+          content: [{ type: 'text', text: `m${String(i)}` }],
+          toolCalls: [],
+          origin: i % 2 === 0 ? { kind: 'user' } : undefined,
+        });
+      }
+    }
+
+    // Cutoff computed for the full history: keep the recent 10 (indices >= 10).
+    ctx.agent.microCompaction.apply(10);
+
+    // In the full history the tool result is at index 15 (>= cutoff) -> kept.
+    const projectedFull = ctx.agent.context.project(full);
+    const fullToolText = projectedFull
+      .map((m) => m.content.map((p) => (p.type === 'text' ? p.text : '')).join(''))
+      .join('\n');
+    expect(fullToolText).toContain('TOOL-OUTPUT-CONTENT');
+
+    // After an overflow shrink drops the oldest 10, the SAME tool result sits at
+    // suffix index 5; the unchanged cutoff(10) now covers it. It must still be
+    // preserved (it is a recent result the summary depends on).
+    const shrunkSuffix = full.slice(10);
+    const projectedSuffix = ctx.agent.context.project(shrunkSuffix);
+    const suffixToolText = projectedSuffix
+      .map((m) => m.content.map((p) => (p.type === 'text' ? p.text : '')).join(''))
+      .join('\n');
+    expect(suffixToolText).toContain('TOOL-OUTPUT-CONTENT');
+  });
+
+  // PROBE #7 / CMP-07 — when the oldest kept user message overflows the budget it
+  // is truncated to text only, dropping any image/audio/video it carried: media
+  // can't be partially truncated, and keeping it whole would overshoot the
+  // budget. Recent messages that fit keep their media; only this boundary message
+  // loses its attachments. Documented as an accepted limitation rather than fixed.
+  it.fails('keeps media on the oldest kept user message instead of dropping it on truncation', () => {
+    const ctx = testAgent();
+    ctx.configure({ provider: PROVIDER, modelCapabilities: CAPS });
+    // Oldest user message: an image + long text that will overflow the budget.
+    ctx.agent.context.appendUserMessage(
+      [
+        { type: 'image_url', imageUrl: { url: 'data:image/png;base64,AAAA' } },
+        { type: 'text', text: 'x'.repeat(120_000) }, // ~30k tokens of text
+      ],
+      { kind: 'user' },
+    );
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user' }], { kind: 'user' });
+
+    ctx.agent.context.applyCompaction({
+      summary: 'Summary.',
+      compactedCount: 2,
+      tokensBefore: 100,
+    });
+
+    const keptParts = ctx.agent.context.history.flatMap((message) => message.content);
+    expect(keptParts.some((part) => part.type === 'image_url')).toBe(true);
+  });
+});
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 4113de935..58ea539c8 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -18,10 +18,14 @@ import { afterEach, describe, expect, it, vi } from 'vitest';
 
 import type { KimiConfig } from '../../../src/config';
 import type { AgentOptions } from '../../../src/agent';
-import { DefaultCompactionStrategy, type CompactionStrategy } from '../../../src/agent/compaction';
+import {
+  COMPACTION_SUMMARY_PREFIX,
+  DefaultCompactionStrategy,
+  type CompactionStrategy,
+} from '../../../src/agent/compaction';
 import { FLAG_DEFINITIONS, MASTER_ENV } from '../../../src/flags';
 import { HookEngine, type HookEngineTriggerArgs } from '../../../src/session/hooks';
-import { estimateTokensForMessages } from '../../../src/utils/tokens';
+import { estimateTokens, estimateTokensForMessages } from '../../../src/utils/tokens';
 import { recordingTelemetry, type TelemetryRecord } from '../../fixtures/telemetry';
 import type { TestAgentContext, TestAgentOptions } from '../harness/agent';
 import { testAgent } from '../harness/agent';
@@ -44,138 +48,6 @@ const CATALOGUED_MODEL_CAPABILITIES = {
 const MICRO_COMPACTION_FLAG_ENV = getMicroCompactionFlagEnv();
 
 describe('FullCompaction', () => {
-  it('keeps an oversized trailing user message as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('keeps consecutive trailing user messages as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user one ${'x'.repeat(1_200)}`),
-      textMessage('user', `pending user two ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('compacts the prefix when the trailing exchange itself is oversized', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'recent user'),
-      textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('returns 0 when there is nothing to compact', () => {
-    const strategy = testCompactionStrategy();
-    expect(strategy.computeCompactCount([], 'auto')).toBe(0);
-    expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0);
-    expect(
-      strategy.computeCompactCount(
-        [
-          textMessage('user', 'a'),
-          textMessage('user', 'b'),
-          textMessage('user', 'c'),
-        ],
-        'auto',
-      ),
-    ).toBe(0);
-  });
-
-  it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'inspect'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }],
-      },
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(0);
-  });
-
-  it('does not split inside a parallel tool exchange', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'run both tools'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [
-          { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' },
-          { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' },
-        ],
-      },
-      { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' },
-      { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' },
-      textMessage('user', 'next prompt'),
-    ];
-
-    // The only valid split is before the parallel exchange (after 'old assistant'),
-    // never between tool_a and tool_b — that would leave tool_b as an orphan.
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('reserves response context by default before the ratio threshold is reached', () => {
-    const strategy = new DefaultCompactionStrategy(() => 256_000);
-
-    expect(strategy.shouldCompact(210_000)).toBe(true);
-    expect(strategy.shouldBlock(210_000)).toBe(true);
-  });
-
-  it('backs off overflow compaction by at least five percent of the context window', () => {
-    const strategy = testCompactionStrategy(1_000);
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      ...Array.from({ length: 20 }, () => [
-        textMessage('user', 'continue'),
-        textMessage('assistant', ''),
-      ]).flat(),
-    ];
-
-    const reduced = strategy.reduceCompactOnOverflow(messages);
-    const removed = messages.slice(reduced);
-
-    expect(reduced).toBeGreaterThan(0);
-    expect(estimateTokensForMessages(removed)).toBeGreaterThanOrEqual(50);
-  });
-
-  it('ignores reserved context when the reserve is not smaller than the model window', () => {
-    const strategy = new DefaultCompactionStrategy(() => 32_000, {
-      triggerRatio: 0.85,
-      blockRatio: 0.85,
-      reservedContextSize: 50_000,
-      maxCompactionPerTurn: 3,
-      maxRecentMessages: 3,
-      maxRecentUserMessages: Infinity,
-      maxRecentSizeRatio: 0.2,
-      minOverflowReductionRatio: 0.05,
-    });
-
-    expect(strategy.shouldCompact(1)).toBe(false);
-    expect(strategy.shouldBlock(1)).toBe(false);
-    expect(strategy.shouldCompact(28_000)).toBe(true);
-    expect(strategy.shouldBlock(28_000)).toBe(true);
-  });
-
   it('runs manual compaction and applies the compacted context', async () => {
     const records: TelemetryRecord[] = [];
     const ctx = testAgent({ telemetry: recordingTelemetry(records) });
@@ -204,12 +76,12 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] full_compaction.begin      { "source": "manual", "instruction": "Keep the important test facts.", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 5, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 5, "maxContextTokens": 256000, "contextUsage": 0.00001953125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 537, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 537, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 537, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction   { "summary": "Compacted summary.", "contextSummary": "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 100, "keptUserMessageCount": 3, "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 100, "maxContextTokens": 256000, "contextUsage": 0.000390625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 537, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 537, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 5 } }
+      [emit] compaction.completed       { "result": { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 100, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -221,13 +93,26 @@ describe('FullCompaction', () => {
         assistant: text "old assistant two"
         user: text "recent user three"
         assistant: text "recent assistant three"
-        user: text <compaction-instruction>
+        user: text "You are about to run out of context. Write a first-person handoff note to\\nyourself so you can seamlessly continue this task after the earlier\\nconversation is cleared.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nWrite the note as your own continuing train of thought — first person, present\\ntense, the way you would reason through the next move. Do not write a\\nthird-party report about someone else's work, and do not impose rigid section\\nheadings; let the shape follow the task.\\n\\nMake the note self-sufficient: the next turn will see only your most recent user\\nmessages and this note — every assistant message, tool call, and tool result\\nabove will be gone. In your own words, preserve what you genuinely need to\\ncontinue:\\n\\n- The latest user request, quoted verbatim, and what it is actually asking for.\\n- The instructions and constraints currently in force (user preferences,\\n  project rules, environment and tooling limits) — condensed to what still\\n  matters.\\n- What has actually been done, at high fidelity: keep the exact commands that\\n  were run, the exact file paths touched, and whether each succeeded or failed.\\n  Keep only the final working version of any code; drop intermediate attempts\\n  and already-resolved errors.\\n- The precise next action — including the exact next command or tool call you\\n  intend to make — and any required format for the final answer.\\n\\nBe honest about uncertainty. If an earlier step claimed something was done but\\nwas never verified (tests \\"passing\\", a fix \\"working\\", a file \\"created\\"), say so\\nplainly and treat it as unverified rather than fact — re-check before relying\\non it.\\n\\nBe concise. Include the critical data, identifiers, and references needed to\\ncontinue, and omit anything that does not change the next move.\\n\\nRespond with text only. Do not call any tools — you already have everything you\\nneed in the conversation history.\\n\\n\\nOptional user instruction:\\nKeep the important test facts."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
         {
-          "role": "assistant",
-          "text": "Compacted summary.",
+          "role": "user",
+          "text": "old user one",
+        },
+        {
+          "role": "user",
+          "text": "old user two",
+        },
+        {
+          "role": "user",
+          "text": "recent user three",
+        },
+        {
+          "role": "user",
+          "text": "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.
+      Compacted summary.",
         },
       ]
     `);
@@ -236,18 +121,169 @@ describe('FullCompaction', () => {
       properties: expect.objectContaining({
         source: 'manual',
         tokens_before: 39,
-        tokens_after: 5,
+        tokens_after: 100,
         duration_ms: expect.any(Number),
         compacted_count: 6,
         retry_count: 0,
         thinking_effort: 'off',
-        input_tokens: 520,
+        input_tokens: 537,
         output_tokens: 8,
       }),
     });
     await ctx.expectResumeMatches();
   });
 
+  it('emits the raw summary while keeping the prefixed summary in model context', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    const completedEvent = ctx.allEvents.find((entry) => entry.event === 'compaction.completed');
+    expect(completedEvent?.args).toEqual({
+      result: expect.objectContaining({
+        summary: 'Compacted summary.',
+      }),
+    });
+    expect(completedEvent?.args).not.toEqual({
+      result: expect.objectContaining({
+        summary: expect.stringContaining(COMPACTION_SUMMARY_PREFIX),
+      }),
+    });
+    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
+      { type: 'text', text: `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.` },
+    ]);
+  });
+
+  it('keeps only real user input and re-injects permission reminders after compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'real user one', 'assistant one', 20);
+    ctx.agent.context.appendBashInput('pwd');
+    ctx.agent.context.appendBashOutput('/tmp/repo', '', false);
+    ctx.agent.context.appendLocalCommandStdout('local command output');
+    ctx.agent.context.appendSystemReminder('stale reminder', {
+      kind: 'injection',
+      variant: 'system_reminder',
+    });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'background task done' }], {
+      kind: 'background_task',
+      taskId: 'task-1',
+      status: 'completed',
+      notificationId: 'notification-1',
+    });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real user two' }]);
+    ctx.agent.permission.setMode('auto');
+
+    const permissionReminder = new Promise<void>((resolve) => {
+      const handler = (entry: unknown) => {
+        const record = entry as {
+          event?: string;
+          args?: { message?: { origin?: { kind?: string; variant?: string } } };
+        };
+        const origin = record.args?.message?.origin;
+        if (
+          record.event === 'context.append_message' &&
+          origin?.kind === 'injection' &&
+          origin.variant === 'permission_mode'
+        ) {
+          ctx.emitter.off('context.append_message', handler);
+          resolve();
+        }
+      };
+      ctx.emitter.on('context.append_message', handler);
+    });
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+    await permissionReminder;
+
+    expect(ctx.agent.context.history.map((message) => message.origin?.kind ?? 'user')).toEqual([
+      'user',
+      'user',
+      'compaction_summary',
+      'injection',
+    ]);
+    expect(
+      ctx.agent.context.history.map((message) =>
+        message.origin?.kind === 'injection' ? message.origin.variant : undefined,
+      ),
+    ).toEqual([undefined, undefined, undefined, 'permission_mode']);
+
+    const applyCompaction = [...ctx.allEvents]
+      .toReversed()
+      .find((entry) => entry.type === '[wire]' && entry.event === 'context.apply_compaction');
+    expect(applyCompaction).toBeDefined();
+    const record = applyCompaction?.args as {
+      keptUserMessageCount?: number;
+      tokensAfter?: number;
+      summary?: string;
+      contextSummary?: string;
+    };
+    expect(record.keptUserMessageCount).toBe(2);
+    const expectedContextSummary = `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.`;
+    expect(record.summary).toBe('Compacted summary.');
+    expect(record.contextSummary).toBe(expectedContextSummary);
+    expect(record.tokensAfter).toBe(
+      estimateTokens(expectedContextSummary) +
+        estimateTokensForMessages(ctx.agent.context.history.slice(0, 2)),
+    );
+  });
+
+  it('refreshes the system prompt after compaction completes', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 40);
+
+    const refreshSpy = vi.spyOn(ctx.agent, 'refreshSystemPrompt');
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    expect(refreshSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('does not reset active tools while refreshing the system prompt after compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.agent.useProfile({
+      name: 'tool-profile',
+      systemPrompt: () => '<profile-prompt>',
+      tools: ['Read', 'Write'],
+    });
+    ctx.agent.tools.setActiveTools(['Read']);
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    const activeTools = ctx.agent.tools
+      .data()
+      .filter((tool) => tool.active)
+      .map((tool) => tool.name)
+      .toSorted();
+    expect(activeTools).toEqual(['Read']);
+  });
+
   it('projects the compacted prefix before sending the summary request', async () => {
     const ctx = testAgent({ compactionStrategy: alwaysCompactOnce });
     ctx.configure({
@@ -385,7 +421,9 @@ describe('FullCompaction', () => {
     expect(authKeys).toEqual(['fresh-token', 'forced-refresh-token', 'fresh-token']);
     expect(tokenCalls).toEqual([undefined, true, undefined]);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
+      { role: 'user', text: 'recent user two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
@@ -547,20 +585,22 @@ describe('FullCompaction', () => {
     await completed;
 
     expect(attempts).toBe(3);
-    // Each empty summary shrinks the compacted prefix before retrying, so the
-    // recovered summary compacts only the older exchange and leaves the recent
-    // one in history.
+    // Empty summaries are retried without shrinking the history; the recovered
+    // summary replaces the whole history with the real user messages plus the
+    // prefixed summary.
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
       { role: 'user', text: 'recent user two' },
-      { role: 'assistant', text: 'recent assistant two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     expect(
       ctx.allEvents.filter((event) => event.event === 'compaction.completed'),
     ).toEqual([
       expect.objectContaining({
         args: expect.objectContaining({
-          result: expect.objectContaining({ summary: 'Recovered compacted summary.' }),
+          result: expect.objectContaining({
+            summary: expect.stringContaining('Recovered compacted summary.'),
+          }),
         }),
       }),
     ]);
@@ -603,12 +643,12 @@ describe('FullCompaction', () => {
     await completed;
 
     expect(inputs).toHaveLength(2);
-    // The retry compacts a strictly smaller prefix than the first attempt.
+    // The retry sends a strictly smaller input than the first attempt.
     expect(inputs[1]!.length).toBeLessThan(inputs[0]!.length);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
       { role: 'user', text: 'recent user two' },
-      { role: 'assistant', text: 'recent assistant two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
@@ -640,8 +680,10 @@ describe('FullCompaction', () => {
     await vi.advanceTimersByTimeAsync(60_000);
     await failed;
 
-    // MAX_COMPACTION_RETRY_ATTEMPTS attempts, with prefix reduction between them.
-    expect(inputs).toHaveLength(5);
+    // Each empty/think-only response drops the oldest item and resets the retry
+    // counter; once only one item remains, MAX_COMPACTION_RETRY_ATTEMPTS more
+    // retries run before failing. 3 drops + 5 retries = 8 generate calls.
+    expect(inputs).toHaveLength(8);
     expect(inputs[1]!.length).toBeLessThan(inputs[0]!.length);
     expect(records).toContainEqual({
       event: 'compaction_failed',
@@ -831,7 +873,9 @@ describe('FullCompaction', () => {
     await vi.advanceTimersByTimeAsync(60_000);
     const events = await ctx.untilTurnEnd();
 
-    expect(attempts).toBe(5);
+    // A single-item history cannot be shrunk further, so the truncated response
+    // fails immediately instead of looping through retries.
+    expect(attempts).toBe(1);
     expect(events).toContainEqual(
       expect.objectContaining({
         event: 'turn.ended',
@@ -907,7 +951,7 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
-  it('keeps an unresolved tool exchange out of the compaction prompt', async () => {
+  it('closes an unresolved tool exchange in the compaction prompt with a synthetic result', async () => {
     const ctx = testAgent();
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
@@ -929,13 +973,20 @@ describe('FullCompaction', () => {
       messages:
         user: text "old user one"
         assistant: text "old assistant one"
-        user: text <compaction-instruction>
+        user: text "run both tools"
+        assistant: []  calls call_open_one:LookupOne { "query": "one" }, call_open_two:LookupTwo { "query": "two" }
+        tool[call_open_one]: text "one result"
+        tool[call_open_two]: text "Tool result is not available in the current context. Do not assume the tool completed successfully."
+        user: text "You are about to run out of context. Write a first-person handoff note to\\nyourself so you can seamlessly continue this task after the earlier\\nconversation is cleared.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nWrite the note as your own continuing train of thought — first person, present\\ntense, the way you would reason through the next move. Do not write a\\nthird-party report about someone else's work, and do not impose rigid section\\nheadings; let the shape follow the task.\\n\\nMake the note self-sufficient: the next turn will see only your most recent user\\nmessages and this note — every assistant message, tool call, and tool result\\nabove will be gone. In your own words, preserve what you genuinely need to\\ncontinue:\\n\\n- The latest user request, quoted verbatim, and what it is actually asking for.\\n- The instructions and constraints currently in force (user preferences,\\n  project rules, environment and tooling limits) — condensed to what still\\n  matters.\\n- What has actually been done, at high fidelity: keep the exact commands that\\n  were run, the exact file paths touched, and whether each succeeded or failed.\\n  Keep only the final working version of any code; drop intermediate attempts\\n  and already-resolved errors.\\n- The precise next action — including the exact next command or tool call you\\n  intend to make — and any required format for the final answer.\\n\\nBe honest about uncertainty. If an earlier step claimed something was done but\\nwas never verified (tests \\"passing\\", a fix \\"working\\", a file \\"created\\"), say so\\nplainly and treat it as unverified rather than fact — re-check before relying\\non it.\\n\\nBe concise. Include the critical data, identifiers, and references needed to\\ncontinue, and omit anything that does not change the next move.\\n\\nRespond with text only. Do not call any tools — you already have everything you\\nneed in the conversation history.\\n\\n\\nOptional user instruction:\\nKeep stable facts."
     `);
+    // The unresolved tool call is sent to the model with a synthetic tool_result
+    // closing it (so a strict provider accepts the summary request), while the
+    // whole exchange is still dropped from the replacement history, leaving only
+    // the real user messages followed by the compaction summary.
     expect(ctx.agent.context.history.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
     ]);
     ctx.dispatch({
       type: 'context.append_loop_event',
@@ -947,11 +998,9 @@ describe('FullCompaction', () => {
       },
     });
     expect(ctx.agent.context.history.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
+      'user',
+      'user',
     ]);
     await ctx.expectResumeMatches();
   });
@@ -979,12 +1028,12 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin      { "source": "manual", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual" }
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "new user while compacting" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 5, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 5, "maxContextTokens": 256000, "contextUsage": 0.00001953125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 508, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 508, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 508, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction   { "summary": "Compacted prefix.", "contextSummary": "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 103, "keptUserMessageCount": 3, "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 103, "maxContextTokens": 256000, "contextUsage": 0.00040234375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 508, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 508, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 5 } }
+      [emit] compaction.completed       { "result": { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 103, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -994,116 +1043,32 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text <compaction-instruction>
+        user: text "You are about to run out of context. Write a first-person handoff note to\\nyourself so you can seamlessly continue this task after the earlier\\nconversation is cleared.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nWrite the note as your own continuing train of thought — first person, present\\ntense, the way you would reason through the next move. Do not write a\\nthird-party report about someone else's work, and do not impose rigid section\\nheadings; let the shape follow the task.\\n\\nMake the note self-sufficient: the next turn will see only your most recent user\\nmessages and this note — every assistant message, tool call, and tool result\\nabove will be gone. In your own words, preserve what you genuinely need to\\ncontinue:\\n\\n- The latest user request, quoted verbatim, and what it is actually asking for.\\n- The instructions and constraints currently in force (user preferences,\\n  project rules, environment and tooling limits) — condensed to what still\\n  matters.\\n- What has actually been done, at high fidelity: keep the exact commands that\\n  were run, the exact file paths touched, and whether each succeeded or failed.\\n  Keep only the final working version of any code; drop intermediate attempts\\n  and already-resolved errors.\\n- The precise next action — including the exact next command or tool call you\\n  intend to make — and any required format for the final answer.\\n\\nBe honest about uncertainty. If an earlier step claimed something was done but\\nwas never verified (tests \\"passing\\", a fix \\"working\\", a file \\"created\\"), say so\\nplainly and treat it as unverified rather than fact — re-check before relying\\non it.\\n\\nBe concise. Include the critical data, identifiers, and references needed to\\ncontinue, and omit anything that does not change the next move.\\n\\nRespond with text only. Do not call any tools — you already have everything you\\nneed in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
         {
-          "role": "assistant",
-          "text": "Compacted prefix.",
+          "role": "user",
+          "text": "old user one",
+        },
+        {
+          "role": "user",
+          "text": "recent user two",
         },
         {
           "role": "user",
           "text": "new user while compacting",
         },
+        {
+          "role": "user",
+          "text": "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.
+      Compacted prefix.",
+        },
       ]
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('continues a manual compaction run when the first pass still exceeds the trigger', async () => {
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 4_000,
-      },
-    });
-    ctx.appendExchange(
-      1,
-      `old user one ${'u'.repeat(14_000)}`,
-      `old assistant one ${'a'.repeat(14_000)}`,
-      6_000,
-    );
-    const firstSummary = `large manual summary ${'x'.repeat(14_000)}`;
-    let appliedCount = 0;
-    const secondCompacted = new Promise<void>((resolve) => {
-      const handler = () => {
-        appliedCount += 1;
-        if (appliedCount === 2) {
-          ctx.emitter.off('context.apply_compaction', handler);
-          resolve();
-        }
-      };
-      ctx.emitter.on('context.apply_compaction', handler);
-    });
-
-    ctx.mockNextResponse({ type: 'text', text: firstSummary });
-    ctx.mockNextResponse({ type: 'text', text: 'Second manual summary.' });
-    const completed = ctx.once('compaction.completed');
-    await ctx.rpc.beginCompaction({});
-    ctx.appendExchange(2, 'new user while compacting', 'new assistant while compacting', 6_000);
-    await secondCompacted;
-    await completed;
-
-    const events = ctx.newEvents();
-    expect(countEvents(events, 'context.apply_compaction')).toBe(2);
-    expect(countEvents(events, 'compaction.started')).toBe(1);
-    expect(countEvents(events, 'compaction.completed')).toBe(1);
-    expect(ctx.llmCalls).toHaveLength(2);
-    const [firstCompactionCall, secondCompactionCall] = ctx.llmCalls;
-    expect(firstCompactionCall?.history.map(messageText)).not.toContain('new user while compacting');
-    expect(secondCompactionCall?.history.map(messageText)).toContain(firstSummary);
-    expect(secondCompactionCall?.history.map(messageText)).toContain('new user while compacting');
-    expect(secondCompactionCall?.history.map(messageText)).toContain('new assistant while compacting');
-    expect(ctx.compactHistory()).toEqual([
-      {
-        role: 'assistant',
-        text: 'Second manual summary.',
-      },
-    ]);
-    await ctx.expectResumeMatches();
-  });
-
-  it('auto-compacts very large context in window-sized rounds', async () => {
-    const maxContextTokens = 4_000;
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: maxContextTokens,
-      },
-    });
-    for (let i = 1; i <= 22; i++) {
-      ctx.appendAssistantTextWithUsage(
-        i,
-        `history chunk ${String(i)} ${'x'.repeat(7_200)}`,
-        i * 1_850,
-      );
-    }
-    const initialTokens = estimateTokensForMessages(ctx.agent.context.history);
-    const completed = ctx.once('compaction.completed');
-    for (let i = 1; i <= 30; i++) {
-      ctx.mockNextResponse({ type: 'text', text: `Auto summary ${String(i)}.` });
-    }
-
-    ctx.agent.fullCompaction.begin({ source: 'auto', instruction: undefined });
-    await completed;
-
-    const events = ctx.newEvents();
-    const compactedPrefixSizes = ctx.llmCalls.map((call) =>
-      estimateTokensForMessages(call.history.slice(0, -1)),
-    );
-    expect(initialTokens).toBeGreaterThan(maxContextTokens * 9);
-    expect(countEvents(events, 'context.apply_compaction')).toBeGreaterThan(1);
-    expect(countEvents(events, 'compaction.completed')).toBe(1);
-    expect(compactedPrefixSizes.length).toBeGreaterThan(1);
-    expect(compactedPrefixSizes.every((size) => size <= maxContextTokens)).toBe(true);
-    expect(ctx.agent.context.tokenCount).toBeLessThan(maxContextTokens * 0.85);
-    await ctx.expectResumeMatches();
-  });
 
   it('cancels when the compacted prefix changes before completion', async () => {
     const ctx = testAgent();
@@ -1127,8 +1092,8 @@ describe('FullCompaction', () => {
       [emit] compaction.started       { "trigger": "manual" }
       [wire] context.clear            { "time": "<time>" }
       [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual" }
-      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 508, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 508, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 508, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.cancel   { "time": "<time>" }
       [emit] compaction.cancelled     {}
     `);
@@ -1140,7 +1105,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text <compaction-instruction>
+        user: text "You are about to run out of context. Write a first-person handoff note to\\nyourself so you can seamlessly continue this task after the earlier\\nconversation is cleared.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nWrite the note as your own continuing train of thought — first person, present\\ntense, the way you would reason through the next move. Do not write a\\nthird-party report about someone else's work, and do not impose rigid section\\nheadings; let the shape follow the task.\\n\\nMake the note self-sufficient: the next turn will see only your most recent user\\nmessages and this note — every assistant message, tool call, and tool result\\nabove will be gone. In your own words, preserve what you genuinely need to\\ncontinue:\\n\\n- The latest user request, quoted verbatim, and what it is actually asking for.\\n- The instructions and constraints currently in force (user preferences,\\n  project rules, environment and tooling limits) — condensed to what still\\n  matters.\\n- What has actually been done, at high fidelity: keep the exact commands that\\n  were run, the exact file paths touched, and whether each succeeded or failed.\\n  Keep only the final working version of any code; drop intermediate attempts\\n  and already-resolved errors.\\n- The precise next action — including the exact next command or tool call you\\n  intend to make — and any required format for the final answer.\\n\\nBe honest about uncertainty. If an earlier step claimed something was done but\\nwas never verified (tests \\"passing\\", a fix \\"working\\", a file \\"created\\"), say so\\nplainly and treat it as unverified rather than fact — re-check before relying\\non it.\\n\\nBe concise. Include the critical data, identifiers, and references needed to\\ncontinue, and omit anything that does not change the next move.\\n\\nRespond with text only. Do not call any tools — you already have everything you\\nneed in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`[]`);
     await ctx.expectResumeMatches();
@@ -1171,20 +1136,20 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "Auto compacted summary.", "compactedCount": 4, "tokensBefore": 46, "tokensAfter": 28, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 28, "maxContextTokens": 256000, "contextUsage": 0.000109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 529, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 529, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 529, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction    { "summary": "Auto compacted summary.", "contextSummary": "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 108, "keptUserMessageCount": 4, "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 108, "maxContextTokens": 256000, "contextUsage": 0.000421875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 529, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 529, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "Auto compacted summary.", "compactedCount": 4, "tokensBefore": 46, "tokensAfter": 28 } }
+      [emit] compaction.completed        { "result": { "summary": "Auto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 108, "keptUserMessageCount": 4 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I can answer after compaction." }
       [wire] context.append_loop_event   { "event": { "type": "content.part", "uuid": "<uuid-2>", "turnId": "0", "step": 1, "stepUuid": "<uuid-1>", "part": { "type": "text", "text": "I can answer after compaction." } }, "time": "<time>" }
-      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
-      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 42, "maxContextTokens": 256000, "contextUsage": 0.0001640625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 529, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 529, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 107, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
+      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 107, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 107, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 118, "maxContextTokens": 256000, "contextUsage": 0.0004609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 636, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 636, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 107, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.ended                  { "turnId": 0, "reason": "completed" }
     `);
     expect(ctx.llmInputs()).toMatchInlineSnapshot(`
@@ -1196,22 +1161,23 @@ describe('FullCompaction', () => {
           assistant: text "old assistant one"
           user: text "old user two"
           assistant: text "old assistant two"
-          user: text <compaction-instruction>
-
-      call 2:
-        messages:
-          assistant: text "Auto compacted summary."
           user: text "recent user three"
           assistant: text "recent assistant three"
           user: text "Answer after compacting"
+          user: text "You are about to run out of context. Write a first-person handoff note to\\nyourself so you can seamlessly continue this task after the earlier\\nconversation is cleared.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nWrite the note as your own continuing train of thought — first person, present\\ntense, the way you would reason through the next move. Do not write a\\nthird-party report about someone else's work, and do not impose rigid section\\nheadings; let the shape follow the task.\\n\\nMake the note self-sufficient: the next turn will see only your most recent user\\nmessages and this note — every assistant message, tool call, and tool result\\nabove will be gone. In your own words, preserve what you genuinely need to\\ncontinue:\\n\\n- The latest user request, quoted verbatim, and what it is actually asking for.\\n- The instructions and constraints currently in force (user preferences,\\n  project rules, environment and tooling limits) — condensed to what still\\n  matters.\\n- What has actually been done, at high fidelity: keep the exact commands that\\n  were run, the exact file paths touched, and whether each succeeded or failed.\\n  Keep only the final working version of any code; drop intermediate attempts\\n  and already-resolved errors.\\n- The precise next action — including the exact next command or tool call you\\n  intend to make — and any required format for the final answer.\\n\\nBe honest about uncertainty. If an earlier step claimed something was done but\\nwas never verified (tests \\"passing\\", a fix \\"working\\", a file \\"created\\"), say so\\nplainly and treat it as unverified rather than fact — re-check before relying\\non it.\\n\\nBe concise. Include the critical data, identifiers, and references needed to\\ncontinue, and omit anything that does not change the next move.\\n\\nRespond with text only. Do not call any tools — you already have everything you\\nneed in the conversation history."
+
+      call 2:
+        messages:
+          user: text "old user one\\n\\nold user two\\n\\nrecent user three\\n\\nAnswer after compacting"
+          user: text "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.\\nAuto compacted summary."
     `);
     expect(records).toContainEqual({
       event: 'compaction_finished',
       properties: expect.objectContaining({
         source: 'auto',
         tokens_before: 46,
-        tokens_after: 28,
-        compacted_count: 4,
+        tokens_after: 108,
+        compacted_count: 7,
         retry_count: 0,
       }),
     });
@@ -1244,15 +1210,18 @@ describe('FullCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
-    // Compaction preserves the in-flight tool exchange in recent; the deferred
-    // reminder still cannot land because the tool exchange is still open.
+    // Compaction drops the in-flight tool exchange and the deferred reminder
+    // (initial context is rebuilt every turn); only real user messages and
+    // the compaction summary remain.
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
+      'user',
+      'user',
     ]);
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
-    // Closing the exchange flushes the deferred reminder to history.
+    // The dropped tool calls no longer exist, so late tool results are orphans
+    // and do not change history.
     ctx.dispatch({
       type: 'context.append_loop_event',
       event: {
@@ -1273,15 +1242,9 @@ describe('FullCompaction', () => {
     });
 
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
-    ]);
-    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nhost note\n</system-reminder>' },
+      'user',
     ]);
   });
 
@@ -1312,13 +1275,18 @@ describe('FullCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
+    // Compaction drops the partially-resolved tool exchange and the deferred
+    // reminder (initial context is rebuilt every turn); only real user
+    // messages and the compaction summary remain.
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
     ]);
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
+    // The dropped tool calls no longer exist, so a late tool result is an orphan
+    // and does not change history.
     ctx.dispatch({
       type: 'context.append_loop_event',
       event: {
@@ -1330,77 +1298,134 @@ describe('FullCompaction', () => {
     });
 
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
-    ]);
-    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nhost note\n</system-reminder>' },
+      'user',
     ]);
   });
 
-  it('fails the turn with compaction.unable when auto compaction has no compactable prefix', async () => {
+  it('rejects manual compaction with compaction.unable when history is empty', async () => {
     const ctx = testAgent();
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 2_000,
-      },
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
-    const oversizedPrompt = `initial-pending-verbatim:${'x'.repeat(8_000)}`;
-
-    await ctx.rpc.prompt({ input: [{ type: 'text', text: oversizedPrompt }] });
-    const events = await ctx.untilTurnEnd();
 
-    expect(eventIndex(events, 'compaction.started')).toBe(-1);
+    await expect(ctx.rpc.beginCompaction({})).rejects.toMatchObject({
+      code: 'compaction.unable',
+    });
     expect(ctx.llmCalls).toHaveLength(0);
-    expect(events).toContainEqual(
-      expect.objectContaining({
-        event: 'turn.ended',
-        args: expect.objectContaining({
-          reason: 'failed',
-          error: expect.objectContaining({ code: 'compaction.unable' }),
-        }),
-      }),
-    );
-    await ctx.expectResumeMatches();
   });
 
-  it('rejects manual compaction with compaction.unable when no prefix is compactable', async () => {
+  it('compacts a single user message and keeps it ahead of the summary', async () => {
     const ctx = testAgent();
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
       modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'only pending user' }]);
-
-    await expect(ctx.rpc.beginCompaction({})).rejects.toMatchObject({
-      code: 'compaction.unable',
-    });
-    expect(ctx.llmCalls).toHaveLength(0);
-
-    ctx.agent.context.clear();
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
-    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 80);
     const compacted = ctx.once('context.apply_compaction');
     const completed = ctx.once('compaction.completed');
 
-    ctx.mockNextResponse({ type: 'text', text: 'Compacted after no-op cancel.' });
+    ctx.mockNextResponse({ type: 'text', text: 'Single message summary.' });
     await ctx.rpc.beginCompaction({});
     await compacted;
     await completed;
 
     expect(ctx.llmCalls).toHaveLength(1);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Compacted after no-op cancel.' },
+      { role: 'user', text: 'only pending user' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nSingle message summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
 
+  it('reinjects the plan-mode reminder after manual compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    await ctx.agent.planMode.enter('compact-plan', false);
+    const planFilePath = ctx.agent.planMode.planFilePath;
+    if (planFilePath === null) throw new Error('plan file path missing');
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'draft the plan' }]);
+    await ctx.agent.injection.inject();
+    expect(ctx.compactHistory().at(-1)?.text).toContain(`Plan file: ${planFilePath}`);
+    const completed = ctx.once('compaction.completed');
+
+    ctx.mockNextResponse({ type: 'text', text: 'Plan-mode compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await completed;
+
+    await vi.waitFor(() => {
+      const planReminders = ctx.agent.context.history.filter(
+        (message) => message.origin?.kind === 'injection' && message.origin.variant === 'plan_mode',
+      );
+      expect(planReminders).toHaveLength(1);
+      expect(messageText(planReminders[0])).toContain(`Plan file: ${planFilePath}`);
+    });
+    expect(ctx.compactHistory().at(-1)?.text).toContain(`Plan file: ${planFilePath}`);
+    await ctx.expectResumeMatches();
+  });
+
+  it('includes the plan-mode reminder in the answer request after auto compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    await ctx.agent.planMode.enter('auto-compact-plan', false);
+    const planFilePath = ctx.agent.planMode.planFilePath;
+    if (planFilePath === null) throw new Error('plan file path missing');
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 100);
+    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 950_000);
+    await ctx.agent.injection.inject();
+
+    ctx.mockNextResponse({ type: 'text', text: 'Auto plan compacted summary.' });
+    ctx.mockNextResponse({ type: 'text', text: 'I can answer with the plan path.' });
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Continue the plan' }] });
+    await ctx.untilTurnEnd();
+
+    expect(ctx.llmCalls).toHaveLength(2);
+    const answerTexts = ctx.llmCalls[1]?.history.map(messageText) ?? [];
+    expect(answerTexts.some((text) => text.includes(`Plan file: ${planFilePath}`))).toBe(true);
+    await ctx.expectResumeMatches();
+  });
+
+  it('reinjects reminders before a turn deferred during manual compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    await ctx.agent.planMode.enter('deferred-plan', false);
+    const planFilePath = ctx.agent.planMode.planFilePath;
+    if (planFilePath === null) throw new Error('plan file path missing');
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 100);
+    await ctx.agent.injection.inject();
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' }); // summarizer
+    ctx.mockNextResponse({ type: 'text', text: 'answer for the deferred turn' }); // deferred turn
+
+    // A prompt arriving mid-compaction is deferred, then replayed once compaction
+    // finishes. It must run AFTER reinjection, so its request carries the plan-mode
+    // reminder — the post-compaction state is resurfaced on the very first turn.
+    void ctx.rpc.beginCompaction({});
+    expect(ctx.agent.fullCompaction.isCompacting).toBe(true);
+    const turnId = ctx.agent.turn.prompt([{ type: 'text', text: 'Continue the plan' }]);
+    expect(turnId).toBeNull();
+
+    await ctx.once('compaction.completed');
+    await ctx.agent.turn.waitForCurrentTurn();
+
+    // Two generate calls: the summarizer, then the deferred turn — proving the
+    // deferred prompt ran (not stuck) and saw the reinjected reminder.
+    expect(ctx.llmCalls).toHaveLength(2);
+    const answerTexts = ctx.llmCalls[1]?.history.map(messageText) ?? [];
+    expect(answerTexts.some((text) => text.includes(`Plan file: ${planFilePath}`))).toBe(true);
+  });
+
   it('does not auto compact small contexts when reserved size exceeds the model window', async () => {
     const ctx = testAgent({
       initialConfig: {
@@ -1451,8 +1476,10 @@ describe('FullCompaction', () => {
 
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
-    expect(messageText(compactionCall?.history.at(-1))).toContain('<!-- Compression Priorities');
-    expect(answerCall?.history.map(messageText)).toContain('Reserved compacted summary.');
+    expect(messageText(compactionCall?.history.at(-1))).toContain('first-person handoff note');
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Reserved compacted summary.')),
+    ).toBe(true);
     await ctx.expectResumeMatches();
   });
 
@@ -1476,10 +1503,21 @@ describe('FullCompaction', () => {
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
     const compactionTexts = compactionCall?.history.map(messageText) ?? [];
-    expect(compactionTexts.some((text) => text.includes('keep-this-pending-verbatim'))).toBe(false);
-    expect(compactionCall?.history.map((message) => message.role)).toEqual(['user', 'assistant', 'user']);
-    expect(answerCall?.history.map(messageText)).toContain('Oversized prompt summary.');
-    expect(messageText(answerCall?.history.at(-1))).toBe(oversizedPrompt);
+    // The whole history is compacted, so the pending prompt is included in the
+    // compaction input and kept verbatim in the post-compaction replacement.
+    expect(compactionTexts.some((text) => text.includes('keep-this-pending-verbatim'))).toBe(true);
+    expect(compactionCall?.history.map((message) => message.role)).toEqual([
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Oversized prompt summary.')),
+    ).toBe(true);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('keep-this-pending-verbatim')),
+    ).toBe(true);
     await ctx.expectResumeMatches();
   });
 
@@ -1492,6 +1530,8 @@ describe('FullCompaction', () => {
         max_context_tokens: 1_000_000,
       },
     });
+    // The auto-compact ratio is 0.85, so the context alone (840k) sits below
+    // the 850k threshold and the pending prompt pushes it over.
     ctx.appendExchange(1, 'old user one', 'old assistant one', 840_000);
     const pendingPrompt = `ratio-pending-verbatim:${'x'.repeat(60_000)}`;
 
@@ -1503,10 +1543,21 @@ describe('FullCompaction', () => {
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
     const compactionTexts = compactionCall?.history.map(messageText) ?? [];
-    expect(compactionTexts.some((text) => text.includes('ratio-pending-verbatim'))).toBe(false);
-    expect(compactionCall?.history.map((message) => message.role)).toEqual(['user', 'assistant', 'user']);
-    expect(answerCall?.history.map(messageText)).toContain('Ratio compacted summary.');
-    expect(messageText(answerCall?.history.at(-1))).toBe(pendingPrompt);
+    // The whole history is compacted, so the pending prompt is included in the
+    // compaction input and kept verbatim in the post-compaction replacement.
+    expect(compactionTexts.some((text) => text.includes('ratio-pending-verbatim'))).toBe(true);
+    expect(compactionCall?.history.map((message) => message.role)).toEqual([
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Ratio compacted summary.')),
+    ).toBe(true);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('ratio-pending-verbatim')),
+    ).toBe(true);
 
     await ctx.expectResumeMatches();
   });
@@ -1554,8 +1605,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Overflow compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Overflow compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -1575,47 +1626,137 @@ describe('FullCompaction', () => {
         [
           "user: old user one",
           "assistant: old assistant one",
+          "user: Retry after provider overflow",
           "user: <compaction-instruction>",
         ],
         [
-          "assistant: Overflow compacted summary.",
-          "user: Retry after provider overflow",
+          "user: old user one
+
+      Retry after provider overflow",
+          "user: The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.
+      Overflow compacted summary.",
         ],
       ]
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('uses observed max from overflow to size compaction input', async () => {
-    const ctx = testAgent();
+  it('stops repeated provider-overflow compactions when the compacted context still overflows', async () => {
+    let callCount = 0;
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      callCount += 1;
+      if (messageText(history.at(-1)).includes('first-person handoff note')) {
+        return textResult(`Still too large summary ${String(callCount)}.`);
+      }
+      throw new APIContextOverflowError(400, 'Context length exceeded', `req-overflow-${String(callCount)}`);
+    };
+    const ctx = testAgent({ generate });
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 1_000_000,
-      },
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry until overflow guard' }] });
+    const events = await ctx.untilTurnEnd();
+
+    expect(countEvents(events, 'compaction.started')).toBe(3);
+    expect(callCount).toBe(7);
+    expect(events).toContainEqual(
+      expect.objectContaining({
+        event: 'turn.ended',
+        args: expect.objectContaining({
+          reason: 'failed',
+          error: expect.objectContaining({
+            code: 'context.overflow',
+            message: 'Compaction failed to bring the context under the model window after 3 attempts.',
+          }),
+        }),
+      }),
+    );
+  });
+
+  it('does not leave an orphan tool result at the start when reducing overflowing compaction input', async () => {
+    const inputs: string[][] = [];
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      inputs.push(inputHistorySnapshot(history));
+      if (inputs.length === 1) {
+        throw new APIContextOverflowError(400, 'Context length exceeded', 'req-compact-overflow');
+      }
+      return textResult('Reduced tool history summary.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendToolExchange();
+    let applyRecord: { compactedCount?: number; droppedCount?: number } | undefined;
+    ctx.emitter.on('context.apply_compaction', (entry) => {
+      applyRecord = (entry as { args: { compactedCount?: number; droppedCount?: number } }).args;
+    });
+    const compacted = ctx.once('context.apply_compaction');
+    const completed = ctx.once('compaction.completed');
+
+    await ctx.rpc.beginCompaction({});
+    await compacted;
+    await completed;
+
+    expect(inputs).toHaveLength(2);
+    const reducedHistory = inputs[1]!.slice(0, -1);
+    expect(reducedHistory[0]?.split(':', 1)[0]).not.toBe('tool');
+    // The whole 3-message history was folded (compactedCount), and all 3 were
+    // trimmed from the summarizer input on overflow (droppedCount), so the
+    // record honestly reports that the summary covers none of them.
+    expect(applyRecord?.compactedCount).toBe(3);
+    expect(applyRecord?.droppedCount).toBe(3);
+    await ctx.expectResumeMatches();
+  });
+
+  it('shrinks overflowing compaction input aggressively instead of one message at a time', async () => {
+    const inputs: string[][] = [];
+    let applyRecord: { compactedCount?: number; droppedCount?: number } | undefined;
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      inputs.push(inputHistorySnapshot(history));
+      const compactedHistory = history.slice(0, -1);
+      if (compactedHistory.length > 20) {
+        throw new APIContextOverflowError(
+          400,
+          'Context length exceeded',
+          `req-long-compact-${String(inputs.length)}`,
+        );
+      }
+      return textResult('Aggressively reduced summary.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
-    for (let i = 0; i < 20; i++) {
+    for (let i = 0; i < 30; i++) {
       ctx.appendExchange(
-        i + 1,
-        `old user ${String(i)}`,
-        `old assistant ${String(i)} ${'x'.repeat(40_000)}`,
-        20_000,
+        i,
+        `old user ${String(i)} ${'u'.repeat(400)}`,
+        `old assistant ${String(i)} ${'a'.repeat(400)}`,
+        10,
       );
     }
-    ctx.agent.fullCompaction.observeContextOverflow(200_000);
+    ctx.emitter.on('context.apply_compaction', (entry) => {
+      applyRecord = (entry as { args: { compactedCount?: number; droppedCount?: number } }).args;
+    });
     const compacted = ctx.once('context.apply_compaction');
     const completed = ctx.once('compaction.completed');
 
-    ctx.mockNextResponse({ type: 'text', text: 'Observed max summary.' });
     await ctx.rpc.beginCompaction({});
     await compacted;
     await completed;
 
-    expect(ctx.agent.fullCompaction.getEffectiveMaxContextTokens()).toBe(170_000);
-    const compactionTokens = estimateTokensForMessages(ctx.llmCalls[0]?.history ?? []);
-    expect(compactionTokens).toBeLessThan(200_000);
-    expect(ctx.compactHistory()[0]).toEqual({ role: 'assistant', text: 'Observed max summary.' });
+    expect(inputs[0]?.length).toBeGreaterThan(50);
+    expect(inputs.length).toBeLessThanOrEqual(4);
+    const finalCompactedHistory = inputs.at(-1)!.slice(0, -1);
+    expect(finalCompactedHistory[0]?.split(':', 1)[0]).not.toBe('tool');
+    expect(applyRecord?.compactedCount).toBe(60);
+    expect(applyRecord?.droppedCount).toBeGreaterThan(0);
     await ctx.expectResumeMatches();
   });
 
@@ -1799,8 +1940,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Unknown window compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Unknown window compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -2005,8 +2146,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Placeholder compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Placeholder compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -2033,12 +2174,12 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 6, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 6, "maxContextTokens": 1000000, "contextUsage": 0.000006, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 491, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 491, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 491, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction    { "summary": "First compacted summary.", "contextSummary": "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 96, "keptUserMessageCount": 1, "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 96, "maxContextTokens": 1000000, "contextUsage": 0.000096, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 491, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 491, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 6 } }
+      [emit] compaction.completed        { "result": { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 96, "keptUserMessageCount": 1 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I need a tool." }
@@ -2048,10 +2189,10 @@ describe('FullCompaction', () => {
       [emit] tool.call.started           { "turnId": 0, "toolCallId": "call_missing", "name": "MissingTool", "args": {} }
       [wire] context.append_loop_event   { "event": { "type": "tool.result", "parentUuid": "call_missing", "toolCallId": "call_missing", "result": { "output": "Tool \\"MissingTool\\" not found", "isError": true } }, "time": "<time>" }
       [emit] tool.result                 { "turnId": 0, "toolCallId": "call_missing", "output": "Tool \\"MissingTool\\" not found", "isError": true }
-      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
-      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 20, "maxContextTokens": 1000000, "contextUsage": 0.00002, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 491, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 491, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 97, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
+      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 97, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 97, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 108, "maxContextTokens": 1000000, "contextUsage": 0.000108, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 588, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 588, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 97, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.step.interrupted       { "turnId": 0, "step": 2, "reason": "error", "message": "Compaction limit exceeded (1)" }
       [emit] turn.ended                  { "turnId": 0, "reason": "failed", "error": { "code": "context.overflow", "message": "Compaction limit exceeded (1)", "name": "KimiError", "details": { "maxCompactions": 1, "turnId": 0 }, "retryable": true } }
     `);
@@ -2064,49 +2205,16 @@ describe('FullCompaction', () => {
         tools: []
         messages:
           user: text "Trigger repeated compaction"
-          user: text <compaction-instruction>
+          user: text "You are about to run out of context. Write a first-person handoff note to\\nyourself so you can seamlessly continue this task after the earlier\\nconversation is cleared.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nWrite the note as your own continuing train of thought — first person, present\\ntense, the way you would reason through the next move. Do not write a\\nthird-party report about someone else's work, and do not impose rigid section\\nheadings; let the shape follow the task.\\n\\nMake the note self-sufficient: the next turn will see only your most recent user\\nmessages and this note — every assistant message, tool call, and tool result\\nabove will be gone. In your own words, preserve what you genuinely need to\\ncontinue:\\n\\n- The latest user request, quoted verbatim, and what it is actually asking for.\\n- The instructions and constraints currently in force (user preferences,\\n  project rules, environment and tooling limits) — condensed to what still\\n  matters.\\n- What has actually been done, at high fidelity: keep the exact commands that\\n  were run, the exact file paths touched, and whether each succeeded or failed.\\n  Keep only the final working version of any code; drop intermediate attempts\\n  and already-resolved errors.\\n- The precise next action — including the exact next command or tool call you\\n  intend to make — and any required format for the final answer.\\n\\nBe honest about uncertainty. If an earlier step claimed something was done but\\nwas never verified (tests \\"passing\\", a fix \\"working\\", a file \\"created\\"), say so\\nplainly and treat it as unverified rather than fact — re-check before relying\\non it.\\n\\nBe concise. Include the critical data, identifiers, and references needed to\\ncontinue, and omit anything that does not change the next move.\\n\\nRespond with text only. Do not call any tools — you already have everything you\\nneed in the conversation history."
 
       call 2:
         messages:
-          assistant: text "First compacted summary."
+          user: text "Trigger repeated compaction"
+          user: text "The conversation so far has been compacted to free up context. What follows is your own working summary of this task — use it to continue your train of thought rather than starting over. Treat it as notes, not proof: where it says a step was done, tests passed, or a fix worked, verify that yourself before relying on it.\\nFirst compacted summary."
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('appends the todo list to the compaction summary', async () => {
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
-    });
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
-    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 80);
-
-    ctx.agent.tools.updateStore('todo', [
-      { title: 'Fix the auth bug', status: 'in_progress' },
-      { title: 'Add tests', status: 'pending' },
-    ]);
-
-    const compacted = new Promise<void>((resolve) => {
-      ctx.emitter.once('context.apply_compaction', () => {
-        resolve();
-      });
-    });
-    const completed = ctx.once('compaction.completed');
-
-    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
-    await ctx.rpc.beginCompaction({});
-    await compacted;
-    await completed;
-
-    const history = ctx.compactHistory();
-    expect(history).toHaveLength(1);
-    expect(history[0]).toMatchObject({
-      role: 'assistant',
-      text: 'Compacted summary.\n\n## TODO List\n  [in_progress] Fix the auth bug\n  [pending] Add tests',
-    });
-    await ctx.expectResumeMatches();
-  });
 });
 
 afterEach(() => {
@@ -2249,10 +2357,9 @@ function realKosongGenerate(
 const alwaysCompactOnce: CompactionStrategy = {
   shouldCompact: () => true,
   shouldBlock: () => true,
-  computeCompactCount: (messages: readonly Message[]) => messages.length,
-  reduceCompactOnOverflow: (messages: readonly Message[]) => messages.length,
   checkAfterStep: true,
   maxCompactionPerTurn: 1,
+  maxOverflowCompactionAttempts: 3,
 };
 
 function missingToolCall(): ToolCall {
@@ -2264,29 +2371,13 @@ function missingToolCall(): ToolCall {
   };
 }
 
-function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrategy {
-  return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: 0.85,
-    blockRatio: 0.85,
-    reservedContextSize: 0,
-    maxCompactionPerTurn: 3,
-    maxRecentMessages: 10,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
-  });
-}
-
 function overflowOnlyCompactionStrategy(maxSize: number = 14): DefaultCompactionStrategy {
   return new DefaultCompactionStrategy(() => maxSize, {
     triggerRatio: Infinity,
     blockRatio: Infinity,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
-    maxRecentMessages: 3,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
+    maxOverflowCompactionAttempts: 3,
   });
 }
 
@@ -2336,5 +2427,5 @@ function inputHistorySnapshot(history: readonly Message[]): string[] {
 }
 
 function normalizeInputText(text: string): string {
-  return text.includes('compact this conversation context') ? '<compaction-instruction>' : text;
+  return text.includes('first-person handoff note') ? '<compaction-instruction>' : text;
 }
diff --git a/packages/agent-core/test/agent/compaction/handoff.test.ts b/packages/agent-core/test/agent/compaction/handoff.test.ts
new file mode 100644
index 000000000..26e0f4956
--- /dev/null
+++ b/packages/agent-core/test/agent/compaction/handoff.test.ts
@@ -0,0 +1,258 @@
+import type { Message } from '@moonshot-ai/kosong';
+import { describe, expect, it } from 'vitest';
+
+import {
+  COMPACTION_SUMMARY_PREFIX,
+  buildCompactionSummaryText,
+  collectCompactableUserMessages,
+  compactionUserMessageDisposition,
+  isCompactionSummaryMessage,
+  isRealUserInput,
+  selectRecentUserMessages,
+  type CompactionUserDisposition,
+} from '../../../src/agent/compaction';
+import type { PromptOrigin } from '../../../src/agent/context/types';
+import { estimateTokens, estimateTokensForMessage } from '../../../src/utils/tokens';
+
+function textMessage(role: 'user' | 'assistant' | 'tool', text: string): Message {
+  return { role, content: [{ type: 'text', text }], toolCalls: [] };
+}
+
+function messageText(message: Message): string {
+  return message.content.map((part) => (part.type === 'text' ? part.text : '')).join('');
+}
+
+const ALL_PROMPT_ORIGIN_KINDS = {
+  user: true,
+  skill_activation: true,
+  plugin_command: true,
+  injection: true,
+  shell_command: true,
+  compaction_summary: true,
+  system_trigger: true,
+  background_task: true,
+  cron_job: true,
+  cron_missed: true,
+  hook_result: true,
+  retry: true,
+} satisfies Record<PromptOrigin['kind'], true>;
+
+const EXPECTED_DISPOSITION: Record<PromptOrigin['kind'], CompactionUserDisposition> = {
+  user: 'keep',
+  skill_activation: 'keep',
+  plugin_command: 'keep',
+  injection: 'drop',
+  shell_command: 'drop',
+  compaction_summary: 'drop',
+  system_trigger: 'drop',
+  background_task: 'drop',
+  cron_job: 'drop',
+  cron_missed: 'drop',
+  hook_result: 'drop',
+  retry: 'drop',
+};
+
+function originForKind(kind: PromptOrigin['kind']): PromptOrigin {
+  switch (kind) {
+    case 'user':
+      return { kind: 'user' };
+    case 'skill_activation':
+      return {
+        kind: 'skill_activation',
+        activationId: 'activation',
+        skillName: 'skill',
+        trigger: 'user-slash',
+      };
+    case 'plugin_command':
+      return {
+        kind: 'plugin_command',
+        activationId: 'activation',
+        pluginId: 'plugin',
+        commandName: 'command',
+        trigger: 'user-slash',
+      };
+    case 'injection':
+      return { kind: 'injection', variant: 'system_reminder' };
+    case 'shell_command':
+      return { kind: 'shell_command', phase: 'input' };
+    case 'compaction_summary':
+      return { kind: 'compaction_summary' };
+    case 'system_trigger':
+      return { kind: 'system_trigger', name: 'system' };
+    case 'background_task':
+      return {
+        kind: 'background_task',
+        taskId: 'task',
+        status: 'completed',
+        notificationId: 'notification',
+      };
+    case 'cron_job':
+      return {
+        kind: 'cron_job',
+        jobId: 'job',
+        cron: '* * * * *',
+        recurring: true,
+        coalescedCount: 1,
+        stale: false,
+      };
+    case 'cron_missed':
+      return { kind: 'cron_missed', count: 1 };
+    case 'hook_result':
+      return { kind: 'hook_result', event: 'PreCompact' };
+    case 'retry':
+      return { kind: 'retry', trigger: 'system' };
+  }
+}
+
+describe('isCompactionSummaryMessage', () => {
+  it('detects the compaction origin', () => {
+    const message = {
+      ...textMessage('user', 'anything'),
+      origin: { kind: 'compaction_summary' as const },
+    };
+    expect(isCompactionSummaryMessage(message)).toBe(true);
+  });
+
+  it('keeps real user prompts even when they start with the summary prefix', () => {
+    const message = {
+      ...textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nsummary`),
+      origin: { kind: 'user' as const },
+    };
+
+    expect(isCompactionSummaryMessage(message)).toBe(false);
+    expect(collectCompactableUserMessages([message])).toEqual([message]);
+  });
+
+  it('ignores ordinary user messages', () => {
+    expect(isCompactionSummaryMessage(textMessage('user', 'hello'))).toBe(false);
+  });
+});
+
+describe('compactionUserMessageDisposition', () => {
+  it('classifies every prompt origin kind', () => {
+    for (const kind of Object.keys(ALL_PROMPT_ORIGIN_KINDS) as Array<PromptOrigin['kind']>) {
+      expect(compactionUserMessageDisposition(originForKind(kind))).toBe(EXPECTED_DISPOSITION[kind]);
+    }
+  });
+
+  it('drops model-triggered skill activations', () => {
+    expect(
+      compactionUserMessageDisposition({
+        kind: 'skill_activation',
+        activationId: 'activation',
+        skillName: 'skill',
+        trigger: 'model-tool',
+      }),
+    ).toBe('drop');
+  });
+});
+
+describe('isRealUserInput', () => {
+  it('keeps genuine user input and drops other origins', () => {
+    expect(isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('user') })).toBe(
+      true,
+    );
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('skill_activation') }),
+    ).toBe(true);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('injection') }),
+    ).toBe(false);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('shell_command') }),
+    ).toBe(false);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('background_task') }),
+    ).toBe(false);
+  });
+});
+
+describe('collectCompactableUserMessages', () => {
+  it('keeps only user messages', () => {
+    const messages = [
+      textMessage('user', 'u1'),
+      textMessage('assistant', 'a1'),
+      textMessage('tool', 't1'),
+      textMessage('user', 'u2'),
+    ];
+
+    expect(collectCompactableUserMessages(messages).map(messageText)).toEqual(['u1', 'u2']);
+  });
+
+  it('drops previous compaction summaries', () => {
+    const summary = {
+      ...textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nold summary`),
+      origin: { kind: 'compaction_summary' as const },
+    };
+    const messages = [textMessage('user', 'u1'), summary, textMessage('user', 'u2')];
+
+    expect(collectCompactableUserMessages(messages).map(messageText)).toEqual(['u1', 'u2']);
+  });
+});
+
+describe('selectRecentUserMessages', () => {
+  it('keeps the most recent messages within the budget', () => {
+    const messages = [
+      textMessage('user', 'old'),
+      textMessage('user', 'mid'),
+      textMessage('user', 'recent'),
+    ];
+    const budget = estimateTokensForMessage(messages[1]!) + estimateTokensForMessage(messages[2]!);
+
+    expect(selectRecentUserMessages(messages, budget).map(messageText)).toEqual(['mid', 'recent']);
+  });
+
+  it('truncates the oldest kept message when it would overflow the budget', () => {
+    const long = 'x'.repeat(1_000);
+    const messages = [textMessage('user', long), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 10;
+
+    const selected = selectRecentUserMessages(messages, budget);
+
+    expect(selected).toHaveLength(2);
+    expect(estimateTokens(messageText(selected[0]!))).toBeLessThanOrEqual(10);
+    expect(messageText(selected[1]!)).toBe('recent');
+  });
+
+  it('truncates a CJK-heavy oldest message within the budget in one pass', () => {
+    const cjk = '中'.repeat(40_000);
+    const messages = [textMessage('user', cjk), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 1_000;
+
+    const selected = selectRecentUserMessages(messages, budget);
+
+    expect(selected).toHaveLength(2);
+    expect(messageText(selected[1]!)).toBe('recent');
+    expect(estimateTokens(messageText(selected[0]!))).toBeLessThanOrEqual(1_000);
+    expect(cjk.startsWith(messageText(selected[0]!))).toBe(true);
+  });
+
+  it('does not split surrogate pairs while truncating emoji text', () => {
+    const emoji = '😀'.repeat(2_000);
+    const messages = [textMessage('user', emoji), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 333;
+
+    const selected = selectRecentUserMessages(messages, budget);
+    const truncated = messageText(selected[0]!);
+
+    expect(selected).toHaveLength(2);
+    expect(messageText(selected[1]!)).toBe('recent');
+    expect(estimateTokens(truncated)).toBeLessThanOrEqual(333);
+    expect(/^(?:😀)*$/u.test(truncated)).toBe(true);
+    expect(truncated.length % 2).toBe(0);
+  });
+
+  it('returns nothing when the budget is zero', () => {
+    expect(selectRecentUserMessages([textMessage('user', 'hi')], 0)).toEqual([]);
+  });
+});
+
+describe('buildCompactionSummaryText', () => {
+  it('prefixes the summary', () => {
+    expect(buildCompactionSummaryText('Summary.')).toBe(`${COMPACTION_SUMMARY_PREFIX}\nSummary.`);
+  });
+
+  it('falls back when the summary is empty', () => {
+    expect(buildCompactionSummaryText('   ')).toBe(`${COMPACTION_SUMMARY_PREFIX}\n(no summary available)`);
+  });
+});
diff --git a/packages/agent-core/test/agent/compaction/micro.test.ts b/packages/agent-core/test/agent/compaction/micro.test.ts
index 996588c9b..6043c7519 100644
--- a/packages/agent-core/test/agent/compaction/micro.test.ts
+++ b/packages/agent-core/test/agent/compaction/micro.test.ts
@@ -35,8 +35,8 @@ describe('MicroCompaction', () => {
     vi.stubEnv(MICRO_COMPACTION_FLAG_ENV, '1');
   });
 
-  it('defaults the micro_compaction flag on', () => {
-    expect(new FlagResolver({}, FLAG_DEFINITIONS).enabled('micro_compaction')).toBe(true);
+  it('defaults the micro_compaction flag off', () => {
+    expect(new FlagResolver({}, FLAG_DEFINITIONS).enabled('micro_compaction')).toBe(false);
   });
 
   it('truncates old tool results after cache miss', () => {
@@ -700,10 +700,10 @@ describe('MicroCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
-    expect(ctx.agent.context.messages).toHaveLength(1);
-    expect(ctx.agent.context.messages[0]).toMatchObject({
-      role: 'assistant',
-      content: [{ type: 'text', text: 'Summary.' }],
+    expect(ctx.agent.context.messages).toHaveLength(2);
+    expect(ctx.agent.context.messages[1]).toMatchObject({
+      role: 'user',
+      content: [{ type: 'text', text: expect.stringContaining('Summary.') }],
     });
   });
 
diff --git a/packages/agent-core/test/agent/compaction/strategy.test.ts b/packages/agent-core/test/agent/compaction/strategy.test.ts
index ebc4c7cdd..84422eb4e 100644
--- a/packages/agent-core/test/agent/compaction/strategy.test.ts
+++ b/packages/agent-core/test/agent/compaction/strategy.test.ts
@@ -1,155 +1,80 @@
-
-import {
-  type Message
-} from '@moonshot-ai/kosong';
 import { describe, expect, it } from 'vitest';
 
-import { DefaultCompactionStrategy } from '../../../src/agent/compaction';
-import { estimateTokensForMessages } from '../../../src/utils/tokens';
+import {
+  DEFAULT_COMPACTION_CONFIG,
+  DefaultCompactionStrategy,
+} from '../../../src/agent/compaction';
 
 describe('DefaultCompactionStrategy', () => {
-  it('keeps an oversized trailing user message as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('keeps consecutive trailing user messages as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user one ${'x'.repeat(1_200)}`),
-      textMessage('user', `pending user two ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('compacts the prefix when the trailing exchange itself is oversized', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'recent user'),
-      textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('returns 0 when there is nothing to compact', () => {
-    const strategy = testCompactionStrategy();
-    expect(strategy.computeCompactCount([], 'auto')).toBe(0);
-    expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0);
-    expect(
-      strategy.computeCompactCount(
-        [
-          textMessage('user', 'a'),
-          textMessage('user', 'b'),
-          textMessage('user', 'c'),
-        ],
-        'auto',
-      ),
-    ).toBe(0);
-  });
-
-  it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'inspect'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }],
-      },
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(0);
-  });
-
-  it('does not split inside a parallel tool exchange', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'run both tools'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [
-          { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' },
-          { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' },
-        ],
-      },
-      { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' },
-      { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' },
-      textMessage('user', 'next prompt'),
-    ];
-
-    // The only valid split is before the parallel exchange (after 'old assistant'),
-    // never between tool_a and tool_b — that would leave tool_b as an orphan.
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('shrinks auto compaction input to fit the model window', () => {
-    const maxSize = 1_000;
-    const strategy = testCompactionStrategy(maxSize);
-    const messages = Array.from({ length: 30 }, (_, i) =>
-      textMessage('assistant', `message ${i} ${'x'.repeat(400)}`),
-    );
-
-    const count = strategy.computeCompactCount(messages, 'auto');
+  it('triggers auto-compaction at 85% of the context window', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      ...DEFAULT_COMPACTION_CONFIG,
+      reservedContextSize: 0,
+    });
 
-    expect(count).toBeGreaterThan(0);
-    expect(count).toBeLessThan(messages.length);
-    expect(estimateTokensForMessages(messages.slice(0, count))).toBeLessThanOrEqual(maxSize);
-    expect(estimateTokensForMessages(messages.slice(0, count + 1))).toBeGreaterThan(maxSize);
+    expect(strategy.shouldCompact(84_999)).toBe(false);
+    expect(strategy.shouldCompact(85_000)).toBe(true);
+    expect(strategy.shouldCompact(100_000)).toBe(true);
   });
 
-  it('shrinks manual compaction input to fit the model window', () => {
-    const maxSize = 1_000;
-    const strategy = testCompactionStrategy(maxSize);
-    const messages = Array.from({ length: 30 }, (_, i) =>
-      textMessage('assistant', `message ${i} ${'x'.repeat(400)}`),
-    );
-
-    const count = strategy.computeCompactCount(messages, 'manual');
+  it('blocks at the same threshold by default (synchronous compaction)', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      ...DEFAULT_COMPACTION_CONFIG,
+      reservedContextSize: 0,
+    });
 
-    expect(count).toBeGreaterThan(0);
-    expect(count).toBeLessThan(messages.length);
-    expect(estimateTokensForMessages(messages.slice(0, count))).toBeLessThanOrEqual(maxSize);
-    expect(estimateTokensForMessages(messages.slice(0, count + 1))).toBeGreaterThan(maxSize);
+    expect(strategy.shouldBlock(84_999)).toBe(false);
+    expect(strategy.shouldBlock(85_000)).toBe(true);
+    expect(strategy.checkAfterStep).toBe(false);
   });
 
-  it('reserves response context by default before the ratio threshold is reached', () => {
+  it('reserves response context before the ratio threshold is reached', () => {
     const strategy = new DefaultCompactionStrategy(() => 256_000);
 
+    // 256k * 0.85 = 217_600, and the 50k reserve triggers at 206k.
     expect(strategy.shouldCompact(210_000)).toBe(true);
     expect(strategy.shouldBlock(210_000)).toBe(true);
   });
 
   it('ignores reserved context when the reserve is not smaller than the model window', () => {
     const strategy = new DefaultCompactionStrategy(() => 32_000, {
-      triggerRatio: 0.85,
-      blockRatio: 0.85,
+      triggerRatio: 0.9,
+      blockRatio: 0.9,
       reservedContextSize: 50_000,
       maxCompactionPerTurn: 3,
-      maxRecentMessages: 3,
-      maxRecentUserMessages: Infinity,
-      maxRecentSizeRatio: 0.2,
-      minOverflowReductionRatio: 0.05,
+      maxOverflowCompactionAttempts: 3,
     });
 
     expect(strategy.shouldCompact(1)).toBe(false);
     expect(strategy.shouldBlock(1)).toBe(false);
-    expect(strategy.shouldCompact(28_000)).toBe(true);
-    expect(strategy.shouldBlock(28_000)).toBe(true);
+    // Falls back to the 90% ratio: 32_000 * 0.9 = 28_800.
+    expect(strategy.shouldCompact(28_800)).toBe(true);
+    expect(strategy.shouldBlock(28_800)).toBe(true);
+  });
+
+  it('does not compact when the context window is unknown', () => {
+    const strategy = new DefaultCompactionStrategy(() => 0);
+
+    expect(strategy.shouldCompact(1_000_000)).toBe(false);
+    expect(strategy.shouldBlock(1_000_000)).toBe(false);
+  });
+
+  it('enables after-step checks only when ratios differ (async compaction)', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      triggerRatio: 0.8,
+      blockRatio: 0.9,
+      reservedContextSize: 0,
+      maxCompactionPerTurn: 3,
+      maxOverflowCompactionAttempts: 3,
+    });
+
+    expect(strategy.checkAfterStep).toBe(true);
+  });
+
+  it('exposes maxCompactionPerTurn', () => {
+    const strategy = testCompactionStrategy();
+
+    expect(strategy.maxCompactionPerTurn).toBe(3);
   });
 });
 
@@ -159,30 +84,6 @@ function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrat
     blockRatio: 0.85,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
-    maxRecentMessages: 10,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
+    maxOverflowCompactionAttempts: 3,
   });
 }
-
-function overflowOnlyCompactionStrategy(maxSize: number = 14): DefaultCompactionStrategy {
-  return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: Infinity,
-    blockRatio: Infinity,
-    reservedContextSize: 0,
-    maxCompactionPerTurn: 3,
-    maxRecentMessages: 3,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
-  });
-}
-
-function textMessage(role: 'user' | 'assistant', text: string): Message {
-  return {
-    role,
-    content: [{ type: 'text', text }],
-    toolCalls: [],
-  };
-}
diff --git a/packages/agent-core/test/agent/context.test.ts b/packages/agent-core/test/agent/context.test.ts
index 580bda69c..eaec4923d 100644
--- a/packages/agent-core/test/agent/context.test.ts
+++ b/packages/agent-core/test/agent/context.test.ts
@@ -563,7 +563,95 @@ describe('Agent context', () => {
     await ctx.expectResumeMatches();
   });
 
-  it('preserves deferred reminders when compaction keeps a pending tool exchange', async () => {
+  // Regression: a user message injected after `step.begin` but before the first
+  // `tool.call` (e.g. a background-task notification flushed mid-step) lands
+  // between the assistant `tool_use` and its `tool_result` in history, which
+  // strict providers (Anthropic) reject with HTTP 400. The projector must repair
+  // the adjacency so the `tool_result` immediately follows the `tool_use`. Micro
+  // compaction exposed this latent misordering by busting the prompt cache.
+  it('repairs a tool_use/tool_result adjacency broken by an injected user message', async () => {
+    const ctx = testAgent();
+    ctx.configure();
+    const stepUuid = 'mid-step-notify-step';
+
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'drive the tank' }]);
+    ctx.dispatch({
+      type: 'context.append_loop_event',
+      event: { type: 'step.begin', uuid: stepUuid, turnId: '0', step: 1 },
+    });
+
+    // Notification arrives in the gap between step.begin and tool.call, when no
+    // tool result is yet pending, so it is pushed directly into history.
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: '<notification>bg done</notification>' }], {
+      kind: 'background_task',
+      taskId: 'task-1',
+      status: 'completed',
+      notificationId: 'task:task-1:completed',
+    });
+
+    ctx.dispatch({
+      type: 'context.append_loop_event',
+      event: {
+        type: 'tool.call',
+        uuid: 'call_drive',
+        turnId: '0',
+        step: 1,
+        stepUuid,
+        toolCallId: 'call_drive',
+        name: 'Drive',
+        args: {},
+      },
+    });
+    ctx.dispatch({
+      type: 'context.append_loop_event',
+      event: {
+        type: 'step.end',
+        uuid: stepUuid,
+        turnId: '0',
+        step: 1,
+        finishReason: 'tool_use',
+      },
+    });
+    ctx.dispatch({
+      type: 'context.append_loop_event',
+      event: {
+        type: 'tool.result',
+        parentUuid: 'call_drive',
+        toolCallId: 'call_drive',
+        result: { output: 'drove forward' },
+      },
+    });
+
+    // History preserves the original (misordered) sequence: the notification sits
+    // between the assistant tool_use and its tool_result.
+    expect(ctx.agent.context.history.map((message) => message.role)).toEqual([
+      'user',
+      'assistant',
+      'user',
+      'tool',
+    ]);
+
+    // Projection repairs the adjacency: the tool_result immediately follows the
+    // assistant tool_use, and the sandwiched notification is moved after it.
+    const projected = ctx.agent.context.messages;
+    expect(projected.map((message) => message.role)).toEqual(['user', 'assistant', 'tool', 'user']);
+    const assistantIndex = projected.findIndex(
+      (message) => message.role === 'assistant' && message.toolCalls.length > 0,
+    );
+    expect(projected[assistantIndex]?.toolCalls.map((toolCall) => toolCall.id)).toEqual([
+      'call_drive',
+    ]);
+    expect(projected[assistantIndex + 1]).toMatchObject({
+      role: 'tool',
+      toolCallId: 'call_drive',
+    });
+    expect(projected[assistantIndex + 2]?.content).toEqual([
+      { type: 'text', text: '<notification>bg done</notification>' },
+    ]);
+    await ctx.expectResumeMatches();
+  });
+
+  it('drops deferred reminders when compaction drops a pending tool exchange', async () => {
     const ctx = testAgent();
     ctx.configure();
 
@@ -576,20 +664,24 @@ describe('Agent context', () => {
     });
     ctx.agent.context.applyCompaction({
       summary: 'summary of old prompt',
-      compactedCount: 1,
+      compactedCount: 4,
       tokensBefore: 100,
-      tokensAfter: 40,
     });
     ctx.agent.context.appendSystemReminder('second reminder', {
       kind: 'injection',
       variant: 'host',
     });
 
+    // Compaction keeps only the real user prompt plus the summary; the deferred
+    // first reminder is dropped because initial context is rebuilt every turn.
+    // The second reminder, appended after compaction, is preserved.
     expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
+    ]);
+    expect(ctx.agent.context.messages[2]?.content).toEqual([
+      { type: 'text', text: '<system-reminder>\nsecond reminder\n</system-reminder>' },
     ]);
 
     ctx.dispatch({
@@ -602,24 +694,47 @@ describe('Agent context', () => {
       },
     });
 
+    // The pending tool exchange was dropped by compaction, so the late tool
+    // result is ignored and the history is unchanged.
     expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
       'user',
     ]);
-    expect(ctx.agent.context.messages[5]?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nfirst reminder\n</system-reminder>' },
-    ]);
-    expect(ctx.agent.context.messages[6]?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nsecond reminder\n</system-reminder>' },
-    ]);
     await ctx.expectResumeMatches();
   });
 
+  it('applyCompaction keeps only real user input from mixed user-role history', () => {
+    const ctx = testAgent();
+    ctx.configure();
+
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real prompt' }]);
+    ctx.agent.context.appendBashInput('pwd');
+    ctx.agent.context.appendBashOutput('/tmp/repo', '', false);
+    ctx.agent.context.appendLocalCommandStdout('local command output');
+    ctx.agent.context.appendSystemReminder('stale reminder', {
+      kind: 'injection',
+      variant: 'host',
+    });
+
+    const result = ctx.agent.context.applyCompaction({
+      summary: 'summary of mixed history',
+      compactedCount: 5,
+      tokensBefore: 100,
+    });
+    ctx.agent.context.appendSystemReminder('fresh reminder', {
+      kind: 'injection',
+      variant: 'host',
+    });
+
+    expect(ctx.agent.context.history.map(({ role, origin }) => ({ role, origin }))).toEqual([
+      { role: 'user', origin: { kind: 'user' } },
+      { role: 'user', origin: { kind: 'compaction_summary' } },
+      { role: 'user', origin: { kind: 'injection', variant: 'host' } },
+    ]);
+    expect(result.keptUserMessageCount).toBe(1);
+  });
+
   it('clears context before the next LLM request', async () => {
     const ctx = testAgent();
     ctx.configure();
@@ -648,9 +763,8 @@ describe('Agent context', () => {
       summary: 'summary of old context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
-    expect(ctx.agent.context.history[0]?.origin).toEqual({ kind: 'compaction_summary' });
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
     ctx.mockNextResponse({ type: 'text', text: 'after compaction' });
     await ctx.rpc.prompt({ input: [{ type: 'text', text: 'new prompt' }] });
@@ -660,8 +774,9 @@ describe('Agent context', () => {
       system: <system-prompt>
       tools: []
       messages:
-        assistant: text "summary of old context"
-        user: text "recent user message\\n\\nnew prompt"
+        user: text "old user message\\n\\nrecent user message"
+        user: text "summary of old context"
+        user: text "new prompt"
     `);
     await ctx.expectResumeMatches();
   });
@@ -812,7 +927,6 @@ describe('Agent context', () => {
       summary: 'summary of compacted context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user message' }]);
     ctx.agent.context.appendMessage({
@@ -830,7 +944,11 @@ describe('Agent context', () => {
 
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'old user message' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         origin: { kind: 'compaction_summary' },
         content: [{ type: 'text', text: 'summary of compacted context' }],
       }),
@@ -852,7 +970,6 @@ describe('Agent context', () => {
       summary: 'summary of compacted context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user message' }]);
     ctx.agent.context.appendMessage({
@@ -866,7 +983,11 @@ describe('Agent context', () => {
     }).not.toThrow();
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'old user message' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         origin: { kind: 'compaction_summary' },
         content: [{ type: 'text', text: 'summary of compacted context' }],
       }),
diff --git a/packages/agent-core/test/agent/context/projector.test.ts b/packages/agent-core/test/agent/context/projector.test.ts
new file mode 100644
index 000000000..6dc2a3ab6
--- /dev/null
+++ b/packages/agent-core/test/agent/context/projector.test.ts
@@ -0,0 +1,439 @@
+import type { ContentPart, Message, ToolCall } from '@moonshot-ai/kosong';
+import { describe, expect, it } from 'vitest';
+
+import { project } from '../../../src/agent/context/projector';
+import type { ContextMessage } from '../../../src/agent/context/types';
+
+// ---------------------------------------------------------------------------
+// Invariant under test
+// ---------------------------------------------------------------------------
+//
+// Strict providers (Anthropic) reject a request with HTTP 400 when an assistant
+// `tool_use` is not immediately followed by its matching `tool_result`. The
+// projector must therefore guarantee that, for every assistant tool call whose
+// result exists anywhere in the projected history, that result sits in the
+// consecutive tool messages immediately following the assistant message.
+//
+// A tool call with no recorded result anywhere is considered still in-flight
+// (pending) and is intentionally left untouched — it is not an orphan.
+
+interface MisplacedToolUse {
+  readonly assistantIndex: number;
+  readonly toolCallId: string;
+}
+
+/**
+ * Return tool calls whose result exists somewhere in `messages` but is not
+ * adjacent to the assistant `tool_use`. An empty result means the invariant
+ * holds and the history is safe to send to a strict provider.
+ */
+function findMisplacedToolUses(messages: readonly Message[]): MisplacedToolUse[] {
+  // Index every recorded tool result by its toolCallId.
+  const resultIndexById = new Map<string, number>();
+  messages.forEach((message, index) => {
+    if (message.role === 'tool' && message.toolCallId !== undefined) {
+      resultIndexById.set(message.toolCallId, index);
+    }
+  });
+
+  const violations: MisplacedToolUse[] = [];
+  for (let i = 0; i < messages.length; i++) {
+    const message = messages[i]!;
+    if (message.role !== 'assistant' || message.toolCalls.length === 0) continue;
+
+    // Collect the toolCallIds answered in the consecutive tool messages that
+    // immediately follow this assistant message.
+    const adjacentResultIds = new Set<string>();
+    let j = i + 1;
+    while (j < messages.length && messages[j]!.role === 'tool') {
+      const id = messages[j]!.toolCallId;
+      if (id !== undefined) adjacentResultIds.add(id);
+      j++;
+    }
+
+    for (const toolCall of message.toolCalls) {
+      // Only flag tool calls whose result was actually recorded; a missing
+      // result means the call is still in-flight, not misplaced.
+      if (!resultIndexById.has(toolCall.id)) continue;
+      if (!adjacentResultIds.has(toolCall.id)) {
+        violations.push({ assistantIndex: i, toolCallId: toolCall.id });
+      }
+    }
+  }
+  return violations;
+}
+
+// ---------------------------------------------------------------------------
+// Builders
+// ---------------------------------------------------------------------------
+
+function textPart(text: string): ContentPart {
+  return { type: 'text', text };
+}
+
+function textOf(message: Message | undefined): string {
+  return (
+    message?.content
+      .map((part) => (part.type === 'text' ? part.text : ''))
+      .join('') ?? ''
+  );
+}
+
+function user(text: string): ContextMessage {
+  return { role: 'user', content: [textPart(text)], toolCalls: [] };
+}
+
+function notification(text: string): ContextMessage {
+  return {
+    role: 'user',
+    content: [textPart(text)],
+    toolCalls: [],
+    origin: {
+      kind: 'background_task',
+      taskId: 'task',
+      status: 'completed',
+      notificationId: 'task:task:completed',
+    },
+  };
+}
+
+function assistant(toolCallIds: readonly string[], text = ''): ContextMessage {
+  return {
+    role: 'assistant',
+    content: text.length > 0 ? [textPart(text)] : [],
+    toolCalls: toolCallIds.map(
+      (id): ToolCall => ({ type: 'function', id, name: 'Run', arguments: '{}' }),
+    ),
+  };
+}
+
+function emptyAssistant(): ContextMessage {
+  return { role: 'assistant', content: [], toolCalls: [] };
+}
+
+function tool(toolCallId: string, text = 'ok'): ContextMessage {
+  return { role: 'tool', content: [textPart(text)], toolCalls: [], toolCallId };
+}
+
+function compactionSummary(text = 'summary'): ContextMessage {
+  return {
+    role: 'assistant',
+    content: [textPart(text)],
+    toolCalls: [],
+    origin: { kind: 'compaction_summary' },
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Targeted regression tests
+// ---------------------------------------------------------------------------
+
+describe('project tool_use/tool_result adjacency', () => {
+  it('leaves an already well-formed history unchanged (idempotent)', () => {
+    const history: ContextMessage[] = [
+      user('u1'),
+      assistant(['a']),
+      tool('a'),
+      user('u2'),
+      assistant(['b', 'c']),
+      tool('b'),
+      tool('c'),
+      user('u3'),
+    ];
+    const projected = project(history);
+    expect(projected.map((m) => [m.role, m.toolCallId])).toEqual([
+      ['user', undefined],
+      ['assistant', undefined],
+      ['tool', 'a'],
+      ['user', undefined],
+      ['assistant', undefined],
+      ['tool', 'b'],
+      ['tool', 'c'],
+      ['user', undefined],
+    ]);
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+  });
+
+  it('moves a user message sandwiched between tool_use and tool_result to after the result', () => {
+    const history: ContextMessage[] = [user('u1'), assistant(['a']), notification('ping'), tool('a')];
+    const projected = project(history);
+    expect(projected.map((m) => m.role)).toEqual(['user', 'assistant', 'tool', 'user']);
+    expect(projected[1]?.toolCalls.map((tc) => tc.id)).toEqual(['a']);
+    expect(projected[2]).toMatchObject({ role: 'tool', toolCallId: 'a' });
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+  });
+
+  it('pulls a distant tool result back up across intervening exchanges', () => {
+    const history: ContextMessage[] = [
+      user('u1'),
+      assistant(['a']),
+      user('middle'),
+      assistant(['b']),
+      tool('b'),
+      user('later'),
+      tool('a'),
+    ];
+    const projected = project(history);
+    expect(projected.map((m) => [m.role, m.toolCallId])).toEqual([
+      ['user', undefined],
+      ['assistant', undefined],
+      ['tool', 'a'],
+      ['user', undefined],
+      ['assistant', undefined],
+      ['tool', 'b'],
+      ['user', undefined],
+    ]);
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+  });
+
+  it('reorders parallel tool results that arrive out of order', () => {
+    const history: ContextMessage[] = [
+      user('u1'),
+      assistant(['a', 'b', 'c']),
+      tool('c'),
+      tool('a'),
+      tool('b'),
+    ];
+    const projected = project(history);
+    // All three results must be adjacent to the assistant, regardless of order.
+    expect(projected.map((m) => m.role)).toEqual(['user', 'assistant', 'tool', 'tool', 'tool']);
+    const resultIds = projected.slice(2).map((m) => m.toolCallId);
+    expect(resultIds).toEqual(expect.arrayContaining(['a', 'b', 'c']));
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+  });
+
+  it('repairs multiple misplaced exchanges in a single history', () => {
+    const history: ContextMessage[] = [
+      user('u1'),
+      assistant(['a']),
+      user('sandwich-a'),
+      assistant(['b']),
+      tool('b'),
+      user('sandwich-b'),
+      tool('a'),
+    ];
+    const projected = project(history);
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+    // a's result must immediately follow a's assistant.
+    const aIndex = projected.findIndex((m) => m.toolCalls.some((tc) => tc.id === 'a'));
+    expect(projected[aIndex + 1]).toMatchObject({ role: 'tool', toolCallId: 'a' });
+    const bIndex = projected.findIndex((m) => m.toolCalls.some((tc) => tc.id === 'b'));
+    expect(projected[bIndex + 1]).toMatchObject({ role: 'tool', toolCallId: 'b' });
+  });
+
+  it('leaves a pending (in-flight) tool call without a recorded result untouched', () => {
+    const history: ContextMessage[] = [user('u1'), assistant(['a', 'b']), tool('a')];
+    // b has no recorded result — it is still pending, not orphaned.
+    const projected = project(history);
+    expect(projected.map((m) => [m.role, m.toolCallId])).toEqual([
+      ['user', undefined],
+      ['assistant', undefined],
+      ['tool', 'a'],
+    ]);
+    // No new (synthetic) tool result for b was introduced.
+    expect(projected.some((m) => m.toolCallId === 'b')).toBe(false);
+  });
+
+  it('synthesizes a tool result for a missing tool call when synthesizeMissing is set', () => {
+    const history: ContextMessage[] = [user('u1'), assistant(['a', 'b']), tool('a')];
+    const projected = project(history, { synthesizeMissing: true });
+    expect(projected.map((m) => [m.role, m.toolCallId])).toEqual([
+      ['user', undefined],
+      ['assistant', undefined],
+      ['tool', 'a'],
+      ['tool', 'b'],
+    ]);
+    expect(projected.at(-1)).toMatchObject({ role: 'tool', toolCallId: 'b' });
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+  });
+
+  // Regression for the full-compaction prefix gap: a delayed tool result may be
+  // sliced out of the compacted prefix (the split is computed on the raw,
+  // misordered history). With synthesizeMissing the sliced projection must still
+  // close the exchange so the summary request is not rejected.
+  it('closes a tool call whose delayed result is sliced out of a compaction prefix', () => {
+    const fullHistory: ContextMessage[] = [
+      user('u1'),
+      assistant(['a']),
+      user('middle'),
+      assistant(['b']),
+      tool('b'),
+      user('later'),
+      tool('a'),
+    ];
+    // The strategy may split after tool('b'), excluding the distant tool('a').
+    const prefix = fullHistory.slice(0, 5);
+    const projected = project(prefix, { synthesizeMissing: true });
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+    const aIndex = projected.findIndex((m) => m.toolCalls.some((tc) => tc.id === 'a'));
+    expect(projected[aIndex + 1]).toMatchObject({ role: 'tool', toolCallId: 'a' });
+    // The synthesized result carries the placeholder text, not the real output.
+    expect(textOf(projected[aIndex + 1])).toContain('not available');
+  });
+
+  it('does not move a tool result whose toolCallId matches no assistant tool_use', () => {
+    const history: ContextMessage[] = [
+      user('u1'),
+      assistant(['a']),
+      tool('a'),
+      tool('orphan-result'),
+      user('u2'),
+    ];
+    const projected = project(history);
+    // The stray result stays where it was; nothing references it.
+    expect(projected.map((m) => [m.role, m.toolCallId])).toEqual([
+      ['user', undefined],
+      ['assistant', undefined],
+      ['tool', 'a'],
+      ['tool', 'orphan-result'],
+      ['user', undefined],
+    ]);
+  });
+
+  it('does not crash when a tool result appears before its tool_use', () => {
+    const history: ContextMessage[] = [tool('a'), user('u1'), assistant(['a'])];
+    // Forward scan cannot find the result (it is behind the assistant), so the
+    // exchange is left as-is rather than throwing.
+    expect(() => project(history)).not.toThrow();
+  });
+
+  it('preserves compaction summaries and empty assistants while repairing', () => {
+    const history: ContextMessage[] = [
+      compactionSummary(),
+      user('u1'),
+      assistant(['a']),
+      notification('ping'),
+      emptyAssistant(),
+      tool('a'),
+    ];
+    const projected = project(history);
+    expect(findMisplacedToolUses(projected)).toEqual([]);
+    const aIndex = projected.findIndex((m) => m.toolCalls.some((tc) => tc.id === 'a'));
+    expect(projected[aIndex + 1]).toMatchObject({ role: 'tool', toolCallId: 'a' });
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Property-based fuzz test
+// ---------------------------------------------------------------------------
+//
+// Generate a large number of histories with randomized, worst-case misordering
+// (sandwiched user messages, distant results, parallel calls, pending calls,
+// empty assistants, compaction summaries) and assert the projector ALWAYS
+// produces a history that satisfies the adjacency invariant. This is the guard
+// that catches regressions which would otherwise strand the user with HTTP 400.
+
+describe('project adjacency invariant (fuzz)', () => {
+  it('holds for thousands of randomized histories', () => {
+    const rng = mulberry32(0x5eed_c0de);
+    const iterations = 4000;
+    for (let n = 0; n < iterations; n++) {
+      const history = generateHistory(rng, n);
+      const projected = project(history);
+      const violations = findMisplacedToolUses(projected);
+      expect(
+        violations,
+        `adjacency invariant violated at iteration ${n}\n` +
+          `history:   ${JSON.stringify(history.map(label))}\n` +
+          `projected: ${JSON.stringify(projected.map(label))}`,
+      ).toEqual([]);
+    }
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Fuzz generator
+// ---------------------------------------------------------------------------
+
+type Rng = () => number;
+
+function label(message: Message): string {
+  const id = message.toolCallId ?? message.toolCalls.map((toolCall) => toolCall.id).join(',');
+  return `${message.role}:${id}`;
+}
+
+// mulberry32 requires unsigned 32-bit wrapping arithmetic (`>>> 0`), which
+// `Math.trunc` does not provide, so the prefer-math-trunc lint is a false
+// positive here.
+/* eslint-disable unicorn/prefer-math-trunc */
+function mulberry32(seed: number): Rng {
+  let state = seed >>> 0;
+  return () => {
+    state = (state + 0x6d2b79f5) >>> 0;
+    let t = state;
+    t = Math.imul(t ^ (t >>> 15), t | 1);
+    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
+  };
+}
+/* eslint-enable unicorn/prefer-math-trunc */
+
+function pick<T>(rng: Rng, items: readonly T[]): T {
+  return items[Math.floor(rng() * items.length)]!;
+}
+
+function generateHistory(rng: Rng, seed: number): ContextMessage[] {
+  const messages: ContextMessage[] = [];
+  let nextId = 1;
+  const blockCount = 2 + Math.floor(rng() * 8);
+
+  for (let b = 0; b < blockCount; b++) {
+    const kind = pick(rng, ['user', 'exchange', 'notification', 'empty', 'compaction'] as const);
+    switch (kind) {
+      case 'user':
+        messages.push(user(`u-${seed}-${b}`));
+        break;
+      case 'notification':
+        messages.push(notification(`n-${seed}-${b}`));
+        break;
+      case 'empty':
+        messages.push(emptyAssistant());
+        break;
+      case 'compaction':
+        messages.push(compactionSummary());
+        break;
+      case 'exchange': {
+        const arity = 1 + Math.floor(rng() * 3);
+        const ids: string[] = [];
+        for (let k = 0; k < arity; k++) {
+          ids.push(`c${nextId++}`);
+        }
+        messages.push(assistant(ids));
+        // Decide which results are recorded (some may be pending).
+        const recorded = ids.filter(() => rng() > 0.25);
+        // Randomize result order to simulate parallel calls completing out of order.
+        shuffle(recorded, rng);
+        // Randomly inject a sandwiched user/notification before the results.
+        if (rng() > 0.5) {
+          messages.push(pick(rng, [user(`sandwich-${seed}-${b}`), notification(`sandwich-n-${seed}-${b}`)]));
+        }
+        // Randomly delay one recorded result past a following exchange.
+        let delayed: ContextMessage | undefined;
+        if (recorded.length > 0 && rng() > 0.6) {
+          delayed = tool(recorded.pop()!);
+        }
+        for (const id of recorded) {
+          messages.push(tool(id));
+        }
+        // Possibly emit a full extra exchange before the delayed result lands.
+        if (delayed !== undefined) {
+          if (rng() > 0.4) {
+            const laterIds = [`c${nextId++}`];
+            messages.push(assistant(laterIds));
+            messages.push(tool(laterIds[0]!));
+          }
+          messages.push(delayed);
+        }
+        break;
+      }
+    }
+  }
+  return messages;
+}
+
+function shuffle<T>(items: T[], rng: Rng): void {
+  for (let i = items.length - 1; i > 0; i--) {
+    const j = Math.floor(rng() * (i + 1));
+    [items[i], items[j]] = [items[j]!, items[i]!];
+  }
+}
diff --git a/packages/agent-core/test/agent/injection/manager.test.ts b/packages/agent-core/test/agent/injection/manager.test.ts
index a8a91ea93..5b4818f7c 100644
--- a/packages/agent-core/test/agent/injection/manager.test.ts
+++ b/packages/agent-core/test/agent/injection/manager.test.ts
@@ -1,5 +1,6 @@
-import { describe, expect, it } from 'vitest';
+import { describe, expect, it, vi } from 'vitest';
 
+import type { BackgroundTaskInfo } from '../../../src/agent/background';
 import { DynamicInjector } from '../../../src/agent/injection/injector';
 import { InjectionManager } from '../../../src/agent/injection/manager';
 import { TodoListReminderInjector } from '../../../src/agent/injection/todo-list';
@@ -15,9 +16,9 @@ class RecordingInjector extends DynamicInjector {
     super.onContextClear();
   }
 
-  override onContextCompacted(compactedCount: number): void {
+  override onContextCompacted(): void {
     this.compactionCalls += 1;
-    super.onContextCompacted(compactedCount);
+    super.onContextCompacted();
   }
 
   protected override getInjection(): string | undefined {
@@ -28,7 +29,7 @@ class RecordingInjector extends DynamicInjector {
 class BoomInjector extends DynamicInjector {
   override readonly injectionVariant = 'boom_test';
 
-  override onContextCompacted(_compactedCount: number): void {
+  override onContextCompacted(): void {
     throw new Error('boom-compact');
   }
 
@@ -49,7 +50,7 @@ describe('InjectionManager.onContextCompacted', () => {
     const b = new RecordingInjector(ctx.agent);
     installInjectors(ctx.agent.injection, [a, b]);
 
-    ctx.agent.injection.onContextCompacted(3);
+    ctx.agent.injection.onContextCompacted();
 
     expect(a.compactionCalls).toBe(1);
     expect(b.compactionCalls).toBe(1);
@@ -62,7 +63,7 @@ describe('InjectionManager.onContextCompacted', () => {
     installInjectors(ctx.agent.injection, [new BoomInjector(ctx.agent), recorder]);
 
     expect(() => {
-      ctx.agent.injection.onContextCompacted(2);
+      ctx.agent.injection.onContextCompacted();
     }).not.toThrow();
     expect(recorder.compactionCalls).toBe(1);
   });
@@ -74,11 +75,11 @@ describe('InjectionManager.onContextCompacted', () => {
     installInjectors(ctx.agent.injection, [new BoomInjector(ctx.agent), recorder]);
 
     expect(() => {
-      ctx.agent.injection.onContextCompacted(1);
+      ctx.agent.injection.onContextCompacted();
     }).not.toThrow();
     expect(recorder.compactionCalls).toBe(1);
 
-    ctx.agent.injection.onContextCompacted(1);
+    ctx.agent.injection.onContextCompacted();
     expect(recorder.compactionCalls).toBe(2);
   });
 
@@ -112,3 +113,46 @@ describe('InjectionManager registration', () => {
     expect(injectors.some((injector) => injector instanceof TodoListReminderInjector)).toBe(true);
   });
 });
+
+describe('InjectionManager.injectAfterCompaction — active background tasks', () => {
+  const fakeTask = {
+    taskId: 'process-abc123',
+    kind: 'process',
+    description: 'run the full test suite',
+    status: 'running',
+  } as unknown as BackgroundTaskInfo;
+
+  function backgroundReminderTexts(agent: ReturnType<typeof testAgent>['agent']): string[] {
+    return agent.context.history
+      .filter(
+        (message) =>
+          message.origin?.kind === 'injection' &&
+          message.origin.variant === 'background_task_status',
+      )
+      .map((message) =>
+        message.content.map((part) => (part.type === 'text' ? part.text : '')).join(''),
+      );
+  }
+
+  it('re-injects active background tasks after compaction (they were dropped from the folded context)', async () => {
+    const ctx = testAgent();
+    ctx.configure();
+    vi.spyOn(ctx.agent.background, 'list').mockReturnValue([fakeTask]);
+
+    await ctx.agent.injection.injectAfterCompaction();
+
+    const texts = backgroundReminderTexts(ctx.agent);
+    expect(texts).toHaveLength(1);
+    expect(texts[0]).toContain('active_background_tasks');
+  });
+
+  it('injects nothing when there are no active background tasks', async () => {
+    const ctx = testAgent();
+    ctx.configure();
+    vi.spyOn(ctx.agent.background, 'list').mockReturnValue([]);
+
+    await ctx.agent.injection.injectAfterCompaction();
+
+    expect(backgroundReminderTexts(ctx.agent)).toHaveLength(0);
+  });
+});
diff --git a/packages/agent-core/test/agent/permission.test.ts b/packages/agent-core/test/agent/permission.test.ts
index 8d0b27712..3e3a75d6e 100644
--- a/packages/agent-core/test/agent/permission.test.ts
+++ b/packages/agent-core/test/agent/permission.test.ts
@@ -276,6 +276,46 @@ describe('Permission auto mode', () => {
     );
   });
 
+  it('reinjects the auto mode reminder after context compaction', async () => {
+    const appendSystemReminder = vi.fn();
+    const agent = {
+      permission: { mode: 'auto' },
+      context: { history: [], appendSystemReminder },
+    } as unknown as Agent;
+    const injector = new PermissionModeInjector(agent);
+
+    await injector.inject();
+    appendSystemReminder.mockClear();
+    injector.onContextCompacted();
+    await injector.inject();
+
+    expect(appendSystemReminder).toHaveBeenCalledWith(
+      expect.stringContaining('Do NOT call AskUserQuestion while auto mode is active'),
+      { kind: 'injection', variant: 'permission_mode' },
+    );
+  });
+
+  it('keeps the auto mode exit reminder after compaction if the mode changes', async () => {
+    const appendSystemReminder = vi.fn();
+    const permission = { mode: 'auto' as PermissionMode };
+    const agent = {
+      permission,
+      context: { history: [], appendSystemReminder },
+    } as unknown as Agent;
+    const injector = new PermissionModeInjector(agent);
+
+    await injector.inject();
+    appendSystemReminder.mockClear();
+    injector.onContextCompacted();
+    permission.mode = 'manual';
+    await injector.inject();
+
+    expect(appendSystemReminder).toHaveBeenCalledWith(
+      expect.stringContaining('Auto permission mode is no longer active'),
+      { kind: 'injection', variant: 'permission_mode' },
+    );
+  });
+
   it('blocks AskUserQuestion in auto mode before execution', async () => {
     const { manager, requestApproval } = makePermissionManager(async () => ({
       decision: 'approved',
diff --git a/packages/agent-core/test/agent/records/index.test.ts b/packages/agent-core/test/agent/records/index.test.ts
index f9e07b6c4..47571446b 100644
--- a/packages/agent-core/test/agent/records/index.test.ts
+++ b/packages/agent-core/test/agent/records/index.test.ts
@@ -425,9 +425,11 @@ describe('agent replay range build', () => {
         instruction: 'keep facts',
         result: {
           summary: 'Compacted summary.',
+          contextSummary: 'Compacted summary.',
           compactedCount: 0,
           tokensBefore: 10,
           tokensAfter: 3,
+          keptUserMessageCount: 0,
         },
       }),
     ]);
diff --git a/packages/agent-core/test/agent/resume.test.ts b/packages/agent-core/test/agent/resume.test.ts
index de24bff33..12afac0dc 100644
--- a/packages/agent-core/test/agent/resume.test.ts
+++ b/packages/agent-core/test/agent/resume.test.ts
@@ -79,7 +79,8 @@ describe('Agent resume', () => {
         system: <system-prompt>
         tools: Bash
         messages:
-          assistant: text "Historical compacted summary."
+          user: text "Historical prompt"
+          user: text "Historical compacted summary."
           user: text "Fresh prompt after resume"
           user: text <plan-mode-reminder>
     `);
@@ -355,7 +356,11 @@ describe('Agent resume', () => {
 
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'Historical prompt before compaction' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         content: [{ type: 'text', text: 'Compacted implementation notes.' }],
         origin: { kind: 'compaction_summary' },
       }),
@@ -372,9 +377,11 @@ describe('Agent resume', () => {
         type: 'compaction',
         result: {
           summary: 'Compacted implementation notes.',
+          contextSummary: 'Compacted implementation notes.',
           compactedCount: 1,
           tokensBefore: 120,
           tokensAfter: 24,
+          keptUserMessageCount: 1,
         },
         instruction: 'preserve implementation notes',
       }),
diff --git a/packages/agent-core/test/prompt-placeholders.test.ts b/packages/agent-core/test/prompt-placeholders.test.ts
index a98e977e3..9566415c7 100644
--- a/packages/agent-core/test/prompt-placeholders.test.ts
+++ b/packages/agent-core/test/prompt-placeholders.test.ts
@@ -22,8 +22,8 @@ const SRC = join(import.meta.dirname, '..', 'src');
 // `.md` files rendered through `renderPrompt`. Keep in sync when a new
 // templated prompt file is introduced.
 const TEMPLATED = new Set([
-  'profile/default/system.md',
   'agent/compaction/compaction-instruction.md',
+  'profile/default/system.md',
   'tools/builtin/file/read.md',
   'tools/builtin/file/read-media.md',
   'tools/builtin/shell/bash.md',
diff --git a/packages/agent-core/test/services/message-transcript.test.ts b/packages/agent-core/test/services/message-transcript.test.ts
index 4ec462f2d..c249c0174 100644
--- a/packages/agent-core/test/services/message-transcript.test.ts
+++ b/packages/agent-core/test/services/message-transcript.test.ts
@@ -64,7 +64,12 @@ function assistantStep(uuid: string, text: string, time?: number): AgentRecord[]
   ];
 }
 
-function compaction(summary: string, compactedCount: number, time?: number): AgentRecord {
+function compaction(
+  summary: string,
+  compactedCount: number,
+  time?: number,
+  keptUserMessageCount?: number,
+): AgentRecord {
   return {
     type: 'context.apply_compaction',
     summary,
@@ -72,6 +77,7 @@ function compaction(summary: string, compactedCount: number, time?: number): Age
     tokensBefore: 1000,
     tokensAfter: 100,
     time,
+    ...(keptUserMessageCount === undefined ? {} : { keptUserMessageCount }),
   } as AgentRecord;
 }
 
@@ -92,26 +98,63 @@ describe('reduceWireRecords', () => {
     expect(foldedLength).toBe(2);
   });
 
-  it('compaction keeps the prefix and inserts the summary at the fold point', () => {
+  it('compaction keeps the prefix and appends the user-role summary', () => {
     const { entries, foldedLength } = reduceWireRecords([
       appendMessage(userMessage('u1')),
       ...assistantStep('s1', 'a1'),
       appendMessage(userMessage('u2')),
       ...assistantStep('s2', 'a2'),
-      // folded history is [u1, a1, u2, a2]; compact the first 3, keep a2.
-      compaction('SUM', 3),
+      compaction('SUM', 4),
       appendMessage(userMessage('u3')),
     ]);
     expect(entries.map((e) => textOf(e.message))).toEqual([
       'u1',
       'a1',
       'u2',
-      'SUM',
       'a2',
+      'SUM',
       'u3',
     ]);
-    expect(entries[3]!.message.origin).toEqual({ kind: 'compaction_summary' });
-    // live folded view would be [SUM, a2, u3]
+    expect(entries[4]!.message.origin).toEqual({ kind: 'compaction_summary' });
+    expect(entries[4]!.message.role).toBe('user');
+    // live folded view would be [u1, u2, SUM, u3]
+    expect(foldedLength).toBe(4);
+  });
+
+  it('keeps shell and local-command output in the transcript but not foldedLength', () => {
+    const { entries, foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      appendMessage(userMessage('! pwd', { kind: 'shell_command', phase: 'input' })),
+      appendMessage(userMessage('local output', { kind: 'injection', variant: 'local-command-stdout' })),
+      ...assistantStep('s1', 'a1'),
+      {
+        type: 'context.apply_compaction',
+        summary: 'SUM',
+        compactedCount: 4,
+        tokensBefore: 100,
+        tokensAfter: 20,
+        keptUserMessageCount: 1,
+      } as AgentRecord,
+      appendMessage(userMessage('u2')),
+    ]);
+
+    expect(entries.map((e) => textOf(e.message))).toEqual([
+      'u1',
+      '! pwd',
+      'local output',
+      'a1',
+      'SUM',
+      'u2',
+    ]);
+    expect(entries.map((e) => e.message.role)).toEqual([
+      'user',
+      'user',
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    // 1 kept real user message + summary + u2 appended after compaction.
     expect(foldedLength).toBe(3);
   });
 
@@ -120,11 +163,103 @@ describe('reduceWireRecords', () => {
       appendMessage(userMessage('u1')),
       compaction('S1', 1),
       appendMessage(userMessage('u2')),
-      // folded = [S1, u2]; compact both.
-      compaction('S2', 2),
+      compaction('S2', 3),
     ]);
     expect(entries.map((e) => textOf(e.message))).toEqual(['u1', 'S1', 'u2', 'S2']);
-    expect(foldedLength).toBe(1);
+    // live folded view would be [u1, u2, S2]
+    expect(foldedLength).toBe(3);
+  });
+
+  it('uses the recorded kept-user count for foldedLength when present', () => {
+    // The live context kept only the most recent real user message (e.g. the
+    // older ones were truncated in a prior compaction, or a clear dropped
+    // them). The full transcript still holds all three, so re-deriving from
+    // it would yield 3 and disagree with the live context. The reducer must
+    // trust the count recorded by ContextMemory.applyCompaction.
+    const { foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      appendMessage(userMessage('u2')),
+      appendMessage(userMessage('u3')),
+      {
+        type: 'context.apply_compaction',
+        summary: 'SUM',
+        compactedCount: 3,
+        tokensBefore: 100,
+        tokensAfter: 20,
+        keptUserMessageCount: 1,
+      } as AgentRecord,
+      appendMessage(userMessage('u4')),
+    ]);
+    // 1 kept user message + summary + u4 appended after compaction.
+    expect(foldedLength).toBe(3);
+  });
+
+  it('drops a late tool result after compaction closes an open exchange', () => {
+    const { entries, foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      loopEvent({ type: 'step.begin', uuid: 's1', turnId: 't', step: 0 }),
+      loopEvent({
+        type: 'tool.call',
+        uuid: 'c1',
+        turnId: 't',
+        step: 0,
+        stepUuid: 's1',
+        toolCallId: 'call_1',
+        name: 'Bash',
+        arguments: '{"command":"ls"}',
+      }),
+      compaction('SUM', 3),
+      loopEvent({
+        type: 'tool.result',
+        parentUuid: 'c1',
+        toolCallId: 'call_1',
+        result: { output: 'late result' },
+      }),
+      appendMessage(userMessage('u2')),
+    ]);
+
+    // Compaction closes the open exchange, so the late tool result is an
+    // orphan and dropped — matching ContextMemory — and the following user
+    // message is appended normally instead of being stranded in `deferred`.
+    expect(entries.map((e) => e.message.role)).toEqual(['user', 'assistant', 'user', 'user']);
+    expect(entries.map((e) => textOf(e.message))).toEqual(['u1', '', 'SUM', 'u2']);
+    // live folded view would be [u1, SUM, u2]
+    expect(foldedLength).toBe(3);
+  });
+
+  it('reproduces the legacy [summary, tail] fold length for records without keptUserMessageCount', () => {
+    // A pre-rework record (no keptUserMessageCount) kept history.slice(compactedCount)
+    // verbatim, and ContextMemory's legacy restore now reproduces [summary, ...tail].
+    // The reducer must track that same folded length — 1 + (preCompactionLength -
+    // compactedCount) — not the re-derived kept-user count, or MessageService's
+    // length comparison diverges from the live context for old sessions.
+    const { foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      ...assistantStep('s1', 'a1'),
+      appendMessage(userMessage('u2')),
+      ...assistantStep('s2', 'a2'),
+      compaction('SUM', 1),
+    ]);
+    // Pre-compaction live history = [u1, a1, u2, a2] (4); legacy restore keeps
+    // [SUM, ...slice(1)] = [SUM, a1, u2, a2] = 4. (Re-deriving kept users gives 3.)
+    expect(foldedLength).toBe(4);
+  });
+
+  it('ignores pre-clear prompts when re-deriving a legacy fold length', () => {
+    // Legacy record (no keptUserMessageCount) compacting after a /clear with no
+    // tail re-derives the kept-user count, but only from post-clear messages —
+    // the live context dropped u1/u2 at the clear. Counting them would overstate
+    // foldedLength and make MessageService skip the unflushed live tail.
+    const { foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      appendMessage(userMessage('u2')),
+      { type: 'context.clear' } as AgentRecord,
+      appendMessage(userMessage('u3')),
+      compaction('SUM', 1),
+    ]);
+    // Post-clear live history = [u3] (1); restore keeps [u3, SUM] = 2.
+    // (Re-deriving over the full transcript would wrongly give 4.)
+    expect(foldedLength).toBe(2);
   });
 
   it('undo removes through the last real user prompt and skips injections', () => {
@@ -424,8 +559,9 @@ describe('MessageService over a compacted wire log', () => {
       ...assistantStep('s1', 'a1', SESSION_CREATED_AT + 2_000),
       appendMessage(userMessage('u2'), SESSION_CREATED_AT + 3_000),
       ...assistantStep('s2', 'a2', SESSION_CREATED_AT + 4_000),
-      // folded = [u1, a1, u2, a2] → compact first 3.
-      compaction('SUM', 3, SESSION_CREATED_AT + 5_000),
+      // New-format record: the summary covered all 4 messages and 2 user
+      // prompts were kept verbatim, so the live fold is [u1, u2, SUM] below.
+      compaction('SUM', 4, SESSION_CREATED_AT + 5_000, 2),
     ];
     await mkdir(path.join(dir, 'agents', 'main'), { recursive: true });
     await writeFile(
@@ -433,19 +569,16 @@ describe('MessageService over a compacted wire log', () => {
       records.map((r) => JSON.stringify(r)).join('\n') + '\n',
       'utf8',
     );
-    // What getContext would return after the fold.
+    // What getContext would return after the fold: kept user messages + summary.
     liveHistory = [
+      userMessage('u1'),
+      userMessage('u2'),
       {
-        role: 'assistant',
+        role: 'user',
         content: [{ type: 'text', text: 'SUM' }],
         toolCalls: [],
         origin: { kind: 'compaction_summary' },
       } as ContextMessage,
-      {
-        role: 'assistant',
-        content: [{ type: 'text', text: 'a2' }],
-        toolCalls: [],
-      } as ContextMessage,
     ];
     const rpc: Partial<CoreRPC> = {
       listSessions: vi.fn().mockImplementation(async () => [summary()]),
@@ -473,8 +606,8 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2']);
-    expect(asc[3]!.metadata).toEqual({ origin: { kind: 'compaction_summary' } });
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM']);
+    expect(asc[4]!.metadata).toEqual({ origin: { kind: 'compaction_summary' } });
   });
 
   it('uses wire record times for created_at, strictly increasing', async () => {
@@ -495,7 +628,7 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2', 'u3-live']);
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM', 'u3-live']);
   });
 
   it('get() resolves ids against the same full transcript', async () => {
@@ -511,8 +644,9 @@ describe('MessageService over a compacted wire log', () => {
     const page = await impl.list(SESSION_ID, { page_size: 100 });
     const asc = [...page.items].reverse();
     expect(asc.map((m) => (m.content[0] as { text?: string }).text)).toEqual([
+      'u1',
+      'u2',
       'SUM',
-      'a2',
     ]);
   });
 
@@ -530,6 +664,6 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2', 'u3']);
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM', 'u3']);
   });
 });
diff --git a/packages/agent-core/test/session/init.test.ts b/packages/agent-core/test/session/init.test.ts
index 2da472ac5..1a8096c16 100644
--- a/packages/agent-core/test/session/init.test.ts
+++ b/packages/agent-core/test/session/init.test.ts
@@ -166,6 +166,53 @@ describe('Session.init', () => {
     }
   });
 
+  it('refreshes AGENTS.md from a resumed native session system prompt', async () => {
+    const workDir = await makeTempDir();
+    const sessionDir = await makeTempDir();
+    await mkdir(join(workDir, '.git'));
+    await writeFile(join(workDir, 'AGENTS.md'), 'initial resume instructions', 'utf-8');
+
+    const firstSession = new Session({
+      id: 'test-resume-system-prompt-refresh',
+      kaos: testKaos.withCwd(workDir),
+      persistenceKaos: testKaos.withCwd(workDir),
+      homedir: sessionDir,
+      rpc: createSessionRpc([]),
+      skills: { explicitDirs: [join(workDir, 'missing-skills')] },
+      providerManager: testProviderManager(),
+    });
+    try {
+      const agent = await firstSession.createMain();
+      expect(agent.config.systemPrompt).toContain('initial resume instructions');
+    } finally {
+      await firstSession.closeForReload();
+    }
+
+    await writeFile(join(workDir, 'AGENTS.md'), 'updated resume instructions', 'utf-8');
+
+    const resumedSession = new Session({
+      id: 'test-resume-system-prompt-refresh',
+      kaos: testKaos.withCwd(workDir),
+      persistenceKaos: testKaos.withCwd(workDir),
+      homedir: sessionDir,
+      rpc: createSessionRpc([]),
+      skills: { explicitDirs: [join(workDir, 'missing-skills')] },
+      providerManager: testProviderManager(),
+    });
+    try {
+      await resumedSession.resume();
+      const resumedAgent = await resumedSession.ensureAgentResumed('main');
+      expect(resumedAgent.config.systemPrompt).toContain('initial resume instructions');
+
+      await resumedAgent.refreshSystemPrompt();
+
+      expect(resumedAgent.config.systemPrompt).toContain('updated resume instructions');
+      expect(resumedAgent.config.systemPrompt).not.toContain('initial resume instructions');
+    } finally {
+      await resumedSession.close();
+    }
+  });
+
   it('rebuilds builtin tools when rebinding the session tool kaos', async () => {
     const workDir = await makeTempDir();
     const sessionDir = await makeTempDir();
diff --git a/packages/agent-core/test/tools/cron/cron-list.test.ts b/packages/agent-core/test/tools/cron/cron-list.test.ts
index 3b70c5282..5ae398cc7 100644
--- a/packages/agent-core/test/tools/cron/cron-list.test.ts
+++ b/packages/agent-core/test/tools/cron/cron-list.test.ts
@@ -279,7 +279,7 @@ describe('CronListTool', () => {
   });
 
   it('one-shot nextFireAt is anchored at createdAt, not nowMs (pending today’s slot)', async () => {
-    // Scenario from the Codex review: a daily one-shot scheduled for
+    // Scenario from a review: a daily one-shot scheduled for
     // 12:00 that the agent could not yet deliver (busy turn, manual
     // tick mode) and is listed 5 minutes after the ideal slot. The
     // scheduler will still fire today's 12:00 slot from createdAt, so
diff --git a/packages/agent-core/test/tools/goal.test.ts b/packages/agent-core/test/tools/goal.test.ts
index d89f19642..736e11bee 100644
--- a/packages/agent-core/test/tools/goal.test.ts
+++ b/packages/agent-core/test/tools/goal.test.ts
@@ -252,7 +252,7 @@ describe('SetGoalBudgetTool', () => {
 describe('UpdateGoalTool', () => {
   it('guards against premature blocked status', () => {
     const description = new UpdateGoalTool(fakeAgent()).description.toLowerCase();
-    // codex spec.rs:80 wording (without the 3-turn machinery kimi lacks).
+    // Reference spec wording (without the 3-turn machinery kimi lacks).
     expect(description).toContain('hard, slow');
     // UpdateGoal also injects the completion/blocked outcome prompt, so it does
     // more than "only record the status".
diff --git a/packages/agent-core/test/utils/tokens.test.ts b/packages/agent-core/test/utils/tokens.test.ts
new file mode 100644
index 000000000..4b22d78e8
--- /dev/null
+++ b/packages/agent-core/test/utils/tokens.test.ts
@@ -0,0 +1,61 @@
+import type { ContentPart } from '@moonshot-ai/kosong';
+import { describe, expect, it } from 'vitest';
+
+import {
+  estimateTokensForContentPart,
+  estimateTokensForMessage,
+  MEDIA_TOKEN_ESTIMATE,
+} from '../../src/utils/tokens';
+
+// Regression coverage for CMP-03: media content parts (image/audio/video) must
+// NOT estimate to 0 tokens. When they did, compaction triggers, the
+// overflow-shrink budget, the kept-user 20k budget, and the reported
+// `tokensAfter` all went blind to the single largest context contributor (a
+// base64 image data URL), so a vision-heavy session could overflow the provider
+// while the estimator reported a near-empty context.
+describe('estimateTokensForContentPart — media parts', () => {
+  const imagePart: ContentPart = {
+    type: 'image_url',
+    imageUrl: { url: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB' },
+  };
+  const audioPart: ContentPart = {
+    type: 'audio_url',
+    audioUrl: { url: 'data:audio/mp3;base64,AAAA' },
+  };
+  const videoPart: ContentPart = {
+    type: 'video_url',
+    videoUrl: { url: 'data:video/mp4;base64,AAAA' },
+  };
+
+  it('estimates an image part as a substantial, non-zero token cost', () => {
+    expect(estimateTokensForContentPart(imagePart)).toBe(MEDIA_TOKEN_ESTIMATE);
+    expect(MEDIA_TOKEN_ESTIMATE).toBeGreaterThan(100);
+  });
+
+  it('estimates audio and video parts as non-zero', () => {
+    expect(estimateTokensForContentPart(audioPart)).toBeGreaterThan(0);
+    expect(estimateTokensForContentPart(videoPart)).toBeGreaterThan(0);
+  });
+
+  it('uses a bounded fixed estimate, not the base64 payload length', () => {
+    // A ~4 MB base64 data URL must not be counted as text (which would yield
+    // ~1M "tokens"); the estimate must stay a small bounded value.
+    const huge = 'A'.repeat(4_000_000);
+    const bigImage: ContentPart = {
+      type: 'image_url',
+      imageUrl: { url: `data:image/png;base64,${huge}` },
+    };
+    const estimate = estimateTokensForContentPart(bigImage);
+    expect(estimate).toBeGreaterThan(0);
+    expect(estimate).toBeLessThan(50_000);
+  });
+
+  it('includes media when estimating a whole message', () => {
+    const message = {
+      role: 'user',
+      content: [{ type: 'text', text: 'see screenshot' }, imagePart] satisfies ContentPart[],
+    };
+    // The image must dominate the ~4-token text, not be free.
+    expect(estimateTokensForMessage(message)).toBeGreaterThan(100);
+  });
+});
diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
index 75d4d714e..72f243941 100644
--- a/packages/kosong/src/providers/anthropic.ts
+++ b/packages/kosong/src/providers/anthropic.ts
@@ -37,6 +37,7 @@ import type {
   ToolUseBlockParam,
 } from '@anthropic-ai/sdk/resources/messages/messages.js';
 
+import { mergeConsecutiveUserMessages } from './merge-user-messages';
 import { mergeRequestHeaders, resolveAuthBackedClient } from './request-auth';
 import {
   normalizeToolCallIdsForProvider,
@@ -410,15 +411,10 @@ function injectCacheControlOnLastBlock(messages: MessageParam[]): void {
 }
 
 /**
- * Check whether a MessageParam is a user message whose content consists
- * entirely of `tool_result` blocks.
- *
- * Used to detect adjacent tool-result-only messages that must be merged
- * before hitting the Anthropic wire. Per the Messages API parallel-tool-use
- * spec, all `tool_result` blocks answering parallel `tool_use` calls must
- * live in a single user message — splitting them across consecutive user
- * messages fails on strict Anthropic-compatible backends (HTTP 400) and
- * silently degrades parallel tool use on api.anthropic.com.
+ * Whether a user MessageParam consists solely of `tool_result` blocks. Used to
+ * keep tool results bundled with each other (parallel-tool-use spec) while
+ * not merging a tool-result user message into an adjacent plain-text user
+ * message — the two carry different semantics and must stay separate.
  */
 function isToolResultOnly(message: MessageParam): boolean {
   if (message.role !== 'user') return false;
@@ -1017,25 +1013,33 @@ export class AnthropicChatProvider implements ChatProvider {
         ]
       : undefined;
 
-    // Convert messages, merging consecutive tool-result-only user messages
-    // into a single user message (Anthropic parallel-tool-use spec).
-    const messages: MessageParam[] = [];
-    const normalizedHistory = normalizeToolCallIdsForProvider(
-      history,
-      ANTHROPIC_TOOL_CALL_ID_POLICY,
+    // Convert messages, then merge consecutive user messages into one. Strict
+    // Anthropic-compatible backends reject consecutive user messages with HTTP
+    // 400 ("roles must alternate"), and api.anthropic.com concatenates them
+    // anyway — so merging is safe for native Anthropic and required for strict
+    // backends. Consecutive plain-text user messages arise naturally after
+    // compaction (kept user prompts + user-role summary + injected reminders)
+    // and from back-to-back system messages converted to user role above; a
+    // tool-result user turn followed by a text turn arises from steering after
+    // a tool result. The shared helper applies the asymmetric merge rule (see
+    // mergeConsecutiveUserMessages) so this provider and Gemini/Vertex stay in
+    // step.
+    const messages = mergeConsecutiveUserMessages(
+      normalizeToolCallIdsForProvider(history, ANTHROPIC_TOOL_CALL_ID_POLICY).map((msg) =>
+        convertMessage(msg, this._model),
+      ),
+      {
+        isUser: (message) => message.role === 'user',
+        isToolResultOnly,
+        merge: (last, next) => ({
+          ...last,
+          content: [
+            ...(last.content as ContentBlockParam[]),
+            ...(next.content as ContentBlockParam[]),
+          ],
+        }),
+      },
     );
-    for (const msg of normalizedHistory) {
-      const converted = convertMessage(msg, this._model);
-      const last = messages.at(-1);
-      if (last !== undefined && isToolResultOnly(last) && isToolResultOnly(converted)) {
-        last.content = [
-          ...(last.content as ContentBlockParam[]),
-          ...(converted.content as ContentBlockParam[]),
-        ];
-      } else {
-        messages.push(converted);
-      }
-    }
 
     // Inject cache_control on last content block of last message (after merge,
     // so it lands on the final tool_result block in the merged user message).
diff --git a/packages/kosong/src/providers/google-genai.ts b/packages/kosong/src/providers/google-genai.ts
index 522c48a52..39fcc0db4 100644
--- a/packages/kosong/src/providers/google-genai.ts
+++ b/packages/kosong/src/providers/google-genai.ts
@@ -16,6 +16,7 @@ import type {
 import type { Tool } from '#/tool';
 import type { TokenUsage } from '#/usage';
 import { ApiError as GoogleApiError, GoogleGenAI as GenAIClient } from '@google/genai';
+import { mergeConsecutiveUserMessages } from './merge-user-messages';
 
 import { requireProviderApiKey, resolveAuthBackedClient } from './request-auth';
 
@@ -447,7 +448,16 @@ export function messagesToGoogleGenAIContents(messages: Message[]): GoogleConten
     i += 1;
   }
 
-  return contents;
+  // Gemini/Vertex require strictly alternating user/model turns. Consecutive
+  // user Contents arise after compaction (`[prompts, summary, reminders]`) and
+  // when a user turn follows a tool result; collapse them into one user turn.
+  return mergeConsecutiveUserMessages(contents, {
+    isUser: (content) => content.role === 'user',
+    isToolResultOnly: (content) =>
+      content.parts.length > 0 &&
+      content.parts.every((part) => part.function_response !== undefined),
+    merge: (last, next) => ({ ...last, parts: [...last.parts, ...next.parts] }),
+  });
 }
 export class GoogleGenAIStreamedMessage implements StreamedMessage {
   private _id: string | null = null;
diff --git a/packages/kosong/src/providers/merge-user-messages.ts b/packages/kosong/src/providers/merge-user-messages.ts
new file mode 100644
index 000000000..95db1182e
--- /dev/null
+++ b/packages/kosong/src/providers/merge-user-messages.ts
@@ -0,0 +1,64 @@
+/**
+ * Collapses consecutive same-role "user" turns in a provider's already-converted
+ * wire message list into one turn.
+ *
+ * Strict providers (Anthropic, Gemini/Vertex) reject consecutive user turns with
+ * HTTP 400 ("roles must alternate"). Consecutive user turns arise naturally:
+ *   - after compaction, whose shape is `[kept user prompts, user-role summary,
+ *     injected reminders]` — all role 'user'; and
+ *   - when a user turn (steer/injection) follows a tool result.
+ *
+ * Both only become visible once tool messages have been converted to user-role
+ * turns, which is why this runs at each provider's conversion boundary rather
+ * than in the provider-agnostic projector: the projector deliberately preserves
+ * message structure for lenient providers (OpenAI/Kimi) that accept — and read
+ * more clearly — distinct turns, while strict providers normalize for their own
+ * protocol here. Keeping the algorithm in one place stops a provider from
+ * silently omitting it (the original cause of the Gemini regression).
+ *
+ * The merge is asymmetric, keyed on whether the running turn is tool-result-only:
+ *   - a tool-result-only running turn absorbs whatever follows — another
+ *     tool-result-only turn (the parallel-tool-use spec requires all tool
+ *     results answering parallel calls to share one user turn) or a following
+ *     text turn, yielding a valid `[tool_result, …, text]` turn;
+ *   - a text running turn absorbs only a following text turn, never a leading
+ *     tool-result turn (a tool-result must answer the immediately preceding
+ *     assistant tool_use, which a text turn is not — though in well-formed
+ *     transcripts this ordering never arises).
+ *
+ * @typeParam T - the provider's wire message type (e.g. Anthropic `MessageParam`
+ *   or Google `Content`).
+ * @param messages - the converted wire messages, in order.
+ * @param ops - provider-specific predicates and a content merger.
+ * @param ops.isUser - whether a wire message is a user-role turn.
+ * @param ops.isToolResultOnly - whether a user-role turn carries only tool
+ *   results (no plain text/media).
+ * @param ops.merge - produces a new wire message combining `last` and `next`
+ *   (must not mutate its arguments).
+ * @returns a new array with consecutive user turns merged.
+ */
+export function mergeConsecutiveUserMessages<T>(
+  messages: readonly T[],
+  ops: {
+    readonly isUser: (message: T) => boolean;
+    readonly isToolResultOnly: (message: T) => boolean;
+    readonly merge: (last: T, next: T) => T;
+  },
+): T[] {
+  const out: T[] = [];
+  for (const message of messages) {
+    const lastIndex = out.length - 1;
+    const last = lastIndex >= 0 ? out[lastIndex] : undefined;
+    if (
+      last !== undefined &&
+      ops.isUser(last) &&
+      ops.isUser(message) &&
+      (ops.isToolResultOnly(last) || !ops.isToolResultOnly(message))
+    ) {
+      out[lastIndex] = ops.merge(last, message);
+    } else {
+      out.push(message);
+    }
+  }
+  return out;
+}
diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts
index 6b6f86ba6..470379b83 100644
--- a/packages/kosong/test/anthropic.test.ts
+++ b/packages/kosong/test/anthropic.test.ts
@@ -969,11 +969,12 @@ describe('AnthropicChatProvider', () => {
       expect(trailing.every((b) => b.type === 'tool_result')).toBe(true);
     });
 
-    // Edge case: parallel tool results followed by a plain user text turn —
-    // only the tool_result-only user messages merge; the text message stays
-    // in its own message (proving the predicate is content-shape-aware, not
-    // just role-based).
-    it('text turn after parallel tool_results stays separate', async () => {
+    // Edge case: parallel tool results followed by a plain user text turn.
+    // The tool_result-only user messages merge with each other AND absorb the
+    // following text turn, producing a single `[tool_result, tool_result, text]`
+    // user message. Strict Anthropic-compatible backends reject consecutive
+    // user messages, so the follow-up text must not be left in its own turn.
+    it('merges a follow-up text turn into the preceding tool_results', async () => {
       const provider = createProvider();
       const tcAdd: ToolCall = {
         type: 'function',
@@ -1014,14 +1015,74 @@ describe('AnthropicChatProvider', () => {
       };
       const msgs = body['messages'] as MsgParam[];
 
-      // 4 messages: user prompt, assistant tool_use, merged tool_result user, final text user.
-      expect(msgs).toHaveLength(4);
+      // 3 messages: user prompt, assistant tool_use, and a single merged user
+      // turn holding both tool_results followed by the follow-up text.
+      expect(msgs).toHaveLength(3);
+      expect(msgs[2]!.role).toBe('user');
+      expect(msgs[2]!.content).toHaveLength(3);
+      expect(msgs[2]!.content.slice(0, 2).every((b) => b.type === 'tool_result')).toBe(true);
+      expect(msgs[2]!.content[2]!.type).toBe('text');
+      expect(msgs[2]!.content[2]!.text).toBe('Now summarize');
+    });
+
+    // Single tool call answered, then a follow-up text turn (e.g. an injected
+    // reminder/notification after the tool result). The tool_result and the
+    // text must collapse into one user message so no two user turns are adjacent.
+    it('merges a single tool_result with a following injected text turn', async () => {
+      const provider = createProvider();
+      const tcRead: ToolCall = {
+        type: 'function',
+        id: 'call_read',
+        name: 'read',
+        arguments: '{"path": "a.ts"}',
+      };
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'Read it' }], toolCalls: [] },
+        { role: 'assistant', content: [], toolCalls: [tcRead] },
+        {
+          role: 'tool',
+          content: [{ type: 'text', text: 'file body' }],
+          toolCallId: 'call_read',
+          toolCalls: [],
+        },
+        { role: 'user', content: [{ type: 'text', text: 'system reminder' }], toolCalls: [] },
+      ];
+      const body = await captureRequestBody(provider, '', [], history);
+
+      const msgs = body['messages'] as Array<{
+        role: string;
+        content: Array<{ type: string; text?: string }>;
+      }>;
+
+      // No two adjacent user messages: tool_result + reminder share one turn.
+      const roles = msgs.map((m) => m.role);
+      expect(roles).toEqual(['user', 'assistant', 'user']);
       expect(msgs[2]!.content).toHaveLength(2);
-      expect(msgs[2]!.content.every((b) => b.type === 'tool_result')).toBe(true);
-      expect(msgs[3]!.role).toBe('user');
-      expect(msgs[3]!.content).toHaveLength(1);
-      expect(msgs[3]!.content[0]!.type).toBe('text');
-      expect(msgs[3]!.content[0]!.text).toBe('Now summarize');
+      expect(msgs[2]!.content[0]!.type).toBe('tool_result');
+      expect(msgs[2]!.content[1]!.type).toBe('text');
+      expect(msgs[2]!.content[1]!.text).toBe('system reminder');
+    });
+
+    it('merges consecutive plain-text user messages into one', async () => {
+      const provider = createProvider();
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'First' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Second' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Third' }], toolCalls: [] },
+      ];
+      const body = await captureRequestBody(provider, '', [], history);
+
+      const msgs = body['messages'] as Array<{
+        role: string;
+        content: Array<{ type: string; text?: string }>;
+      }>;
+
+      // Strict Anthropic-compatible backends reject consecutive user messages,
+      // so back-to-back plain-text user turns (e.g. the post-compaction shape
+      // of kept prompts + user-role summary + reminders) must be collapsed.
+      expect(msgs).toHaveLength(1);
+      expect(msgs[0]!.role).toBe('user');
+      expect(msgs[0]!.content.map((block) => block.text)).toEqual(['First', 'Second', 'Third']);
     });
 
     it('assistant with thinking (has encrypted -> ThinkingBlockParam)', async () => {
diff --git a/packages/kosong/test/google-genai.test.ts b/packages/kosong/test/google-genai.test.ts
index 61c51f42c..d8459c226 100644
--- a/packages/kosong/test/google-genai.test.ts
+++ b/packages/kosong/test/google-genai.test.ts
@@ -153,21 +153,20 @@ describe('GoogleGenAIChatProvider', () => {
 
       const contents = messagesToGoogleGenAIContents(messages);
 
-      expect(contents).toHaveLength(2);
-      const first = contents[0] as unknown as {
-        role: string;
-        parts: Array<{ text?: string }>;
-      };
-      expect(first.role).toBe('user');
-      expect(first.parts).toHaveLength(1);
-      expect(first.parts[0]!.text).toBe('<system>You are helpful.</system>');
-      // Original user turn is untouched.
-      const second = contents[1] as unknown as {
+      // The system turn is wrapped as a user turn, then merged with the
+      // following real user turn — Gemini/Vertex would reject the two
+      // consecutive user Contents the wrap would otherwise produce. The
+      // <system>…</system> tags keep the boundary legible within the merged
+      // turn.
+      expect(contents).toHaveLength(1);
+      const merged = contents[0] as unknown as {
         role: string;
         parts: Array<{ text?: string }>;
       };
-      expect(second.role).toBe('user');
-      expect(second.parts[0]!.text).toBe('Hi');
+      expect(merged.role).toBe('user');
+      expect(merged.parts).toHaveLength(2);
+      expect(merged.parts[0]!.text).toBe('<system>You are helpful.</system>');
+      expect(merged.parts[1]!.text).toBe('Hi');
       // No emitted content carries the unsupported "system" role.
       for (const c of contents) {
         expect((c as unknown as { role: string }).role).not.toBe('system');
@@ -212,6 +211,52 @@ describe('GoogleGenAIChatProvider', () => {
       ]);
     });
 
+    it('merges consecutive user messages into one Content (post-compaction shape)', () => {
+      // After compaction the history is `[kept user prompts, user-role summary,
+      // injected reminders]` — all role 'user'. Gemini/Vertex require strictly
+      // alternating user/model turns and reject consecutive user Contents, so
+      // the converter must collapse them into a single user Content.
+      const contents = messagesToGoogleGenAIContents([
+        { role: 'user', content: [{ type: 'text', text: 'Earlier prompt' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Conversation summary' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'A reminder' }], toolCalls: [] },
+      ]);
+
+      expect(contents).toEqual([
+        {
+          role: 'user',
+          parts: [
+            { text: 'Earlier prompt' },
+            { text: 'Conversation summary' },
+            { text: 'A reminder' },
+          ],
+        },
+      ]);
+    });
+
+    it('merges a trailing user turn into the preceding tool-result Content', () => {
+      // A user turn arriving right after a tool result (e.g. steering) would
+      // otherwise produce two consecutive user Contents (the function-response
+      // turn and the steer text), which Gemini/Vertex rejects.
+      const toolCall: ToolCall = {
+        type: 'function',
+        id: 'call_1',
+        name: 'add',
+        arguments: '{"a": 2, "b": 3}',
+      };
+      const contents = messagesToGoogleGenAIContents([
+        { role: 'user', content: [{ type: 'text', text: 'Add 2 and 3' }], toolCalls: [] },
+        { role: 'assistant', content: [], toolCalls: [toolCall] },
+        { role: 'tool', content: [{ type: 'text', text: '5' }], toolCallId: 'call_1', toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Now multiply' }], toolCalls: [] },
+      ]);
+
+      expect(contents.map((c) => c.role)).toEqual(['user', 'model', 'user']);
+      const last = contents.at(-1)!;
+      expect(last.parts.some((p) => p.function_response !== undefined)).toBe(true);
+      expect(last.parts.some((p) => p.text === 'Now multiply')).toBe(true);
+    });
+
     it('multi-turn conversation with system prompt sets system_instruction', async () => {
       const provider = createProvider();
       const history: Message[] = [
diff --git a/packages/kosong/test/strict-role-alternation.test.ts b/packages/kosong/test/strict-role-alternation.test.ts
new file mode 100644
index 000000000..d75d8227a
--- /dev/null
+++ b/packages/kosong/test/strict-role-alternation.test.ts
@@ -0,0 +1,134 @@
+import type { Message } from '#/message';
+import { AnthropicChatProvider } from '#/providers/anthropic';
+import { GoogleGenAIChatProvider } from '#/providers/google-genai';
+import { describe, expect, it, vi } from 'vitest';
+
+/**
+ * Conformance suite: strict providers (those whose APIs require alternating
+ * user/model turns) must never emit two consecutive same-role turns, no matter
+ * what valid history they are handed. The two shapes below — the post-compaction
+ * history (`[kept prompts, user-role summary, injected reminders]`) and a user
+ * turn steered in right after a tool result — are the realistic sources of
+ * consecutive user turns. A new strict provider added without the consecutive-
+ * user merge will fail here rather than 400 in production.
+ */
+
+const POST_COMPACTION_SHAPE: Message[] = [
+  { role: 'user', content: [{ type: 'text', text: 'An earlier user prompt' }], toolCalls: [] },
+  {
+    role: 'user',
+    content: [{ type: 'text', text: '<summary>Conversation so far…</summary>' }],
+    toolCalls: [],
+  },
+  {
+    role: 'user',
+    content: [{ type: 'text', text: '<system-reminder>Stay on task.</system-reminder>' }],
+    toolCalls: [],
+  },
+];
+
+const STEER_AFTER_TOOL_RESULT: Message[] = [
+  { role: 'user', content: [{ type: 'text', text: 'Add 2 and 3' }], toolCalls: [] },
+  {
+    role: 'assistant',
+    content: [],
+    toolCalls: [{ type: 'function', id: 'call_1', name: 'add', arguments: '{"a": 2, "b": 3}' }],
+  },
+  { role: 'tool', content: [{ type: 'text', text: '5' }], toolCallId: 'call_1', toolCalls: [] },
+  { role: 'user', content: [{ type: 'text', text: 'Now multiply them instead' }], toolCalls: [] },
+];
+
+function assertNoConsecutiveSameRole(roles: readonly string[]): void {
+  for (let i = 1; i < roles.length; i++) {
+    expect(
+      roles[i],
+      `consecutive '${roles[i]}' turns at index ${i} in ${JSON.stringify(roles)}`,
+    ).not.toBe(roles[i - 1]);
+  }
+}
+
+/** Drives a provider with `history` and returns the wire turn roles, in order. */
+type WireRoles = (history: Message[]) => Promise<string[]>;
+
+async function anthropicWireRoles(history: Message[]): Promise<string[]> {
+  const provider = new AnthropicChatProvider({
+    model: 'k25',
+    apiKey: 'test-key',
+    defaultMaxTokens: 1024,
+    stream: false,
+  });
+  let captured: Record<string, unknown> | undefined;
+  (provider as unknown as { _client: { messages: { create: unknown } } })._client.messages.create =
+    vi.fn().mockImplementation((params: Record<string, unknown>) => {
+      captured = params;
+      return Promise.resolve({
+        id: 'msg_test',
+        type: 'message',
+        role: 'assistant',
+        model: 'k25',
+        content: [{ type: 'text', text: 'ok' }],
+        stop_reason: 'end_turn',
+        usage: { input_tokens: 1, output_tokens: 1 },
+      });
+    });
+
+  const stream = await provider.generate('', [], history);
+  for await (const part of stream) void part;
+
+  if (captured === undefined) throw new Error('Anthropic provider did not call messages.create');
+  return (captured['messages'] as Array<{ role: string }>).map((m) => m.role);
+}
+
+async function googleWireRoles(history: Message[]): Promise<string[]> {
+  const provider = new GoogleGenAIChatProvider({
+    model: 'gemini-2.5-flash',
+    apiKey: 'test-key',
+    stream: false,
+  });
+  let captured: Record<string, unknown> | undefined;
+  const response = {
+    candidates: [{ content: { parts: [{ text: 'ok' }], role: 'model' }, finishReason: 'STOP' }],
+    usageMetadata: { promptTokenCount: 1, candidatesTokenCount: 1, totalTokenCount: 2 },
+    modelVersion: 'gemini-2.5-flash',
+  };
+  async function* stream() {
+    yield response;
+  }
+  const models = (provider as unknown as { _client: { models: Record<string, unknown> } })._client
+    .models;
+  models['generateContent'] = vi.fn().mockImplementation((params: Record<string, unknown>) => {
+    captured = params;
+    return Promise.resolve(response);
+  });
+  models['generateContentStream'] = vi
+    .fn()
+    .mockImplementation((params: Record<string, unknown>) => {
+      captured = params;
+      return Promise.resolve(stream());
+    });
+
+  const generated = await provider.generate('', [], history);
+  for await (const part of generated) void part;
+
+  if (captured === undefined) throw new Error('Google provider did not call a model endpoint');
+  return (captured['contents'] as Array<{ role: string }>).map((c) => c.role);
+}
+
+const STRICT_PROVIDERS: ReadonlyArray<{ name: string; wireRoles: WireRoles }> = [
+  { name: 'anthropic', wireRoles: anthropicWireRoles },
+  { name: 'google-genai', wireRoles: googleWireRoles },
+];
+
+describe('strict provider role alternation', () => {
+  for (const { name, wireRoles } of STRICT_PROVIDERS) {
+    describe(name, () => {
+      it('collapses the post-compaction shape into alternating turns', async () => {
+        assertNoConsecutiveSameRole(await wireRoles(POST_COMPACTION_SHAPE));
+      });
+
+      it('stays alternating when a user turn is steered in after a tool result', async () => {
+        assertNoConsecutiveSameRole(await wireRoles(STEER_AFTER_TOOL_RESULT));
+      });
+    });
+  }
+});
diff --git a/packages/protocol/src/events.ts b/packages/protocol/src/events.ts
index f9f0fac9e..2aaa2c857 100644
--- a/packages/protocol/src/events.ts
+++ b/packages/protocol/src/events.ts
@@ -300,6 +300,22 @@ export interface CompactionResult {
   readonly compactedCount: number;
   readonly tokensBefore: number;
   readonly tokensAfter: number;
+  /**
+   * Number of real user messages kept verbatim ahead of the summary in the
+   * post-compaction live context. Recorded so the wire-transcript reducer can
+   * reproduce the live folded length without re-deriving it from the full
+   * transcript (which still holds the untruncated originals of messages the
+   * live context may have truncated, so the two would otherwise diverge).
+   * Optional for backward compatibility with older wire records.
+   */
+  readonly keptUserMessageCount?: number;
+  /**
+   * Oldest messages trimmed from the summarizer input when the compaction
+   * request overflowed the model window; not covered by the produced summary.
+   * Mirrors agent-core's `CompactionResult.droppedCount`; optional for backward
+   * compatibility.
+   */
+  readonly droppedCount?: number;
 }
 
 export interface ToolUpdate {
@@ -1008,6 +1024,8 @@ export const compactionResultSchema = z.object({
   compactedCount: z.number(),
   tokensBefore: z.number(),
   tokensAfter: z.number(),
+  keptUserMessageCount: z.number().optional(),
+  droppedCount: z.number().optional(),
 }) satisfies z.ZodType<CompactionResult>;
 
 export const toolUpdateSchema = z.object({
diff --git a/packages/server/test/sessions.e2e.test.ts b/packages/server/test/sessions.e2e.test.ts
index 12ae9b761..46d662d79 100644
--- a/packages/server/test/sessions.e2e.test.ts
+++ b/packages/server/test/sessions.e2e.test.ts
@@ -580,7 +580,7 @@ describe('POST /api/v1/sessions/{session_id}:compact — begin compaction', () =
     const env = envelopeOf<unknown>(res.json());
     expect(env.code).toBe(ErrorCode.COMPACTION_UNABLE);
     expect(env.data).toBeNull();
-    expect(env.msg).toMatch(/No prefix/);
+    expect(env.msg).toMatch(/No messages to compact/);
   });
 });
 
diff --git a/packages/server/test/snapshotService.unit.test.ts b/packages/server/test/snapshotService.unit.test.ts
index 1c328e990..be50822eb 100644
--- a/packages/server/test/snapshotService.unit.test.ts
+++ b/packages/server/test/snapshotService.unit.test.ts
@@ -235,11 +235,11 @@ describe('SnapshotService.read', () => {
     ]);
 
     const snap = await f.service.read(sid);
-    // Reduce keeps the prefix and inserts the summary at the fold; final
-    // entry list is older-1, older-2, <summary>, after-compaction.
+    // Reduce keeps the prefix and appends a user-role summary; final entry
+    // list is older-1, older-2, <summary>, after-compaction.
     expect(snap.messages.items).toHaveLength(4);
     const summaryMsg = snap.messages.items[2]!;
-    expect(summaryMsg.role).toBe('assistant');
+    expect(summaryMsg.role).toBe('user');
     expect((summaryMsg.content[0] as { text: string }).text).toBe('compacted prefix');
     expect(snap.messages.items[3]!.role).toBe('user');
   });