Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/image-compression.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@moonshot-ai/kimi-code-sdk": minor
"@moonshot-ai/kimi-code": minor
---

Automatically compress oversized images before they reach the model. Whatever the source — pasted into the CLI, uploaded from the web/desktop client, sent over ACP, read via `ReadMediaFile`, or returned by an MCP tool — images are downsampled (longest edge ≤ 2000px) and re-encoded to fit a per-image byte budget, cutting vision-token cost and avoiding provider image-size errors. Screenshots stay lossless PNG and only degrade to JPEG when the byte budget cannot otherwise be met. Compression runs as an input-stage step at each ingestion point (while the content part is built), and guards against decompression bombs by skipping absurdly large pixel/byte payloads before decoding. Best-effort: if it fails for any reason the original image is sent unchanged.
1 change: 1 addition & 0 deletions apps/kimi-code/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
"chalk": "^5.4.1",
"cli-highlight": "^2.1.11",
"commander": "^13.1.0",
"jimp": "^1.6.1",
"pathe": "^2.0.3",
"postject": "1.0.0-alpha.6",
"semver": "^7.7.4",
Expand Down
15 changes: 14 additions & 1 deletion apps/kimi-code/src/tui/controllers/editor-keyboard.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { Session } from '@moonshot-ai/kimi-code-sdk';
import { compressImageForModel } from '@moonshot-ai/kimi-code-sdk';

import { ClipboardMediaError, readClipboardMedia } from '#/utils/clipboard/clipboard-image';
import { parseImageMeta } from '#/utils/image/image-mime';
Expand Down Expand Up @@ -360,7 +361,19 @@ export class EditorKeyboardController {

const meta = parseImageMeta(media.bytes);
if (meta === null) return false;
const attachment = this.imageStore.addImage(media.bytes, meta.mime, meta.width, meta.height);
// Compress at ingestion — a pure data step while building the attachment, so
// the stored bytes, the inline thumbnail, the `[image #N (W×H)]` placeholder,
// and the submitted image all agree, and the agent core only ever sees an
// already-compressed image. Best effort: originals pass through on failure.
const compressed = await compressImageForModel(media.bytes, meta.mime);
const attachment = compressed.changed
? this.imageStore.addImage(
compressed.data,
compressed.mimeType,
compressed.width,
compressed.height,
)
: this.imageStore.addImage(media.bytes, meta.mime, meta.width, meta.height);
this.host.state.editor.insertTextAtCursor?.(`${attachment.placeholder} `);
this.host.state.ui.requestRender();
this.host.track('shortcut_paste', { kind: 'image' });
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/**
* Clipboard image paste → attachment store, with ingestion-time compression.
*
* Tests pin:
* - an oversized pasted image is downsampled while building the attachment,
* so the stored bytes, the `[image #N (W×H)]` placeholder, and the eventual
* submitted image all agree on the compressed size
* - a within-budget paste is stored byte-for-byte (fast path)
*/

import { Jimp } from 'jimp';
import { beforeEach, describe, expect, it, vi } from 'vitest';

import {
EditorKeyboardController,
type EditorKeyboardHost,
} from '#/tui/controllers/editor-keyboard';
import { ImageAttachmentStore } from '#/tui/utils/image-attachment-store';
import { parseImageMeta } from '#/utils/image/image-mime';

// vitest hoists vi.mock/vi.hoisted above the imports above, so the mock still
// applies to the editor-keyboard module that pulls in readClipboardMedia.
const { readClipboardMedia } = vi.hoisted(() => ({ readClipboardMedia: vi.fn() }));

vi.mock('#/utils/clipboard/clipboard-image', async (importActual) => {
const actual = await importActual<typeof import('#/utils/clipboard/clipboard-image')>();
return { ...actual, readClipboardMedia };
});

/** Handle returned by `createPasteHarness` for driving a clipboard paste in tests. */
interface PasteHarness {
/** The attachment store that was handed to the controller under test. */
readonly store: ImageAttachmentStore;
/** Invokes the `onPasteImage` editor handler and awaits its completion. */
pasteImage(): Promise<void>;
}

/**
 * Wire an `EditorKeyboardController` to a stub host and a fresh
 * `ImageAttachmentStore`, then expose the store plus a helper that triggers
 * the `onPasteImage` editor handler.
 *
 * The host is a hand-built stub cast to `EditorKeyboardHost`; every callback
 * on it is a `vi.fn()` spy.
 */
function createPasteHarness(): PasteHarness {
  // Handler bag shared with the host; `install()` is expected to register
  // `onPasteImage` on it (the paste helper below throws if it did not).
  const editorHandlers: Record<string, ((...args: never[]) => unknown) | undefined> = {};
  const attachments = new ImageAttachmentStore();

  const stubState = {
    editor: editorHandlers,
    activeDialog: null,
    appState: { streamingPhase: 'idle', isCompacting: false },
    footer: { setTransientHint: vi.fn() },
    ui: { requestRender: vi.fn() },
  };
  const stubHost = {
    state: stubState,
    session: undefined,
    btwPanelController: { closeOrCancel: vi.fn(() => false) },
    track: vi.fn(),
    showError: vi.fn(),
    openUndoSelector: vi.fn(),
    cancelRunningShellCommand: vi.fn(),
  } as unknown as EditorKeyboardHost;

  new EditorKeyboardController(stubHost, attachments).install();

  const pasteImage = async (): Promise<void> => {
    const onPaste = editorHandlers['onPasteImage'];
    if (onPaste === undefined) throw new Error('onPasteImage handler not installed');
    await (onPaste as () => Promise<boolean>)();
  };

  return { store: attachments, pasteImage };
}

/** Encode a single-colour `width`×`height` image as PNG and return its bytes. */
async function solidPng(width: number, height: number): Promise<Uint8Array> {
  const canvas = new Jimp({ width, height, color: 0x3366ccff });
  const encoded = await canvas.getBuffer('image/png');
  return new Uint8Array(encoded);
}

describe('clipboard image paste compression', () => {
  beforeEach(() => {
    // Each test programs its own clipboard contents.
    readClipboardMedia.mockReset();
  });

  it('downsamples an oversized pasted image before storing it', async () => {
    const oversized = await solidPng(2600, 2600);
    readClipboardMedia.mockResolvedValue({ kind: 'image', bytes: oversized, mimeType: 'image/png' });

    const harness = createPasteHarness();
    await harness.pasteImage();

    expect(harness.store.size()).toBe(1);
    const stored = harness.store.get(1);
    expect(stored?.kind).toBe('image');
    if (stored?.kind !== 'image') throw new Error('expected image attachment');

    // The recorded dimensions and the placeholder text describe the
    // compressed image, not the 2600×2600 original.
    const longestRecordedEdge = Math.max(stored.width, stored.height);
    expect(longestRecordedEdge).toBeLessThanOrEqual(2000);
    expect(stored.placeholder).toContain('2000×2000');

    // The stored bytes themselves must also decode to the compressed size.
    const decoded = parseImageMeta(stored.bytes);
    expect(decoded).not.toBeNull();
    expect(Math.max(decoded!.width, decoded!.height)).toBeLessThanOrEqual(2000);
  });

  it('stores a within-budget paste byte-for-byte', async () => {
    const tiny = await solidPng(80, 80);
    readClipboardMedia.mockResolvedValue({ kind: 'image', bytes: tiny, mimeType: 'image/png' });

    const harness = createPasteHarness();
    await harness.pasteImage();

    const stored = harness.store.get(1);
    if (stored?.kind !== 'image') throw new Error('expected image attachment');
    expect(stored.width).toBe(80);
    expect(stored.height).toBe(80);
    // Reference identity: the original buffer is stored, not a re-encoded copy.
    expect(stored.bytes).toBe(tiny);
  });
});
2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@
inherit (finalAttrs) pname version src pnpmWorkspaces;
inherit pnpm;
fetcherVersion = 3;
hash = "sha256-oratz8x67ZEJGTiNy+s4XaKe0TtpRKh63aIqkV79vvM=";
hash = "sha256-mqyi0VuPZwESZcdU5E8F3XUG99OH636knBfb8y6TQpw=";
};

nativeBuildInputs = [
Expand Down
3 changes: 3 additions & 0 deletions packages/acp-adapter/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,8 @@
"@moonshot-ai/agent-core": "workspace:^",
"@moonshot-ai/kaos": "workspace:^",
"@moonshot-ai/kimi-code-sdk": "workspace:^"
},
"devDependencies": {
"jimp": "^1.6.1"
}
}
36 changes: 36 additions & 0 deletions packages/acp-adapter/src/convert.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { ContentBlock, ToolCallContent } from '@agentclientprotocol/sdk';
import {
log,
compressBase64ForModel,
type PromptPart,
type ToolInputDisplay,
type ToolResultEvent,
Expand Down Expand Up @@ -71,6 +72,41 @@ export function acpBlocksToPromptParts(
return out;
}

/**
 * Run every inline image in a prompt-part list through
 * `compressBase64ForModel` and return a new list.
 *
 * Only `image_url` parts whose URL parses as a base64 `data:` URL are
 * considered. When the compressor reports `changed`, the part is replaced by
 * one carrying a rebuilt data URL (other `imageUrl` fields are preserved).
 * Every other part — non-image, non-data-URL, or unchanged — is passed
 * through as the same object. Parts are processed one at a time, in order.
 *
 * @param parts Prompt parts to inspect; the input array is not mutated.
 * @returns A new array with the same length and ordering as `parts`.
 */
export async function compressPromptImageParts(
  parts: readonly PromptPart[],
): Promise<PromptPart[]> {
  const processed: PromptPart[] = [];
  for (const original of parts) {
    if (original.type !== 'image_url') {
      processed.push(original);
      continue;
    }

    const inline = parseImageDataUrl(original.imageUrl.url);
    if (inline === null) {
      // Not an inline base64 image (e.g. a remote URL): nothing to shrink.
      processed.push(original);
      continue;
    }

    const shrunk = await compressBase64ForModel(inline.base64, inline.mimeType);
    if (!shrunk.changed) {
      processed.push(original);
      continue;
    }

    const rebuiltUrl = `data:${shrunk.mimeType};base64,${shrunk.base64}`;
    processed.push({
      type: 'image_url',
      imageUrl: { ...original.imageUrl, url: rebuiltUrl },
    });
  }
  return processed;
}

/**
 * Split a base64 `data:` URL into its media type and base64 payload.
 *
 * Accepts optional media-type parameters between the type and the `base64`
 * token (e.g. `data:image/png;charset=utf-8;base64,...`) and matches the
 * `data:` scheme and `base64` token case-insensitively, so such URLs are
 * parsed instead of being treated as non-inline. Parameters are discarded;
 * only the bare media type is returned, with its original casing.
 *
 * @param url Candidate URL.
 * @returns The media type and raw base64 text (which may be empty or contain
 *   line breaks), or `null` when `url` is not a base64 data URL with an
 *   explicit media type.
 */
function parseImageDataUrl(url: string): { mimeType: string; base64: string } | null {
  // Group 1: bare media type. The non-capturing group skips `;name=value`
  // parameters. `s` lets the payload span line breaks; `i` covers `;BASE64`.
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.*)$/is.exec(url);
  if (match === null) return null;
  return { mimeType: match[1]!, base64: match[2]! };
}

/**
* Minimum-viable XML-attribute escaping for prompt-embedded resource
* wrappers. The output is consumed by an LLM, not parsed by a canonical
Expand Down
30 changes: 28 additions & 2 deletions packages/acp-adapter/src/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
type KimiErrorPayload,
type KimiHarness,
type McpServerInfo,
type PromptPart,
type QuestionAnswers,
type QuestionRequest,
type Session,
Expand All @@ -38,7 +39,7 @@ import {
} from './builtin-commands';
import { buildSessionConfigOptions } from './config-options';
import { listModelsFromHarness } from './model-catalog';
import { acpBlocksToPromptParts } from './convert';
import { acpBlocksToPromptParts, compressPromptImageParts } from './convert';
import {
acpToolCallId,
assistantDeltaToSessionUpdate,
Expand Down Expand Up @@ -147,6 +148,13 @@ export class AcpSession {
*/
private skillCommandMap: ReadonlyMap<string, string> = new Map();

// One token per in-flight `prompt()` that is still awaiting image compression
// (before any turn exists). A `session/cancel` in that window has no turn to
// abort, so it flips every token and each affected `prompt()` returns
// `cancelled` instead of launching. A set (not a single field) so concurrent
// prompts are all covered rather than only the most recent.
private readonly pendingPromptAborts = new Set<{ aborted: boolean }>();

/**
* The most recent command palette advertised to the ACP client. Used by
* `/help` so the response matches the client's `available_commands_update`
Expand Down Expand Up @@ -268,6 +276,11 @@ export class AcpSession {
* acceptable.
*/
async cancel(): Promise<void> {
  // Prompts still compressing images have no turn to abort yet; flag each of
  // their tokens so `prompt()` can see the cancellation once compression ends.
  this.pendingPromptAborts.forEach((token) => {
    token.aborted = true;
  });
  await this.session.cancel();
}

Expand Down Expand Up @@ -715,7 +728,20 @@ export class AcpSession {
* sees a JSON-RPC error rather than a hung request.
*/
async prompt(blocks: readonly ContentBlock[]): Promise<PromptResponse> {
const parts = acpBlocksToPromptParts(blocks);
// Compression happens before any turn exists, so honor a `session/cancel`
// that arrives during it: flip the flag from cancel() and bail out here
// rather than launching a turn the client already asked to stop.
const pending = { aborted: false };
this.pendingPromptAborts.add(pending);
let parts: readonly PromptPart[];
try {
parts = await compressPromptImageParts(acpBlocksToPromptParts(blocks));
} finally {
this.pendingPromptAborts.delete(pending);
}
if (pending.aborted) {
return { stopReason: 'cancelled' };
}
const sessionId = this.id;
const conn = this.conn;

Expand Down
78 changes: 78 additions & 0 deletions packages/acp-adapter/test/cancel.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
type WriteTextFileResponse,
} from '@agentclientprotocol/sdk';
import { log, type KimiHarness, type Session } from '@moonshot-ai/kimi-code-sdk';
import { Jimp } from 'jimp';

import { AcpServer } from '../src/server';
import { AUTHED_STATUS } from './_helpers/harness-stubs';
Expand Down Expand Up @@ -139,4 +140,81 @@ describe('AcpServer cancel', () => {
expect.objectContaining({ sessionId: 'sess-erroring' }),
);
});

// Scenario: `session/cancel` is sent while the prompt's image is still being
// compressed. The prompt must resolve with `cancelled` and the underlying
// session's `prompt` must never be invoked.
it('returns cancelled without launching when cancel arrives during image compression', async () => {
let promptCalls = 0;
// Stub session: counts `prompt` invocations so the test can prove no turn
// was launched; `cancel` and `onEvent` are inert.
const fakeSession = {
id: 'sess-cancel-compress',
prompt: async () => {
promptCalls += 1;
return undefined;
},
cancel: async () => undefined,
onEvent: () => () => undefined,
} as unknown as Session;
// Stub harness that always hands out the session above.
const harness = {
auth: { status: async () => AUTHED_STATUS },
createSession: async () => fakeSession,
} as unknown as KimiHarness;

// Connect an agent-side server and a client-side stub over an in-memory pair.
const { agentStream, clientStream } = makeInMemoryStreamPair();
new AgentSideConnection((c) => new AcpServer(harness, c), agentStream);
const client = new ClientSideConnection((_a) => new StubClient(), clientStream);

const { sessionId } = await client.newSession({ cwd: '/tmp/x', mcpServers: [] });

// A solid 2600×2600 image is small in bytes but slow enough to compress
// that the cancel below reliably lands mid-compression, before any turn.
const data = Buffer.from(
await new Jimp({ width: 2600, height: 2600, color: 0x3366ccff }).getBuffer('image/png'),
).toString('base64');

// Start the prompt without awaiting it, then cancel while it is in flight.
const promptP = client.prompt({
sessionId,
prompt: [{ type: 'image', data, mimeType: 'image/png' }],
});
await client.cancel({ sessionId });
const res = await promptP;

expect(res.stopReason).toBe('cancelled');
expect(promptCalls).toBe(0); // the turn was never launched
});

// Scenario: two prompts are compressing images at the same time and a single
// `session/cancel` arrives. Both must resolve with `cancelled` and neither may
// reach the underlying session's `prompt`.
it('cancels every prompt compressing concurrently, not just the most recent', async () => {
let promptCalls = 0;
// Stub session: counts `prompt` invocations; `cancel` and `onEvent` are inert.
const fakeSession = {
id: 'sess-cancel-concurrent',
prompt: async () => {
promptCalls += 1;
return undefined;
},
cancel: async () => undefined,
onEvent: () => () => undefined,
} as unknown as Session;
// Stub harness that always hands out the session above.
const harness = {
auth: { status: async () => AUTHED_STATUS },
createSession: async () => fakeSession,
} as unknown as KimiHarness;

// Connect an agent-side server and a client-side stub over an in-memory pair.
const { agentStream, clientStream } = makeInMemoryStreamPair();
new AgentSideConnection((c) => new AcpServer(harness, c), agentStream);
const client = new ClientSideConnection((_a) => new StubClient(), clientStream);

const { sessionId } = await client.newSession({ cwd: '/tmp/x', mcpServers: [] });

// Same 2600×2600 solid PNG payload is reused for both prompts.
const data = Buffer.from(
await new Jimp({ width: 2600, height: 2600, color: 0x3366ccff }).getBuffer('image/png'),
).toString('base64');
const imageBlock = { type: 'image' as const, data, mimeType: 'image/png' };

// Two prompts compressing at once; a single cancel must cover both.
const p1 = client.prompt({ sessionId, prompt: [imageBlock] });
const p2 = client.prompt({ sessionId, prompt: [imageBlock] });
await client.cancel({ sessionId });
const [r1, r2] = await Promise.all([p1, p2]);

expect(r1.stopReason).toBe('cancelled');
expect(r2.stopReason).toBe('cancelled');
expect(promptCalls).toBe(0); // neither prompt reached the session
});
});
Loading
Loading