From f77c30e1485259b1ddaf1e60c8e34e2da0564e69 Mon Sep 17 00:00:00 2001 From: Luke Melia Date: Tue, 30 Jun 2026 11:07:40 -0400 Subject: [PATCH 1/2] ai-bot: fulfill readRealmFile via Matrix attachments on the host-command path Re-architect readRealmFile so the bot no longer runs reads in an inline, same-turn loop. Instead it surfaces each read as an `executedBy: 'ai-bot'` command request, fetches the file, uploads it to the Matrix media repo, and posts a command-result event carrying the file as `data.attachedFiles`. The existing host-command result path then reconstructs the content on the next turn (via the same attachment-download the host `readFile` uses), and the bot's own result event re-triggers generation. Because reads now resolve next-turn like host commands, the two timing models collapse into one: an answer may freely mix reads and host commands, so the mixed-round rejection, the inline `for(;;)` loop, the "thinking"-event indicator rotation, and the in-memory followup splicing are all deleted. Uploads dedupe by content hash, so the same skill read repeatedly is stored in Matrix once; keying on the hash keeps it version-correct (changed content re-uploads). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai-bot/lib/matrix/response-publisher.ts | 62 +--- .../ai-bot/lib/read-realm-file-fulfillment.ts | 222 +++++++++++ packages/ai-bot/lib/read-realm-file-loop.ts | 217 ----------- packages/ai-bot/lib/read-realm-file.ts | 53 +++ packages/ai-bot/lib/responder.ts | 33 +- packages/ai-bot/lib/response-state.ts | 18 - packages/ai-bot/main.ts | 351 +++++++----------- packages/ai-bot/tests/index.ts | 2 +- .../tests/read-realm-file-fulfillment-test.ts | 208 +++++++++++ .../ai-bot/tests/read-realm-file-loop-test.ts | 258 ------------- packages/ai-bot/tests/read-realm-file-test.ts | 66 ++++ 11 files changed, 700 insertions(+), 790 deletions(-) create mode 100644 packages/ai-bot/lib/read-realm-file-fulfillment.ts delete mode 100644 packages/ai-bot/lib/read-realm-file-loop.ts create mode 100644 packages/ai-bot/tests/read-realm-file-fulfillment-test.ts delete mode 100644 packages/ai-bot/tests/read-realm-file-loop-test.ts diff --git a/packages/ai-bot/lib/matrix/response-publisher.ts b/packages/ai-bot/lib/matrix/response-publisher.ts index 6a20df3c931..429c99de561 100644 --- a/packages/ai-bot/lib/matrix/response-publisher.ts +++ b/packages/ai-bot/lib/matrix/response-publisher.ts @@ -1,5 +1,10 @@ import type { ChatCompletionMessageFunctionToolCall } from 'openai/resources/chat/completions'; import type { CommandRequest } from '@cardstack/runtime-common/commands'; +import { AI_BOT_EXECUTOR } from '@cardstack/runtime-common/commands'; +import { + READ_REALM_FILE_TOOL_NAME, + fileLabelFromUrl, +} from '../read-realm-file.ts'; import { thinkingMessage } from '../../constants.ts'; import type ResponseState from '../response-state.ts'; import { @@ -35,6 +40,18 @@ function toCommandRequest( result['arguments'] = {}; } } + // readRealmFile is a tool ai-bot fulfills itself: tag it so the host records + // it in the timeline but never runs it, and give it a human label the + // timeline indicator can show ("Read file: ") since the raw arguments + // carry no description of their own. + if (result.name === READ_REALM_FILE_TOOL_NAME) { + result.executedBy = AI_BOT_EXECUTOR; + let label = fileLabelFromUrl(result.arguments?.url); + result.arguments = { + ...(result.arguments ?? {}), + description: label ? `Read file: ${label}` : 'Read file', + }; + } return result; } @@ -216,49 +233,4 @@ export default class MatrixResponsePublisher { new ResponseEventData(initialMessage.event_id, this.eventSizeMax), ]; } - - // When the bot is mid-turn there's already a "Thinking…" message on screen. - // If the model then asks for a tool that ai-bot runs itself (readRealmFile), - // rather than one it hands to the host, we want to show that work as a - // command-result indicator right where the Thinking message is — and before - // the answer — so the turn reads as "did this, then answered". - // - // To get that ordering we reuse the Thinking event: replace it in place with - // the command requests, then rotate to a fresh event for whatever streams - // next (the answer). Reusing it is what matters — that event already holds - // this turn's earliest timeline slot, so the indicator inherits the slot and - // the answer in the fresh event sorts after it. Posting a brand-new indicator - // message instead would land it after the answer's event and render below it. - // - // The indicator is sent finished with an empty body + the command requests. - // The caller posts a result event against the returned event id to flip it - // from in-progress to done/failed, and resets ResponseState so the next event - // streams clean. - async sendCommandResultIndicator( - commandRequests: Partial[], - ): Promise { - await this.ensureThinkingMessageSent(); - let indicatorEventId = this.currentResponseEventId; - let sendOperation = this.sendingMessage.then(async () => { - await sendMessageEvent( - this.client, - this.roomId, - '', - indicatorEventId, - { - isStreamingFinished: true, - data: { context: { agentId: this.agentId } }, - }, - commandRequests, - ); - // Fresh event (no id yet) → the next send creates a new message that - // sorts after this indicator. - this.responseEvents = [ - new ResponseEventData(undefined, this.eventSizeMax), - ]; - }); - this.sendingMessage = sendOperation.catch(() => {}); - await sendOperation; - return indicatorEventId; - } } diff --git a/packages/ai-bot/lib/read-realm-file-fulfillment.ts b/packages/ai-bot/lib/read-realm-file-fulfillment.ts new file mode 100644 index 00000000000..762ceb9add3 --- /dev/null +++ b/packages/ai-bot/lib/read-realm-file-fulfillment.ts @@ -0,0 +1,222 @@ +import { createHash } from 'crypto'; +import { logger } from '@cardstack/runtime-common'; +import { sendMatrixEvent } from '@cardstack/runtime-common/ai'; +import { + APP_BOXEL_COMMAND_RESULT_EVENT_TYPE, + APP_BOXEL_COMMAND_RESULT_REL_TYPE, + APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, + APP_BOXEL_COMMAND_RESULT_WITH_OUTPUT_MSGTYPE, +} from '@cardstack/runtime-common/matrix-constants'; +import type { MatrixClient } from 'matrix-js-sdk'; +import type { ChatCompletionMessageToolCall } from 'openai/resources'; +import { + executeReadRealmFile, + fileLabelFromUrl, + type ReadRealmFileArgs, +} from './read-realm-file.ts'; +import type { DelegatedUserRealmSessionManager } from './user-delegated-realm-server-session.ts'; + +let log = logger('ai-bot:read-realm-file'); + +// The contentType the fetched file is attached under. It must be text-based so +// the prompt builder downloads and inlines the content (rather than treating it +// as opaque media); 'text/plain' satisfies that for any source we read. +const READ_FILE_CONTENT_TYPE = 'text/plain'; + +// Maps a fetched file's content hash to the Matrix media URL it was uploaded +// under, so identical bytes (the same skill read across rooms or turns) are +// uploaded once and re-referenced. Keyed on a SHA-256 of the content, so a +// changed file misses the cache and re-uploads — dedup without staleness. +// Matrix media is not content-addressable (each upload gets a fresh id), so +// this app-level cache is what keeps us from re-storing the same bytes. +const uploadedContentUrlByHash = new Map(); + +export interface ReadRealmFileFulfillmentDeps { + client: MatrixClient; + roomId: string; + // The bot message that carried the readRealmFile command requests. Result + // events relate back to it; pairing is ultimately by commandRequestId, so + // this only needs to be the anchoring bot message. + requestEventId: string; + agentId: string | undefined; + onBehalfOf: string; + delegatedUserRealmSessions: Pick< + DelegatedUserRealmSessionManager, + 'getToken' | 'invalidate' + >; + fetch?: typeof globalThis.fetch; + // Injectable for tests; defaults to uploading to the Matrix media repo. + uploadText?: (content: string, contentType: string) => Promise; +} + +export interface ReadRealmFileFulfillmentOutcome { + commandRequestId: string; + ok: boolean; + error?: string; +} + +// Upload bytes to the Matrix media repo and return an http download URL that +// both the bot and the host can fetch. Dedupes identical content by hash. +async function uploadTextToMatrix( + client: MatrixClient, + content: string, + contentType: string, +): Promise { + let hash = createHash('sha256').update(content).digest('hex'); + let cached = uploadedContentUrlByHash.get(hash); + if (cached) { + return cached; + } + let uploaded = await client.uploadContent(content, { type: contentType }); + let url = client.mxcUrlToHttp( + uploaded.content_uri, + undefined, + undefined, + undefined, + undefined, + undefined, + true, + ); + if (!url) { + throw new Error('could not derive a download URL for the uploaded file'); + } + uploadedContentUrlByHash.set(hash, url); + return url; +} + +// Runs each readRealmFile tool call ai-bot owns and publishes its outcome as a +// command-result event — the same shape a host command result takes, so the +// existing prompt reconstruction pairs it with the request and feeds it back on +// the next turn. On success the fetched file is uploaded to Matrix and attached +// to the result event (data.attachedFiles); the model receives its content via +// the same attachment-download path host-read files use. On failure the result +// event carries the reason and resolves the request as invalid, so a failed +// read reads as failed rather than as a clean read. Returns one outcome per +// call; never throws (a publish failure is logged so the turn still settles). +export async function fulfillReadRealmFileCalls( + botToolCalls: ChatCompletionMessageToolCall[], + deps: ReadRealmFileFulfillmentDeps, +): Promise { + let upload = + deps.uploadText ?? + ((content: string, contentType: string) => + uploadTextToMatrix(deps.client, content, contentType)); + let outcomes: ReadRealmFileFulfillmentOutcome[] = []; + for (let call of botToolCalls) { + if (call.type !== 'function') { + continue; + } + outcomes.push(await fulfillOne(call, deps, upload)); + } + return outcomes; +} + +async function fulfillOne( + call: ChatCompletionMessageToolCall & { type: 'function' }, + deps: ReadRealmFileFulfillmentDeps, + upload: (content: string, contentType: string) => Promise, +): Promise { + let args: ReadRealmFileArgs | undefined; + try { + args = JSON.parse(call.function.arguments) as ReadRealmFileArgs; + } catch { + args = undefined; + } + if (!args || !args.realm || !args.url) { + return await publishFailure( + call.id, + 'readRealmFile needs a realm and a url.', + deps, + ); + } + + let result = await executeReadRealmFile(args, { + onBehalfOf: deps.onBehalfOf, + delegatedUserRealmSessions: deps.delegatedUserRealmSessions, + fetch: deps.fetch, + }); + if (!result.ok) { + return await publishFailure(call.id, result.error, deps); + } + + let label = fileLabelFromUrl(args.url) ?? args.url; + let fileUrl: string; + try { + fileUrl = await upload(result.content, READ_FILE_CONTENT_TYPE); + } catch (e: any) { + log.error( + `readRealmFile: upload failed for ${args.url}: ${e?.message ?? e}`, + ); + return await publishFailure( + call.id, + `could not store ${args.url} for reading`, + deps, + ); + } + + await publish(deps, { + msgtype: APP_BOXEL_COMMAND_RESULT_WITH_OUTPUT_MSGTYPE, + commandRequestId: call.id, + 'm.relates_to': { + rel_type: APP_BOXEL_COMMAND_RESULT_REL_TYPE, + key: 'applied', + event_id: deps.requestEventId, + }, + data: { + context: { agentId: deps.agentId }, + attachedFiles: [ + { + sourceUrl: args.url, + url: fileUrl, + name: label, + contentType: READ_FILE_CONTENT_TYPE, + contentSize: Buffer.byteLength(result.content), + }, + ], + }, + }); + return { commandRequestId: call.id, ok: true }; +} + +async function publishFailure( + commandRequestId: string, + error: string, + deps: ReadRealmFileFulfillmentDeps, +): Promise { + await publish(deps, { + msgtype: APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, + commandRequestId, + failureReason: error, + 'm.relates_to': { + rel_type: APP_BOXEL_COMMAND_RESULT_REL_TYPE, + key: 'invalid', + event_id: deps.requestEventId, + }, + data: { context: { agentId: deps.agentId } }, + }); + return { commandRequestId, ok: false, error }; +} + +async function publish( + deps: ReadRealmFileFulfillmentDeps, + content: Record, +): Promise { + try { + // eventIdToReplace must stay undefined: sendMatrixEvent overwrites + // m.relates_to with an m.replace relation when it's set, which would clobber + // the command-result relation we build here. + await sendMatrixEvent( + deps.client, + deps.roomId, + APP_BOXEL_COMMAND_RESULT_EVENT_TYPE, + content, + undefined, + ); + } catch (e: any) { + log.error( + `readRealmFile: failed to publish result for ${content.commandRequestId}: ${ + e?.message ?? e + }`, + ); + } +} diff --git a/packages/ai-bot/lib/read-realm-file-loop.ts b/packages/ai-bot/lib/read-realm-file-loop.ts deleted file mode 100644 index 2a694bac1bd..00000000000 --- a/packages/ai-bot/lib/read-realm-file-loop.ts +++ /dev/null @@ -1,217 +0,0 @@ -import type { - ChatCompletion, - ChatCompletionMessageParam, - ChatCompletionMessageToolCall, -} from 'openai/resources'; -import { - AI_BOT_EXECUTOR, - type CommandRequest, -} from '@cardstack/runtime-common/commands'; -import { - APP_BOXEL_COMMAND_RESULT_REL_TYPE, - APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, -} from '@cardstack/runtime-common/matrix-constants'; -import { - executeReadRealmFile, - READ_REALM_FILE_TOOL_NAME, - type ReadRealmFileArgs, -} from './read-realm-file.ts'; -import type { DelegatedUserRealmSessionManager } from './user-delegated-realm-server-session.ts'; - -export interface ReadRealmFileLoopDeps { - onBehalfOf: string; - delegatedUserRealmSessions: Pick< - DelegatedUserRealmSessionManager, - 'getToken' | 'invalidate' - >; - fetch?: typeof globalThis.fetch; -} - -// Per-file result of a round, so the caller can resolve each command-result -// indicator to done or failed — a read that 404s or is denied must not read as -// success. -export interface ReadRealmFileOutcome { - commandRequestId: string; - ok: boolean; - error?: string; -} - -export interface ReadRealmFileFollowup { - // Assistant turn + one tool result per call, to append before generating - // again. Empty when the round shouldn't loop. - messages: ChatCompletionMessageParam[]; - // One entry per readRealmFile call that ran, in call order. - outcomes: ReadRealmFileOutcome[]; -} - -export interface ClassifiedToolCalls { - // Tool calls ai-bot runs itself, in-process, this turn (readRealmFile). - botToolCalls: ChatCompletionMessageToolCall[]; - // Tool calls the host runs on a later turn (everything else). - hostToolCalls: ChatCompletionMessageToolCall[]; -} - -// Split a completion's tool calls by who executes them. Bot tools and host -// tools have different time models — bot tools resolve same-turn, host tools -// next-turn — so the caller handles each set explicitly rather than forcing -// them through one path. -export function classifyToolCalls( - assistantMessage: ChatCompletion.Choice['message'], -): ClassifiedToolCalls { - let botToolCalls: ChatCompletionMessageToolCall[] = []; - let hostToolCalls: ChatCompletionMessageToolCall[] = []; - for (let call of assistantMessage.tool_calls ?? []) { - if ( - call.type === 'function' && - call.function.name === READ_REALM_FILE_TOOL_NAME - ) { - botToolCalls.push(call); - } else { - hostToolCalls.push(call); - } - } - return { botToolCalls, hostToolCalls }; -} - -// Runs the given bot tool calls (the `botToolCalls` from `classifyToolCalls`) -// in-process and returns the messages to append before generating again — the -// assistant turn followed by one tool result per call — plus the per-call -// outcome. The caller invokes this only when the round is bot-only; a mixed -// round is handled (rejected) by the caller without running anything here. -export async function buildReadRealmFileFollowup( - assistantMessage: ChatCompletion.Choice['message'], - botToolCalls: ChatCompletionMessageToolCall[], - deps: ReadRealmFileLoopDeps, -): Promise { - if (botToolCalls.length === 0) { - return { messages: [], outcomes: [] }; - } - - let toolMessages: ChatCompletionMessageParam[] = []; - let outcomes: ReadRealmFileOutcome[] = []; - for (let call of botToolCalls) { - if (call.type !== 'function') { - continue; - } - let ok: boolean; - let error: string | undefined; - let content: string; - let args: ReadRealmFileArgs | undefined; - try { - args = JSON.parse(call.function.arguments) as ReadRealmFileArgs; - } catch { - args = undefined; - } - if (!args || !args.realm || !args.url) { - ok = false; - error = 'readRealmFile needs a realm and a url.'; - content = `Error: ${error}`; - } else { - let result = await executeReadRealmFile(args, deps); - if (result.ok) { - ok = true; - content = result.content; - } else { - ok = false; - error = result.error; - content = `Error: ${result.error}`; - } - } - toolMessages.push({ - role: 'tool', - tool_call_id: call.id, - content, - }); - outcomes.push({ commandRequestId: call.id, ok, error }); - } - - return { - messages: [assistantMessage as ChatCompletionMessageParam, ...toolMessages], - outcomes, - }; -} - -// A short human label for a file being read, derived from its URL: -// `…/skills//SKILL.md` → `/SKILL.md`, otherwise the file name. -// Shown in the command-result indicator so the user sees which file was read, -// not a blank one. -function fileLabelFromUrl(url: string | undefined): string | undefined { - if (!url) { - return undefined; - } - try { - let segments = new URL(url).pathname.split('/').filter(Boolean); - let last = segments[segments.length - 1]; - if (last === 'SKILL.md' && segments.length >= 2) { - return `${segments[segments.length - 2]}/SKILL.md`; - } - return last ?? url; - } catch { - return url; - } -} - -// The given bot tool calls expressed as command requests tagged -// `executedBy: 'ai-bot'`. The bot runs these in-process, so the host must not -// execute them — it surfaces them in the timeline as a record of what was read -// (and as a debugging aid when an answer looks under-informed). Used for both -// the success indicators (bot-only round) and the rejection indicators (mixed -// round). -export function readRealmFileCommandRequests( - botToolCalls: ChatCompletionMessageToolCall[], -): Partial[] { - return botToolCalls.map((call) => { - let args: Record = {}; - if (call.type === 'function') { - try { - args = JSON.parse(call.function.arguments); - } catch { - // A malformed call still gets recorded; arguments stay empty. - } - } - let label = fileLabelFromUrl(args.url); - return { - id: call.id, - name: READ_REALM_FILE_TOOL_NAME, - // `description` is what the timeline command header renders; without it - // the indicator is blank. The host reads it off the arguments. - arguments: { - ...args, - description: label ? `Read file: ${label}` : 'Read file', - }, - executedBy: AI_BOT_EXECUTOR, - }; - }); -} - -// Content for the command-result event that resolves a file-read indicator from -// its in-progress (loading) state to a terminal one. ai-bot posts one per file -// once the fetch completes; the host renders the indicator as a spinner until -// it lands, then as applied (success) or invalid + the reason (failure) — same -// applying→applied/invalid path host-run commands follow. A failed read must -// surface as failed, not as a successful read. -export function fileReadResultContent({ - commandRequestId, - indicatorEventId, - ok, - failureReason, - agentId, -}: { - commandRequestId: string; - indicatorEventId: string; - ok: boolean; - failureReason?: string; - agentId: string | undefined; -}) { - return { - msgtype: APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, - commandRequestId, - failureReason: ok ? undefined : failureReason, - 'm.relates_to': { - event_id: indicatorEventId, - key: ok ? 'applied' : 'invalid', - rel_type: APP_BOXEL_COMMAND_RESULT_REL_TYPE, - }, - data: { context: { agentId } }, - }; -} diff --git a/packages/ai-bot/lib/read-realm-file.ts b/packages/ai-bot/lib/read-realm-file.ts index ec46d13479d..7667e5ddc9c 100644 --- a/packages/ai-bot/lib/read-realm-file.ts +++ b/packages/ai-bot/lib/read-realm-file.ts @@ -2,6 +2,10 @@ import { logger, SupportedMimeType } from '@cardstack/runtime-common'; import { ensureTrailingSlash } from '@cardstack/runtime-common/paths'; import { DelegatedUserRealmSessionError } from '@cardstack/runtime-common/user-delegated-realm-server-session'; import type { Tool } from 'https://cardstack.com/base/matrix-event'; +import type { + ChatCompletion, + ChatCompletionMessageToolCall, +} from 'openai/resources'; import type { DelegatedUserRealmSessionManager } from './user-delegated-realm-server-session.ts'; let log = logger('ai-bot:read-realm-file'); @@ -150,3 +154,52 @@ export async function executeReadRealmFile( return { ok: true, url, content: await response.text() }; } + +export interface ClassifiedToolCalls { + // Tool calls ai-bot runs itself (readRealmFile). + botToolCalls: ChatCompletionMessageToolCall[]; + // Tool calls the host runs (everything else). + hostToolCalls: ChatCompletionMessageToolCall[]; +} + +// Split a completion's tool calls into the ones ai-bot fulfills itself +// (readRealmFile) and the ones the host fulfills. Both kinds now resolve the +// same way — as command requests answered by a command-result event on a later +// turn — so a single response may freely contain both; the caller fulfills the +// bot ones and leaves the rest to the host. +export function classifyToolCalls( + assistantMessage: ChatCompletion.Choice['message'], +): ClassifiedToolCalls { + let botToolCalls: ChatCompletionMessageToolCall[] = []; + let hostToolCalls: ChatCompletionMessageToolCall[] = []; + for (let call of assistantMessage.tool_calls ?? []) { + if ( + call.type === 'function' && + call.function.name === READ_REALM_FILE_TOOL_NAME + ) { + botToolCalls.push(call); + } else { + hostToolCalls.push(call); + } + } + return { botToolCalls, hostToolCalls }; +} + +// A short human label for a file being read, derived from its URL: +// `…/skills//SKILL.md` → `/SKILL.md`, otherwise the file name. +// Shown in the command-result indicator so the user sees which file was read. +export function fileLabelFromUrl(url: string | undefined): string | undefined { + if (!url) { + return undefined; + } + try { + let segments = new URL(url).pathname.split('/').filter(Boolean); + let last = segments[segments.length - 1]; + if (last === 'SKILL.md' && segments.length >= 2) { + return `${segments[segments.length - 2]}/SKILL.md`; + } + return last ?? url; + } catch { + return url; + } +} diff --git a/packages/ai-bot/lib/responder.ts b/packages/ai-bot/lib/responder.ts index 48a1297e27a..af2a287d29f 100644 --- a/packages/ai-bot/lib/responder.ts +++ b/packages/ai-bot/lib/responder.ts @@ -8,7 +8,6 @@ import { throttle } from 'lodash-es'; import type { ISendEventResponse } from 'matrix-js-sdk/lib/matrix.js'; import type { ChatCompletionMessageFunctionToolCall } from 'openai/resources/chat/completions'; import type { FunctionToolCall } from '@cardstack/runtime-common/helpers/ai'; -import type { CommandRequest } from '@cardstack/runtime-common/commands'; import type OpenAI from 'openai'; import type { ChatCompletionSnapshot } from 'openai/lib/ChatCompletionStream'; import type { MatrixEvent as DiscreteMatrixEvent } from 'matrix-js-sdk'; @@ -67,33 +66,11 @@ export class Responder { needsMessageSend = false; - // Hand the current event off as a command-result indicator for tool calls the - // bot ran itself (e.g. readRealmFile) and rotate to a fresh event for what - // streams next. The indicator keeps its slot, so it precedes the answer. - // Returns the indicator's event id so the caller can post its result - // (done/failed). - async beginCommandResultIndicator( - commandRequests: Partial[], - ): Promise { - // Drop any pending streamed update for this event — we're replacing it - // wholesale with the indicator. - this.needsMessageSend = false; - ( - this.sendMessageEventWithThrottlingInternal as unknown as { - cancel: () => void; - } - ).cancel(); - let indicatorEventId = - await this.matrixResponsePublisher.sendCommandResultIndicator( - commandRequests, - ); - // Start the next message clean and let it send fresh. - this.responseState.resetForNextEvent(); - this._lastSentTotal = 0; - this._lastSentContentLen = 0; - this._lastSentReasoningLen = 0; - this._lastSentToolCallsJson = undefined; - return indicatorEventId; + // The event id of the bot message this turn streamed into. ai-bot relates the + // command-result events for its own readRealmFile calls back to it, so they + // pair with the requests carried on that message. + get responseEventId(): string | undefined { + return this.matrixResponsePublisher.originalResponseEventId; } async ensureThinkingMessageSent() { diff --git a/packages/ai-bot/lib/response-state.ts b/packages/ai-bot/lib/response-state.ts index c828089f0af..86cd467e1ff 100644 --- a/packages/ai-bot/lib/response-state.ts +++ b/packages/ai-bot/lib/response-state.ts @@ -1,7 +1,6 @@ import { thinkingMessage } from '../constants.ts'; import type { ChatCompletionSnapshot } from 'openai/lib/ChatCompletionStream'; import { cleanContent } from '@cardstack/runtime-common/ai'; -import { READ_REALM_FILE_TOOL_NAME } from './read-realm-file.ts'; export default class ResponseState { latestReasoning: string = ''; @@ -12,18 +11,6 @@ export default class ResponseState { isStreamingFinished = false; isCanceled = false; - // Clear the accumulated stream so the next message event starts fresh. Used - // when the readRealmFile loop hands the current event off as a command-result - // indicator and - // rotates to a new event for what follows. - resetForNextEvent() { - this.latestReasoning = ''; - this.latestContent = ''; - this.toolCalls = []; - this.toolCallsJson = undefined; - this.isStreamingFinished = false; - } - setAllowedToolNames(names: Iterable | undefined) { this.allowedToolNames = names ? new Set(names) : undefined; } @@ -65,11 +52,6 @@ export default class ResponseState { if (name === 'checkCorrectness') { return false; } - // readRealmFile runs inside the bot, not on the host — never surface - // it as a command request for the user to execute. - if (name === READ_REALM_FILE_TOOL_NAME) { - return false; - } if (this.allowedToolNames && !this.allowedToolNames.has(name)) { return false; } diff --git a/packages/ai-bot/main.ts b/packages/ai-bot/main.ts index 5f3d2519b87..23a8d5f93bd 100644 --- a/packages/ai-bot/main.ts +++ b/packages/ai-bot/main.ts @@ -24,8 +24,6 @@ import { getRoomEvents, sendPromptAsDebugMessage, constructHistory, - sendMatrixEvent, - sendMessageEvent, } from '@cardstack/runtime-common/ai'; import { validateAICredits } from '@cardstack/billing/ai-billing'; import { @@ -38,13 +36,12 @@ import { import { handleDebugCommands } from './lib/debug.ts'; import { DelegatedUserRealmSessionManager } from './lib/user-delegated-realm-server-session.ts'; -import { readRealmFileTool } from './lib/read-realm-file.ts'; import { - buildReadRealmFileFollowup, + readRealmFileTool, classifyToolCalls, - readRealmFileCommandRequests, - fileReadResultContent, -} from './lib/read-realm-file-loop.ts'; + READ_REALM_FILE_TOOL_NAME, +} from './lib/read-realm-file.ts'; +import { fulfillReadRealmFileCalls } from './lib/read-realm-file-fulfillment.ts'; import { Responder } from './lib/responder.ts'; import { shouldSetRoomTitle, @@ -117,10 +114,7 @@ class Assistant { getResponse( prompt: PromptParts, senderMatrixUserId?: string, - // Lets the readRealmFile loop re-run a turn with tool results appended - // without rebuilding the rest of the prompt. - messagesOverride?: ChatCompletionMessageParam[], - // Whether to offer the bot-executed readRealmFile tool. The caller decides + // Whether to offer the bot-fulfilled readRealmFile tool. The caller decides // (delegation configured + a single-human room); the bot never advertises // a tool it won't run. offerRealmFileRead = false, @@ -131,8 +125,7 @@ class Assistant { let request: Parameters[0] = { model: this.getModel(prompt), - messages: - messagesOverride ?? (prompt.messages as ChatCompletionMessageParam[]), + messages: prompt.messages as ChatCompletionMessageParam[], }; if (prompt.reasoningEffort !== undefined) { @@ -285,9 +278,10 @@ Common issues are: // (the message sender). In a room with more than one human, "the user" // is ambiguous, so the bot must not read any realm on someone's behalf: // disable realm file reading entirely there. - let humanRoomMemberCount = room + let humanRoomMembers = room .getJoinedMembers() - .filter((member) => member.userId !== aiBotUserId).length; + .filter((member) => member.userId !== aiBotUserId); + let humanRoomMemberCount = humanRoomMembers.length; let realmFileReadingAllowed = assistant.delegatedUserRealmSessions.enabled && humanRoomMemberCount === 1; @@ -299,6 +293,23 @@ Common issues are: return; // don't print paginated results } + // A continuation the bot triggered with its own readRealmFile result + // event arrives with sender = the bot. Re-attribute it to the single + // human in the room so the guard below lets it through and all per-user + // logic (billing, the delegated read's onBehalfOf, request.user) acts on + // the user's behalf rather than the bot's. Only single-human rooms + // fulfill reads, so the human is unambiguous; every other bot-sent event + // keeps sender = bot and is ignored by the guard. getShouldRespond still + // decides whether we actually generate, so this can't loop on its own + // answer. + if ( + senderMatrixUserId === aiBotUserId && + event.getType() === APP_BOXEL_COMMAND_RESULT_EVENT_TYPE && + humanRoomMemberCount === 1 + ) { + senderMatrixUserId = humanRoomMembers[0].userId; + } + if (senderMatrixUserId === aiBotUserId) { return; } @@ -428,9 +439,13 @@ Common issues are: 'history:constructPromptParts', async () => getPromptParts(eventList, aiBotUserId, client), ); - responder.responseState.setAllowedToolNames( - promptParts.tools?.map((tool) => tool.function.name), - ); + responder.responseState.setAllowedToolNames([ + ...(promptParts.tools?.map((tool) => tool.function.name) ?? []), + // readRealmFile is offered separately (in getResponse), not via + // promptParts.tools, so allow it explicitly when this room may use + // it — otherwise the surfaced tool call would be filtered out. + ...(realmFileReadingAllowed ? [READ_REALM_FILE_TOOL_NAME] : []), + ]); if (promptParts.pendingCodePatchCorrectnessChecks) { return await publishCodePatchCorrectnessMessage( promptParts.pendingCodePatchCorrectnessChecks, @@ -523,217 +538,107 @@ Common issues are: model: promptParts.model, }); } - // Messages for the current round. The loadSkill loop appends the - // assistant's tool call and the fetched skill content here, then - // generates again. - let workingMessages: ChatCompletionMessageParam[] = - promptParts.messages as ChatCompletionMessageParam[]; - try { - // Usually one pass. When the model calls only the bot-executed - // loadSkill tool, run those calls and generate again with the - // results in context, repeating until it stops asking. - for (;;) { - let roundCostInUsd: number | undefined; - const runner = assistant - .getResponse( - promptParts, - senderMatrixUserId, - workingMessages, - realmFileReadingAllowed, - ) - .on('chunk', async (chunk, snapshot) => { - log.info(`[${eventId}] Received chunk %s`, chunk.id); - if (profEnabled() && firstChunkAt == null) { - firstChunkAt = Date.now(); - profNote(eventId, 'llm:ttft', { - ms: firstChunkAt - requestStart, - model: promptParts.model, - }); - } - generationId = chunk.id; - if (chunk.usage && (chunk.usage as any).cost != null) { - roundCostInUsd = (chunk.usage as any).cost; - } - let activeGeneration = activeGenerations.get(room.roomId); - if (activeGeneration) { - activeGeneration.lastGeneratedChunkId = generationId; - } - - let chunkProcessingResult = await profTime( - eventId, - 'llm:chunk:onChunk', - async () => responder.onChunk(chunk, snapshot), - ); - let chunkProcessingResultError = - chunkProcessingResult.find( - (promiseResult) => - promiseResult && - 'errorMessage' in promiseResult && - promiseResult.errorMessage != null, - ) as { errorMessage: string } | undefined; - - if (chunkProcessingResultError) { - chunkHandlingError = - chunkProcessingResultError.errorMessage; - - // If there was an error processing the chunk, e.g. matrix sending error (e.g. event too large), - // then we want to stop accepting more chunks by aborting the runner. This will throw an error - // where the await responder.finalize() is called (the catch block below will handle this) - runner.abort(); - } - }) - .on('error', async (error) => { - await responder.onError(error); - }); + let roundCostInUsd: number | undefined; + const runner = assistant + .getResponse( + promptParts, + senderMatrixUserId, + realmFileReadingAllowed, + ) + .on('chunk', async (chunk, snapshot) => { + log.info(`[${eventId}] Received chunk %s`, chunk.id); + if (profEnabled() && firstChunkAt == null) { + firstChunkAt = Date.now(); + profNote(eventId, 'llm:ttft', { + ms: firstChunkAt - requestStart, + model: promptParts.model, + }); + } + generationId = chunk.id; + if (chunk.usage && (chunk.usage as any).cost != null) { + roundCostInUsd = (chunk.usage as any).cost; + } + let activeGeneration = activeGenerations.get(room.roomId); + if (activeGeneration) { + activeGeneration.lastGeneratedChunkId = generationId; + } - activeGenerations.set(room.roomId, { - responder, - runner, - lastGeneratedChunkId: generationId, - completionPromise: generationCompletionPromise, + let chunkProcessingResult = await profTime( + eventId, + 'llm:chunk:onChunk', + async () => responder.onChunk(chunk, snapshot), + ); + let chunkProcessingResultError = chunkProcessingResult.find( + (promiseResult) => + promiseResult && + 'errorMessage' in promiseResult && + promiseResult.errorMessage != null, + ) as { errorMessage: string } | undefined; + + if (chunkProcessingResultError) { + chunkHandlingError = + chunkProcessingResultError.errorMessage; + + // If there was an error processing the chunk, e.g. matrix sending error (e.g. event too large), + // then we want to stop accepting more chunks by aborting the runner. This will throw an error + // where the await responder.finalize() is called (the catch block below will handle this) + runner.abort(); + } + }) + .on('error', async (error) => { + await responder.onError(error); }); - let completion = await profTime( - eventId, - 'llm:finalChatCompletion', - async () => runner.finalChatCompletion(), - ); - // Each round's chunk usage reports that round's running total; - // sum them so billing sees the whole turn. - if (typeof roundCostInUsd === 'number') { - costInUsd = (costInUsd ?? 0) + roundCostInUsd; - } - - let message = completion.choices?.[0]?.message; - // Bot tools (readRealmFile) run in-process this turn; host - // commands run on a later turn. Those time models don't mix, - // so classify and handle each set explicitly — never silently - // drop a bot call. - let { botToolCalls, hostToolCalls } = message - ? classifyToolCalls(message) - : { botToolCalls: [], hostToolCalls: [] }; - if ( - message && - senderMatrixUserId && - realmFileReadingAllowed && - botToolCalls.length > 0 - ) { - let fileReadRequests = - readRealmFileCommandRequests(botToolCalls); - - if (hostToolCalls.length === 0) { - // Bot-only round: turn the current "thinking" bubble into - // a loading command-result indicator (tagged executedBy: - // 'ai-bot'; the host shows it but never runs it) and rotate - // so the answer streams into a new message after it — the - // read shows applying → done, then the answer. - let indicatorEventId = - await responder.beginCommandResultIndicator( - fileReadRequests, - ); - - let followup = await buildReadRealmFileFollowup( - message, - botToolCalls, - { - onBehalfOf: senderMatrixUserId, - delegatedUserRealmSessions: - assistant.delegatedUserRealmSessions, - }, - ); - - // Resolve each indicator — applied on success, invalid + - // reason on failure, so a failed read reads as failed - // rather than as a clean read. - if (indicatorEventId) { - for (let outcome of followup.outcomes) { - await sendMatrixEvent( - client, - room.roomId, - APP_BOXEL_COMMAND_RESULT_EVENT_TYPE, - fileReadResultContent({ - commandRequestId: outcome.commandRequestId, - indicatorEventId, - ok: outcome.ok, - failureReason: outcome.error, - agentId, - }), - undefined, - ).catch((e) => - log.error( - `[${eventId}] Failed to mark file read: ${ - e?.message ?? e - }`, - ), - ); - } - } - workingMessages = [ - ...workingMessages, - ...followup.messages, - ]; - continue; - } + activeGenerations.set(room.roomId, { + responder, + runner, + lastGeneratedChunkId: generationId, + completionPromise: generationCompletionPromise, + }); - // Mixed round: bot tools are same-turn only and host - // commands are next-turn only, so we can't honor both. Don't - // loop; reject the reads explicitly (a visible failed - // indicator, never a silent drop) and let the host commands - // proceed on the normal path below. - let rejectionEventId = ( - await sendMessageEvent( - client, - room.roomId, - '', - undefined, - { - isStreamingFinished: true, - data: { context: { agentId } }, - }, - fileReadRequests, - ).catch((e) => { - log.error( - `[${eventId}] Failed to record rejected reads: ${ - e?.message ?? e - }`, - ); - return undefined; - }) - )?.event_id; - if (rejectionEventId) { - for (let request of fileReadRequests) { - await sendMatrixEvent( - client, - room.roomId, - APP_BOXEL_COMMAND_RESULT_EVENT_TYPE, - fileReadResultContent({ - commandRequestId: request.id!, - indicatorEventId: rejectionEventId, - ok: false, - failureReason: - 'readRealmFile cannot be mixed with host-dispatched commands in the same round; call it on its own.', - agentId, - }), - undefined, - ).catch((e) => - log.error( - `[${eventId}] Failed to mark read rejected: ${ - e?.message ?? e - }`, - ), - ); - } - } - // Fall through to finalize so the host commands proceed. - } + let completion = await profTime( + eventId, + 'llm:finalChatCompletion', + async () => runner.finalChatCompletion(), + ); + if (typeof roundCostInUsd === 'number') { + costInUsd = (costInUsd ?? 0) + roundCostInUsd; + } - log.info(`[${eventId}] Generation complete`); - await profTime(eventId, 'response:finalize', async () => - responder.finalize(), - ); - log.info(`[${eventId}] Response finalized`); - break; + log.info(`[${eventId}] Generation complete`); + await profTime(eventId, 'response:finalize', async () => + responder.finalize(), + ); + log.info(`[${eventId}] Response finalized`); + + // readRealmFile is a tool ai-bot fulfills itself. The answer has + // already streamed (with the reads surfaced as executedBy: + // 'ai-bot' command requests); now fetch each file, attach it to + // a command-result event, and let the normal command-result path + // drive the continuation on a later turn — exactly as a host + // command result would. Because reads now resolve next-turn like + // host commands, an answer may freely mix the two; the host + // fulfills its commands, we fulfill ours, and getShouldRespond + // waits for all of them before generating again. + let message = completion.choices?.[0]?.message; + let { botToolCalls } = message + ? classifyToolCalls(message) + : { botToolCalls: [] }; + if ( + realmFileReadingAllowed && + botToolCalls.length > 0 && + responder.responseEventId + ) { + await fulfillReadRealmFileCalls(botToolCalls, { + client, + roomId: room.roomId, + requestEventId: responder.responseEventId, + agentId, + onBehalfOf: senderMatrixUserId, + delegatedUserRealmSessions: + assistant.delegatedUserRealmSessions, + }); } } catch (error) { // When the cancel handler aborts the runner, diff --git a/packages/ai-bot/tests/index.ts b/packages/ai-bot/tests/index.ts index 99e6b13c527..2ecd2449ab6 100644 --- a/packages/ai-bot/tests/index.ts +++ b/packages/ai-bot/tests/index.ts @@ -14,6 +14,6 @@ import './interrupt-test.ts'; import './credit-tracking-test.ts'; import './user-delegated-realm-server-session-test.ts'; import './read-realm-file-test.ts'; -import './read-realm-file-loop-test.ts'; +import './read-realm-file-fulfillment-test.ts'; QUnit.start(); diff --git a/packages/ai-bot/tests/read-realm-file-fulfillment-test.ts b/packages/ai-bot/tests/read-realm-file-fulfillment-test.ts new file mode 100644 index 00000000000..aca3a994eb7 --- /dev/null +++ b/packages/ai-bot/tests/read-realm-file-fulfillment-test.ts @@ -0,0 +1,208 @@ +import QUnit from 'qunit'; +const { module, test, assert } = QUnit; + +import { fulfillReadRealmFileCalls } from '../lib/read-realm-file-fulfillment.ts'; +import { READ_REALM_FILE_TOOL_NAME } from '../lib/read-realm-file.ts'; +import { + APP_BOXEL_COMMAND_RESULT_EVENT_TYPE, + APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, + APP_BOXEL_COMMAND_RESULT_WITH_OUTPUT_MSGTYPE, +} from '@cardstack/runtime-common/matrix-constants'; + +const ON_BEHALF_OF = '@user:localhost'; +const AGENT_ID = 'agent-1'; +const ROOM_ID = '!room:localhost'; +const REQUEST_EVENT_ID = '$request:localhost'; +const REALM = 'https://localhost:4201/user/jane/'; +const FILE_URL = + 'https://localhost:4201/user/jane/skills/trip-planner/SKILL.md'; + +function readRealmFileCall(id: string, args: object) { + return { + id, + type: 'function', + function: { + name: READ_REALM_FILE_TOOL_NAME, + arguments: JSON.stringify(args), + }, + } as any; +} + +// A fake Matrix client that records sent events and uploads. sendMatrixEvent +// JSON-stringifies content.data on the way out, so the recorded data is a +// string — tests parse it back. +function fakeClient() { + let sent: { eventType: string; content: any }[] = []; + let uploads: { content: any; opts: any }[] = []; + let client = { + sendEvent: async (_roomId: string, eventType: string, content: any) => { + sent.push({ eventType, content }); + return { event_id: `$sent-${sent.length}:localhost` }; + }, + uploadContent: async (content: any, opts: any) => { + uploads.push({ content, opts }); + return { content_uri: `mxc://localhost/upload-${uploads.length}` }; + }, + mxcUrlToHttp: (uri: string) => + `https://localhost/_matrix/media/v3/download/${uri.replace( + 'mxc://', + '', + )}`, + } as any; + return { client, sent, uploads }; +} + +function sessions() { + return { + getToken: async () => 'tok', + invalidate: () => {}, + }; +} + +function baseDeps(client: any, extra: object = {}) { + return { + client, + roomId: ROOM_ID, + requestEventId: REQUEST_EVENT_ID, + agentId: AGENT_ID, + onBehalfOf: ON_BEHALF_OF, + delegatedUserRealmSessions: sessions(), + ...extra, + }; +} + +// Reads the (stringified) data payload back off a recorded event. +function dataOf(sentEvent: { content: any }) { + return JSON.parse(sentEvent.content.data); +} + +module('fulfillReadRealmFileCalls', () => { + test('a successful read attaches the file to an applied command-result event', async () => { + let { client, sent } = fakeClient(); + let fetch = (async () => + new Response('# Trip Planner', { + status: 200, + })) as unknown as typeof globalThis.fetch; + + let outcomes = await fulfillReadRealmFileCalls( + [readRealmFileCall('c1', { realm: REALM, url: FILE_URL })], + baseDeps(client, { + fetch, + // Inject the uploader so this case doesn't touch the dedup cache. + uploadText: async () => 'https://localhost/media/trip-planner', + }), + ); + + assert.deepEqual(outcomes, [{ commandRequestId: 'c1', ok: true }]); + assert.strictEqual(sent.length, 1, 'one command-result event posted'); + let { eventType, content } = sent[0]; + assert.strictEqual(eventType, APP_BOXEL_COMMAND_RESULT_EVENT_TYPE); + assert.strictEqual( + content.msgtype, + APP_BOXEL_COMMAND_RESULT_WITH_OUTPUT_MSGTYPE, + ); + assert.strictEqual(content.commandRequestId, 'c1'); + assert.strictEqual(content['m.relates_to'].key, 'applied'); + assert.strictEqual(content['m.relates_to'].event_id, REQUEST_EVENT_ID); + let data = dataOf(sent[0]); + assert.strictEqual(data.attachedFiles.length, 1); + assert.strictEqual( + data.attachedFiles[0].sourceUrl, + FILE_URL, + 'attachment keeps the realm source url for scoping/supersession', + ); + assert.strictEqual( + data.attachedFiles[0].url, + 'https://localhost/media/trip-planner', + 'attachment points at the uploaded media url', + ); + assert.strictEqual(data.context.agentId, AGENT_ID); + }); + + test('a failed read posts an invalid result carrying the reason, no attachment', async () => { + let { client, sent } = fakeClient(); + let fetch = (async () => + new Response('nope', { + status: 404, + })) as unknown as typeof globalThis.fetch; + + let outcomes = await fulfillReadRealmFileCalls( + [readRealmFileCall('c1', { realm: REALM, url: FILE_URL })], + baseDeps(client, { fetch }), + ); + + assert.false(outcomes[0].ok, 'a 404 read is reported as failed'); + assert.strictEqual(sent.length, 1); + let { content } = sent[0]; + assert.strictEqual( + content.msgtype, + APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, + ); + assert.strictEqual(content['m.relates_to'].key, 'invalid'); + assert.true( + content.failureReason.includes('404'), + 'the reason rides along so the user sees why it failed', + ); + assert.strictEqual(dataOf(sent[0]).attachedFiles, undefined); + }); + + test('malformed arguments fail without fetching', async () => { + let { client, sent } = fakeClient(); + let fetched = false; + let fetch = (async () => { + fetched = true; + return new Response('x', { status: 200 }); + }) as unknown as typeof globalThis.fetch; + + let outcomes = await fulfillReadRealmFileCalls( + [ + { + id: 'c1', + type: 'function', + function: { + name: READ_REALM_FILE_TOOL_NAME, + arguments: '{not json', + }, + } as any, + ], + baseDeps(client, { fetch }), + ); + + assert.false(outcomes[0].ok); + assert.false(fetched, 'never fetched for malformed arguments'); + assert.strictEqual(sent[0].content['m.relates_to'].key, 'invalid'); + }); + + test('identical content is uploaded once and re-referenced (dedup)', async () => { + let { client, sent, uploads } = fakeClient(); + // Content unique to this test so the module-level hash cache is exercised + // cleanly regardless of other tests (this is the only test that uploads via + // the real, caching uploader). + let body = '# Dedup Fixture — reuse-me-across-rooms'; + let fetch = (async () => + new Response(body, { + status: 200, + })) as unknown as typeof globalThis.fetch; + + await fulfillReadRealmFileCalls( + [readRealmFileCall('c1', { realm: REALM, url: FILE_URL })], + baseDeps(client, { fetch }), + ); + await fulfillReadRealmFileCalls( + [readRealmFileCall('c2', { realm: REALM, url: FILE_URL })], + baseDeps(client, { fetch }), + ); + + assert.strictEqual(sent.length, 2, 'both reads post their own result'); + assert.strictEqual( + uploads.length, + 1, + 'the same bytes are uploaded to Matrix only once', + ); + // Both results point at the same uploaded media url. + assert.strictEqual( + dataOf(sent[0]).attachedFiles[0].url, + dataOf(sent[1]).attachedFiles[0].url, + ); + }); +}); diff --git a/packages/ai-bot/tests/read-realm-file-loop-test.ts b/packages/ai-bot/tests/read-realm-file-loop-test.ts deleted file mode 100644 index 250397f273d..00000000000 --- a/packages/ai-bot/tests/read-realm-file-loop-test.ts +++ /dev/null @@ -1,258 +0,0 @@ -import QUnit from 'qunit'; -const { module, test, assert } = QUnit; - -import { - buildReadRealmFileFollowup, - classifyToolCalls, - readRealmFileCommandRequests, - fileReadResultContent, -} from '../lib/read-realm-file-loop.ts'; -import { READ_REALM_FILE_TOOL_NAME } from '../lib/read-realm-file.ts'; -import { - APP_BOXEL_COMMAND_RESULT_REL_TYPE, - APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, -} from '@cardstack/runtime-common/matrix-constants'; - -const ON_BEHALF_OF = '@user:localhost'; -const REALM = 'https://localhost:4201/user/jane/'; -const FILE_URL = - 'https://localhost:4201/user/jane/skills/trip-planner/SKILL.md'; - -function assistantMessage(toolCalls: any[]): any { - return { role: 'assistant', content: null, tool_calls: toolCalls }; -} - -function readRealmFileCall(id: string, args: object) { - return { - id, - type: 'function', - function: { - name: READ_REALM_FILE_TOOL_NAME, - arguments: JSON.stringify(args), - }, - }; -} - -function hostCommandCall(id: string) { - return { - id, - type: 'function', - function: { name: 'SomeHostCommand', arguments: '{}' }, - }; -} - -function deps(body = 'FILE BODY') { - let calls: { onBehalfOf: string; realm: string }[] = []; - return { - calls, - onBehalfOf: ON_BEHALF_OF, - delegatedUserRealmSessions: { - getToken: async (a: { onBehalfOf: string; realm: string }) => { - calls.push(a); - return 'tok'; - }, - invalidate: () => {}, - }, - fetch: (async () => - new Response(body, { - status: 200, - })) as unknown as typeof globalThis.fetch, - }; -} - -module('classifyToolCalls', () => { - test('no tool calls → both sets empty', () => { - let { botToolCalls, hostToolCalls } = classifyToolCalls( - assistantMessage([]), - ); - assert.deepEqual(botToolCalls, []); - assert.deepEqual(hostToolCalls, []); - }); - - test('readRealmFile-only → all bot, no host', () => { - let { botToolCalls, hostToolCalls } = classifyToolCalls( - assistantMessage([ - readRealmFileCall('c1', { realm: REALM, url: FILE_URL }), - ]), - ); - assert.deepEqual( - botToolCalls.map((c) => c.id), - ['c1'], - ); - assert.deepEqual(hostToolCalls, []); - }); - - test('host-only → no bot, all host', () => { - let { botToolCalls, hostToolCalls } = classifyToolCalls( - assistantMessage([hostCommandCall('c1')]), - ); - assert.deepEqual(botToolCalls, []); - assert.deepEqual( - hostToolCalls.map((c) => c.id), - ['c1'], - ); - }); - - test('mixed → split, nothing dropped', () => { - let { botToolCalls, hostToolCalls } = classifyToolCalls( - assistantMessage([ - readRealmFileCall('c1', { realm: REALM, url: FILE_URL }), - hostCommandCall('c2'), - ]), - ); - assert.deepEqual( - botToolCalls.map((c) => c.id), - ['c1'], - ); - assert.deepEqual( - hostToolCalls.map((c) => c.id), - ['c2'], - ); - }); -}); - -module('buildReadRealmFileFollowup', () => { - test('returns nothing when given no bot tool calls', async () => { - let d = deps(); - let out = await buildReadRealmFileFollowup(assistantMessage([]), [], d); - assert.deepEqual(out.messages, []); - assert.deepEqual(out.outcomes, []); - }); - - test('runs the bot tool calls and returns assistant turn + one tool result each', async () => { - let d = deps('# Trip Planner'); - let msg = assistantMessage([ - readRealmFileCall('c1', { realm: REALM, url: FILE_URL }), - ]); - let out = await buildReadRealmFileFollowup( - msg, - classifyToolCalls(msg).botToolCalls, - d, - ); - - assert.strictEqual( - out.messages.length, - 2, - 'assistant message + one tool result', - ); - assert.strictEqual( - out.messages[0], - msg, - 'assistant turn is preserved first', - ); - assert.strictEqual((out.messages[1] as any).role, 'tool'); - assert.strictEqual((out.messages[1] as any).tool_call_id, 'c1'); - assert.strictEqual((out.messages[1] as any).content, '# Trip Planner'); - assert.strictEqual(out.outcomes.length, 1); - assert.strictEqual(out.outcomes[0].commandRequestId, 'c1'); - assert.true(out.outcomes[0].ok, 'a successful read is reported ok'); - assert.deepEqual(d.calls, [{ onBehalfOf: ON_BEHALF_OF, realm: REALM }]); - }); - - test('reports a failed outcome for malformed arguments', async () => { - let d = deps(); - let msg = assistantMessage([ - { - id: 'c1', - type: 'function', - function: { - name: READ_REALM_FILE_TOOL_NAME, - arguments: '{not json', - }, - }, - ]); - let out = await buildReadRealmFileFollowup( - msg, - classifyToolCalls(msg).botToolCalls, - d, - ); - assert.strictEqual(out.messages.length, 2); - assert.true( - (out.messages[1] as any).content.startsWith('Error:'), - 'tool result carries an error the model can read', - ); - assert.false(out.outcomes[0].ok, 'outcome is marked failed'); - }); - - test('feeds a fetch failure back as an error tool result', async () => { - let d = deps(); - d.fetch = (async () => - new Response('nope', { - status: 404, - })) as unknown as typeof globalThis.fetch; - let msg = assistantMessage([ - readRealmFileCall('c1', { realm: REALM, url: FILE_URL }), - ]); - let out = await buildReadRealmFileFollowup( - msg, - classifyToolCalls(msg).botToolCalls, - d, - ); - assert.true((out.messages[1] as any).content.includes('404')); - assert.false(out.outcomes[0].ok, 'a 404 read is reported as failed'); - }); -}); - -module('readRealmFileCommandRequests', () => { - test('expresses bot tool calls as ai-bot-executed command requests', () => { - let msg = assistantMessage([ - readRealmFileCall('c1', { realm: REALM, url: FILE_URL }), - ]); - let requests = readRealmFileCommandRequests( - classifyToolCalls(msg).botToolCalls, - ); - assert.strictEqual(requests.length, 1); - assert.strictEqual(requests[0].id, 'c1'); - assert.strictEqual(requests[0].name, READ_REALM_FILE_TOOL_NAME); - assert.strictEqual( - requests[0].executedBy, - 'ai-bot', - 'tagged so the host records but skips it', - ); - assert.strictEqual(requests[0].arguments!.realm, REALM); - assert.strictEqual(requests[0].arguments!.url, FILE_URL); - assert.strictEqual( - requests[0].arguments!.description, - 'Read file: trip-planner/SKILL.md', - 'the indicator carries a human label derived from the url', - ); - }); -}); - -module('fileReadResultContent', () => { - test('a successful read resolves its indicator to applied', () => { - let content = fileReadResultContent({ - commandRequestId: 'c1', - indicatorEventId: '$indicator:localhost', - ok: true, - agentId: 'agent-1', - }); - assert.strictEqual( - content.msgtype, - APP_BOXEL_COMMAND_RESULT_WITH_NO_OUTPUT_MSGTYPE, - ); - assert.strictEqual(content.commandRequestId, 'c1'); - assert.strictEqual(content.failureReason, undefined); - assert.deepEqual(content['m.relates_to'], { - event_id: '$indicator:localhost', - key: 'applied', - rel_type: APP_BOXEL_COMMAND_RESULT_REL_TYPE, - }); - }); - - test('a failed read resolves its indicator to invalid with the reason', () => { - let content = fileReadResultContent({ - commandRequestId: 'c1', - indicatorEventId: '$indicator:localhost', - ok: false, - failureReason: 'could not load SKILL.md (HTTP 404)', - agentId: 'agent-1', - }); - assert.strictEqual(content['m.relates_to'].key, 'invalid'); - assert.strictEqual( - content.failureReason, - 'could not load SKILL.md (HTTP 404)', - 'the reason rides along so the user sees why it failed', - ); - }); -}); diff --git a/packages/ai-bot/tests/read-realm-file-test.ts b/packages/ai-bot/tests/read-realm-file-test.ts index c943838b29b..e387328523a 100644 --- a/packages/ai-bot/tests/read-realm-file-test.ts +++ b/packages/ai-bot/tests/read-realm-file-test.ts @@ -6,6 +6,8 @@ import { DelegatedUserRealmSessionError } from '@cardstack/runtime-common/user-d import { executeReadRealmFile, readRealmFileTool, + classifyToolCalls, + fileLabelFromUrl, READ_REALM_FILE_TOOL_NAME, } from '../lib/read-realm-file.ts'; @@ -194,3 +196,67 @@ module('executeReadRealmFile', () => { assert.strictEqual(sessions.calls.length, 2, 're-minted a fresh token'); }); }); + +function assistantMessage(toolCalls: any[]): any { + return { role: 'assistant', content: null, tool_calls: toolCalls }; +} + +function fnCall(id: string, name: string, args: object = {}) { + return { + id, + type: 'function', + function: { name, arguments: JSON.stringify(args) }, + }; +} + +module('classifyToolCalls', () => { + test('splits readRealmFile (bot) from everything else (host)', () => { + let { botToolCalls, hostToolCalls } = classifyToolCalls( + assistantMessage([ + fnCall('c1', READ_REALM_FILE_TOOL_NAME, { + realm: REALM, + url: FILE_URL, + }), + fnCall('c2', 'SomeHostCommand'), + ]), + ); + assert.deepEqual( + botToolCalls.map((c) => c.id), + ['c1'], + ); + assert.deepEqual( + hostToolCalls.map((c) => c.id), + ['c2'], + 'a host command and a read coexist — neither is dropped', + ); + }); + + test('no tool calls → both sets empty', () => { + let { botToolCalls, hostToolCalls } = classifyToolCalls( + assistantMessage([]), + ); + assert.deepEqual(botToolCalls, []); + assert.deepEqual(hostToolCalls, []); + }); +}); + +module('fileLabelFromUrl', () => { + test('keeps the skill folder for a SKILL.md', () => { + assert.strictEqual( + fileLabelFromUrl(FILE_URL), + 'trip-planner/SKILL.md', + 'a skill reads as /SKILL.md', + ); + }); + + test('falls back to the file name otherwise', () => { + assert.strictEqual( + fileLabelFromUrl('https://localhost:4201/user/jane/notes.md'), + 'notes.md', + ); + }); + + test('undefined url → undefined label', () => { + assert.strictEqual(fileLabelFromUrl(undefined), undefined); + }); +}); From cb5098fd55f13d11cd8a880373519d7c9801091b Mon Sep 17 00:00:00 2001 From: Matic Jurglic Date: Tue, 30 Jun 2026 18:34:48 +0200 Subject: [PATCH 2/2] ai-bot: make the self-triggered readRealmFile continuation fire reliably The bot fulfills a readRealmFile read by posting a command-result event, which is meant to re-trigger the bot for the continuation turn. Two things prevented that continuation from running: - Fulfillment happened while the room lock was still held. The re-trigger has to acquire that lock, so it was dropped. Fulfill after the lock is released instead. - The re-trigger runs on the result event's local echo, before the homeserver has indexed it into /messages. getRoomEvents is a server fetch, so the continuation's history missed the just-posted result and the read looked unresolved (shouldRespond=false). Splice the in-hand result event into the history when the fetch didn't include it. Host command results arrive via sync (already server-side), so this only affects the bot's own results. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/ai-bot/main.ts | 75 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/packages/ai-bot/main.ts b/packages/ai-bot/main.ts index 23a8d5f93bd..0cca488b276 100644 --- a/packages/ai-bot/main.ts +++ b/packages/ai-bot/main.ts @@ -378,6 +378,17 @@ Common issues are: resolveGenerationCompletion = resolve; }); + // readRealmFile reads are fulfilled after the room lock is released + // (see the finally below). Fulfilling posts a command-result event that + // re-triggers the bot for the continuation turn, and that re-trigger + // has to acquire the room lock this handler holds — so fulfillment must + // wait until the lock is free. + let pendingFulfillBotToolCalls: ReturnType< + typeof classifyToolCalls + >['botToolCalls'] = []; + let pendingFulfillRequestEventId: string | undefined; + let pendingFulfillAgentId: string | undefined; + try { if (!Responder.eventMayTriggerResponse(event)) { return; // early exit for events that will not trigger a response @@ -413,6 +424,35 @@ Common issues are: return; } + // The bot drives its own continuation by posting a readRealmFile + // result event, and the handler runs on that event's *local echo* — + // before the homeserver has indexed it into /messages. getRoomEvents + // is a server fetch, so it misses the just-posted result, which would + // leave the read looking unresolved (shouldRespond=false) and stall + // the continuation. Splice the in-hand event into the history when the + // fetch didn't include it. Host command results arrive via sync + // (already server-side), so they're never missing and this is inert. + let triggerCommandRequestId = (event.getContent() as any) + ?.commandRequestId; + if ( + event.getType() === APP_BOXEL_COMMAND_RESULT_EVENT_TYPE && + triggerCommandRequestId && + !eventList.some( + (e: any) => + e.type === APP_BOXEL_COMMAND_RESULT_EVENT_TYPE && + e.content?.commandRequestId === triggerCommandRequestId, + ) + ) { + eventList.push({ + type: event.getType(), + sender: event.getSender()!, + content: event.getContent(), + event_id: event.getId()!, + origin_server_ts: event.event.origin_server_ts!, + room_id: room.roomId, + } as unknown as DiscreteMatrixEvent); + } + // Return early here if it's a debug event if (isRecognisedDebugCommand(eventBody)) { return await assistant.handleDebugCommands( @@ -630,15 +670,13 @@ Common issues are: botToolCalls.length > 0 && responder.responseEventId ) { - await fulfillReadRealmFileCalls(botToolCalls, { - client, - roomId: room.roomId, - requestEventId: responder.responseEventId, - agentId, - onBehalfOf: senderMatrixUserId, - delegatedUserRealmSessions: - assistant.delegatedUserRealmSessions, - }); + // Defer fulfillment until after the room lock is released + // (see the finally below): fulfilling posts a result event + // that re-triggers the bot, and that re-trigger needs the + // room lock this handler still holds here. + pendingFulfillBotToolCalls = botToolCalls; + pendingFulfillRequestEventId = responder.responseEventId; + pendingFulfillAgentId = agentId; } } catch (error) { // When the cancel handler aborts the runner, @@ -750,6 +788,25 @@ Common issues are: // lock as released before attempting to acquire it. await releaseRoomLock(assistant.pgAdapter, room.roomId); resolveGenerationCompletion(); + + // Now that the lock is free, fulfill any reads. Each fulfillment + // posts a command-result event that re-triggers the bot for the + // continuation turn; that re-trigger acquires the room lock this + // handler just released. Fulfilling here (rather than inside the + // lock) is what lets the continuation proceed. + if ( + pendingFulfillRequestEventId && + pendingFulfillBotToolCalls.length > 0 + ) { + await fulfillReadRealmFileCalls(pendingFulfillBotToolCalls, { + client, + roomId: room.roomId, + requestEventId: pendingFulfillRequestEventId, + agentId: pendingFulfillAgentId, + onBehalfOf: senderMatrixUserId, + delegatedUserRealmSessions: assistant.delegatedUserRealmSessions, + }); + } } } catch (e) { log.error(e);