diff --git a/AGENT_APPLICATIONS_PLAN.md b/AGENT_APPLICATIONS_PLAN.md new file mode 100644 index 0000000000..90755f2680 --- /dev/null +++ b/AGENT_APPLICATIONS_PLAN.md @@ -0,0 +1,367 @@ +# Agent Applications — port plan & status + +> **TEMPORARY DOC — remove before merge.** This is the working plan for the +> agent-console → posthog/code port (PR #2700). It exists so the plan travels +> with the branch across sessions. Delete it before this PR is merged. + +## Goal + +Port the **agent-console** (deployed "agent applications") from +posthog/posthog's `ass` branch into posthog/code, surfaced under +`/code/agents`. The aim is **functional parity** with the console — every core +capability an operator has in the console should exist in code. We are **not** +porting the console's code; we re-implement each feature in code's layered +architecture and visual style. Where it makes sense, deployed-agent +conversations render through code's **native `ConversationView`** via an +SSE→ACP adapter, so deployed-agent chat looks and behaves like a local session. + +The console source of truth lives at +`products/agent_platform/services/agent-console` (Next.js) on the `ass` branch; +the Django API it consumes lives at `products/agent_platform/backend`. + +## Information architecture + +`/code/agents` is a two-tab surface (shared chrome: `AgentsTabLayout`): + +- **Scouts** (`/code/agents/scouts`) — the existing scheduled-agent / + self-driving configuration (`ConfigureAgentsSection`). Unchanged in content. +- **Applications** (`/code/agents/applications`) — deployed agent-platform + applications. This is where the entire console surface lands. + +`/code/agents` redirects to the Scouts tab. The tab bar lives only on the two +list views; detail pages (a scout, an agent, a session) keep their own focused +chrome. + +### Applications-tab IA (target) + +The console had top-level surfaces + per-agent sub-tabs. 
In code they all sit +under the Applications tab: + +**Applications landing / fleet surfaces** +- **Fleet overview** — stat strip (agents / live now / sessions·24h / spend·24h + / failures / pending approvals), **live-now panel** (cross-agent in-flight + sessions), recent activity, quick links. +- **Agent list** — all deployed agents with per-agent inline stats; filter by + All / Live / Drafts / Archived. +- **Global approvals queue** — fleet-wide inbox of approval-gated tool calls. +- **Fleet analytics** — cross-agent `$ai_*` observability dashboard. + +> Billing (AI-gateway wallet / ledger) and the Registry (skills / custom tools +> / native-tool catalog) are **out of scope here** — owned elsewhere. + +**Per-agent detail sub-tabs** (`/applications/{slug}/…`) +- **Overview** — status, triggers, live revision, recent activity, observability + links. +- **Sessions** — filterable session history → session detail (Conversation + + **Logs** panes). +- **Approvals** — this agent's approval queue. +- **Configuration** — spec explorer (model / instructions / triggers / tools / + skills / mcps / integrations / limits) + bundle file viewer + revisions. +- **Secrets** — encrypted env management. +- **Memory** — S3-backed file store + tables. +- **Connections** — Slack setup + integrations. +- **Observability** — per-agent observability **summary page**. + +## Architecture decisions + +- **Authoring is the Agent Builder's job; this surface renders.** Agents are + created and edited only through the Agent Builder (a meta-agent you chat + with), never through hand-built config forms. So the Applications surface is + **render-first**: it shows how an agent is configured (spec, revisions, + secrets, memory, connections) and exposes only **operational** mutations — + enable/disable an agent, and promote / freeze / archive a revision. There is + **no** spec editor, bundle file editor, trigger-config form, or secret-value + editor. 
(This retires most of the old M12 "authoring" milestone.) +- **Functional parity, not code reuse.** Re-implement each console feature to + code's conventions (Inversify services, `useAuthenticatedQuery` hooks, + Tailwind/Radix, `@posthog/quill`). The console's React is a behavioral spec, + not a source to copy. +- **Wire types** live in `@posthog/shared/agent-platform-types` (plain TS, + snake_case, no Zod — matches `inbox-types.ts`). Already covers applications, + stats, sessions, approvals, revisions, fleet, SSE events. Still **missing** + types for: memory files/tables, env keys, bundle/manifest, analytics + rollups, system-prompt render, validation, preview tokens. +- **Reads** live on `PostHogAPIClient` (raw fetcher + cast, mirroring the + signals methods). UI calls via `useAuthenticatedQuery` hooks. The read methods + for apps / stats / sessions / approvals / revisions / fleet already exist. +- **Mutations & lifecycle** (approve/reject, freeze/promote/archive/rollback, + set_env, memory CRUD, file writes, cron-fire, preview) are **not** trivial + passthroughs — they carry orchestration, optimistic state and cross-resource + effects (e.g. promote rewrites `live_revision`). These cross the threshold + into a **`@posthog/core` AgentApplications service** rather than living as raw + client methods. Decide per-feature; flag in each milestone. +- **SSE→ACP mappers** are pure, unit-tested modules in + `packages/ui/src/features/agent-applications/chat/`. They translate each + agent_platform event into the equivalent ACP JSON-RPC message and let code's + existing `buildConversationItems` do the reduction — we do **not** + re-implement the console's `runnerReducer`. +- **Analytics** read the team's own PostHog project `$ai_*` events via the + `/query/` HogQL endpoint on `PostHogAPIClient`, not a bespoke service. 
+- Backend routes (`GET /api/projects/{teamId}/agent_applications/…`, + `/agent_fleet/…`) exist only on posthog/posthog's `ass` branch, not + production Cloud yet. + +## Feature parity map + +Status legend: **✅ done** · **🟡 API-ready** (client method/types exist, UI not +built) · **⬜ missing** (needs types + client + UI) · **🔴 live** (needs SSE +transport, blocked on the M-Live open question). + +| # | Feature | What it does | Backend | Status | +|---|---------|--------------|---------|--------| +| **Browsing & monitoring** |||| +| 1 | Fleet overview / stat strip | Aggregate KPIs across all agents | `/query/` HogQL `$ai_*` | ✅ (analytics KPIs: spend/sessions/failure/p95 + trends + WoW deltas on the Applications overview; operational live-now/approvals counts dropped — see M6) | +| 2 | Live-now panel | Cross-agent in-flight sessions, live state dots | `agent_fleet/live_sessions/` | ✅ (Applications-landing panel; see M6) | +| 3 | Agent list + filters | All agents, per-agent inline stats, All/Live/Drafts/Archived | `agent_applications/`, `/query/` | ✅ list + inline stats / 🟡 filters | +| 4 | Per-agent overview | Status, triggers, live revision, recent activity | `agent_applications/{slug}/` | ✅ | +| 5 | Session list + filters | History; filter by state / revision / date; pagination | `…/sessions/?state=&revision_id=&…` | ✅ (state filter + load-more; date/revision filters pending) | +| 6 | Session transcript | Stored transcript via native `ConversationView` | (mapper) | ✅ | +| 7 | Session KPI strip + "fired by" | Per-session messages/tools/cost/duration/errors; cron badge | (in session detail) | ✅ | +| 8 | **Session logs pane** | Structured log viewer, level filter + search | `…/sessions/{id}/logs/` | ✅ | +| **Approvals** |||| +| 9 | Per-agent approvals queue | List approval-gated tool calls, filter by state | `…/approvals/` | ✅ | +| 10 | Global approvals queue | Fleet-wide approval inbox | `agent_fleet/approvals/` | ✅ (master/detail at 
`/code/agents/applications/approvals`; see M6) | +| 11 | Approval detail + decide | Reasoning snapshot, proposed args, approve/reject + edit args + reason; embedded session | `…/approvals/{id}/decide/` | ✅ (master/detail) | +| **Configuration & authoring** |||| +| 12 | Spec explorer | Filesystem-style view of model/triggers/tools/skills/mcps/integrations/secrets/limits | revision `spec` JSONB | ✅ | +| 13 | Bundle file viewer | Tree + read file (markdown/code/json), via reusable `FileExplorer` | `…/revisions/{id}/bundle/` | ✅ | +| 14 | Revision list + lifecycle | picker (drives explorer) + freeze(ready) → promote(live) → archive | `…/revisions/`, `/freeze/`, `/promote/`, `/archive/` | ✅ | +| 15 | Spec editing + validate | Edit spec on a draft, validate, render system prompt | `…/revisions/{id}/` PATCH | ~~retired~~ (Agent Builder authors) | +| 16 | Bundle file editing | Write/delete files, bulk bundle upload | `…/revisions/{id}/file/` PUT/DELETE | ~~retired~~ (Agent Builder authors) | +| 17 | Trigger config | chat / webhook / mcp / slack / cron + auth modes + endpoints/usage | revision `spec` | ✅ (view; editing retired) | +| 18 | Cron fire (run-now) | Manually fire a cron out-of-band to test → jump to session | `…/revisions/{id}/cron/fire/` | ✅ | +| **Secrets & env** |||| +| 19 | Env / secrets management | List keys, set/rotate/clear per key (guarded), discover required secrets from spec | `…/env_keys/…` | ✅ | +| **Memory** |||| +| 20 | Memory file store | Tree + read file (markdown); create/update/delete | `…/memory/files/`, `/tree/`, `/by_path/` | 🟡 (read in progress; edit deferred) | +| 21 | Memory search | BM25 full-text search (FileExplorer search mode) | `…/memory/search/?q=` | 🟡 (in progress) | +| 22 | Memory tables | List tables + read rows | `…/memory/tables/…` | 🟡 (in progress) | +| **Connections** |||| +| 23 | Slack setup | Derived Slack app manifest + request URLs (under the slack trigger) | `…/revisions/{id}/slack_manifest/` | ✅ | +| 24 | Integrations | 
PostHog integrations attached to an agent | (spec `integrations` + integ API) | ⬜ (view stub) | +| **Observability & analytics** |||| +| 25 | Per-agent observability summary | Rollup: spend, sessions, failure rate, p95 (+ trends/deltas), cost-by-model, tool reliability for this agent | `/query/` HogQL `$ai_*` | ✅ (Observability tab + KPIs on the Overview tab) | +| 26 | Fleet analytics dashboard | Cross-agent KPIs + WoW deltas, spend/cost, tool reliability | `/query/` HogQL `$ai_*` | ✅ (blended into the Applications overview: KPI strip + per-agent row stats; cost-by-model + tool reliability on the per-agent tab) | +| **Live & interactive** |||| +| 27 | Live chat / streaming | SSE transport → ACP; send message; cancel; new/resume chats | ingress `/agents/{slug}/run\|send\|listen\|cancel` | ✅ (per-agent **Chat** preview tab — region-derived ingress, optimistic send, info banner, local recent-chats rail with transcript-rebuilding resume; commit `c0688cfa`) | +| 28 | In-chat approvals | ACP tool-call permission prompts during a live turn | ingress + approvals | ✅ (inline card below the conversation in the chat preview; reuses the M5 decide path; see M-Live-InChat) | +| 29 | Draft preview | Run a non-live draft revision live before promoting | `…/preview-proxy/…`, `/preview_token/` | 🔴 | +| 30 | Agent Builder / "edit with AI" | Always-on dock chat with `agent-concierge` that drives UI (`focus_*`) + secrets (`set_secret`) + staged authoring; seed prompts from inline buttons | ingress + client tools | ✅ (global dock, page-context envelope + `get_context`, `focus_*` navigation, `set_secret` punch-out, edit-with-AI seeds; see M-Agent-Builder) | + +> **Out of scope (owned elsewhere):** billing (AI-gateway wallet + ledger) and +> the registry (native tools / skill templates / custom tool templates). + +## Milestones + +### Done + +- [x] **M1** — IA scaffold: feature dir, routes, surfaced under `/code/agents`. 
+- [x] **M2** — read surface: shared types, `PostHogAPIClient` methods (apps / + sessions / approvals / revisions / fleet), query hooks, list + per-agent + detail views. +- [x] **M3** — SSE→ACP chat adapter: + - [x] M3a — typed `AgentSessionEvent` SSE union + transcript content types. + - [x] M3b — pure mappers (stored transcript + live SSE) → `AcpMessage[]`, + 18 unit tests. + - [x] M3c — stored session transcripts render through `ConversationView`. +- [x] **Tabs** — split Agents into Scouts + Applications tabs. + +### Done this session + +- [x] **M4 — Sessions & logs** (features 5, 7, 8) — filterable session list, + session-detail KPI strip + "fired by" cron badge, structured Logs pane. + Commit `0f15929f` (incl. the latent empty-conversation render fix). +- [x] **M5 — Approvals** (features 9, 11) — per-agent approvals queue, decide + (approve/reject + edited args + reason). Commit `a2fa9115`, then reworked into + **master/detail with embedded session + refresh controls** (`a376b61b`). The + fleet-wide global approvals queue (feature 10) shipped later in M6 (`5bfeccef`). +- [x] **M7 — Observability** (features 25, 26; also 1, 3) — agent observability + over the team's own `$ai_*` events via a `getAgentAnalytics()` HogQL rollup on + `PostHogAPIClient` (5 parallel `/query/` panels, pure unit-tested shaping in + `agent-analytics.ts`). Surfaced as: the **Applications overview** (KPI strip — + spend/sessions/failure/p95 with 14-day sparkline trends + WoW deltas — blended + on top of the agent list, with per-agent rollups merged into each row); the + per-agent **Overview** tab (same KPI strip + link to the Observability tab); + and a new per-agent **Observability** tab (KPIs + cost-by-model + tool + reliability). Each surface has a small "Open in AI observability" deep link. + Data layer committed in `aed89291`; UI in `0856ea5f`. **Design note:** there is + no separate fleet-analytics page — analytics is blended into the overview and + the per-agent tabs. 
This replaced the old operational fleet/agent stat strips + (live-now + pending-approvals counts) — those returned in M6 (`5bfeccef`). +- [x] **Configuration explorer** (features 12, 13, 14, 17, 18, 23 + M8/M9) — + full-bleed filesystem explorer on a reusable `FileExplorer` primitive: a tree + (instructions · model · triggers · secrets · skills · tools · mcps · + integrations · limits) + per-node detail panes + bundle viewer + (markdown/code/json), selection in `?node=`. Stage A shell `1e5c1b91`; Stage B + trigger richness — auth modes/blurbs, public warning, **Slack setup card**, + trigger endpoints + curl/MCP usage, **cron "Run now"**, missing-secret + warnings, MCP tools grid — `aed89291`; **M9 revision bar** (picker drives the + explorer via `?revision=` + freeze/promote/archive behind confirms) `55dbb1b8`. +- [x] **M10 — Secrets** (feature 19) — set/rotate/**guarded clear** inline in the + secret detail (`env_keys` PUT/DELETE), status flips across the tree on success; + a set secret hides its input behind Rotate and Clear is a two-step confirm. + Commit `32e8749d`. +- [x] **M11 — Memory** (features 20, 21, 22) — Memory tab on the reusable + `FileExplorer`: folder tree + read file (markdown, with description/tags), a + Files/Tables toggle, BM25 search mode, and a tables view (list + row grid). + Render-only; create/update/delete deferred. Commit `22caee62`. +- [x] **M6 — Live-now & operational counts + global approvals queue** (features + 2, 10; restoration of operational counts from feature 1) — parity work that + restored the operational signal the M7 analytics KPIs displaced. Two pieces: + - **Live-now panel** on the Applications landing — a compact list of + cross-agent in-flight sessions (state badge, agent name, trigger kind, turn + count, preview, started-ago), each row linking to the per-agent session + detail. Backed by `useAgentFleetLiveSessions` (5s poll) over + `client.listAgentFleetLiveSessions`. 
Joins `application_id → name/slug` from + the already-cached `useAgentApplications()` query — no extra requests beyond + the panel's own. + - **Fleet approvals queue** at `/code/agents/applications/approvals` — + master/detail mirroring the per-agent `AgentApprovalsPane` but cross-agent: + each row shows the agent it belongs to, the detail pane reuses + `AgentApprovalDetail` (passing the joined slug), and the filter chips share + a single `agentApprovalsFilters.ts` source of truth between the per-agent + and fleet panes. Backed by `useAgentFleetApprovals` (10s poll) over + `client.listAgentFleetApprovals`. Standalone route with its own focused + chrome (back link + title), matching how per-agent detail pages render. + - **Operational strip** on the landing: `X live now · Y pending approvals →` + above the analytics KPI strip. `Y` deep-links to the new approvals route and + flips amber when non-zero. Counts come from the same fleet hooks the live-now + panel uses, so the strip is "free" beyond the requests already in flight. + Commit `5bfeccef`. +- [x] **M-Live (chat preview)** (feature 27) — per-agent **Chat** tab that runs a + live session against the agent's ingress and renders it through the native + `ConversationView`. **This resolved the M-Live "where does cloud-SSE transport + live" open question:** transport is a renderer-scoped UI hook (`useAgentChat`) + driving `run/send/cancel` + the `/listen` SSE loop via the api-client, mapped + to `AcpMessage[]` (`createAgentChatMapper`) into a new core `agentChatStore`; + no main-process/tRPC seam needed. Ingress is **region-derived** + (`resolveIngressBaseUrl`: dev → `localhost:3030` because the dev trycloudflare + tunnel buffers SSE; us/eu use `ingress_base_url`). 
QoL: `ConversationView` got + an optional `collapseMode` override (preview passes `"none"` so prose isn't + folded into a tool-call chip); the user message renders optimistically on send + (echo deduped); an info banner names the deployed revision; a local + recent-chats rail lists only chats started **here** (persisted per agent — not + the server session list), with new-chat + resume that rebuilds the transcript + from the stored session detail (`/listen` only tails, never replays) and + re-attaches the live stream for active sessions. Client tools: `toast` / + `get_context` resolve inline; `focus_*` / `set_secret` are wired by the Agent + Builder dock (`useAgentChat` takes an optional `clientTools` handler). Commit + `c0688cfa`. + +### Remaining (parity work) + +Reframed around the **render-first / Agent-Builder-authoring** principle above: +config/revisions/secrets/memory are read surfaces with only operational +controls. Ordered by core value. + +- [ ] **Enable / disable agent** — archive/unarchive the application (an + operational control deferred from M9; needs the destroy/restore endpoint). +- [ ] ~~**M12 — Spec & bundle authoring**~~ — **retired.** Spec/bundle/trigger + editing is the Agent Builder's job; the render views live in M8, operational + lifecycle in M9. + +### Deferred + +- [ ] **M13 — Connections** (feature 24) — **deferred** (legacy idea). A separate + integrations view isn't needed right now: Slack setup (feature 23) already + ships under the slack trigger, and `spec.integrations` renders in the config + explorer. +- [ ] **M-Live (remainder)** (feature 29) — the live transport itself shipped + (see "Done this session"); the **open transport question is resolved** + (renderer-hook + region-derived ingress). 
What's left on the live track: + - [ ] **Draft preview** (feature 29) — run a non-live **draft** revision live + before promoting, via the preview-proxy / short-lived `preview_token` + (`AgentChat` in the console mints/refreshes it on `preview_token_required`). + Lets the Agent Builder "test before promote". +- [x] **M-Live-InChat — in-chat approvals** (feature 28) — when a live turn + proposes an approval-gated tool call, the chat preview now surfaces the + decision inline as a card between the conversation and the composer, and + parks the composer until decided. Detection is poll-based (2s + `useAgentChatPendingApproval` filtered to the chat's session id), since the + agent-runner emits no SSE event for "waiting on approval" — confirmed by + reading `AgentSessionEvent` (the `waiting` event is for + `@posthog/meta-ask-for-input`, not approvals). The decide path reuses the + M5 `useDecideAgentApproval` mutation unchanged; on success the same + `agentApplicationsKeys.approvals(...)` prefix invalidation clears the + pending-card hook, and the SSE follow-up resumes the conversation + naturally. The decision form was extracted from `AgentApprovalDetail` into + a presentational `AgentApprovalDecisionForm` reused by both surfaces. Wired + into both the per-agent **Chat** tab (`AgentChatPane`) and the **Agent + Builder dock** (`AgentBuilderDock`) — same `AgentChatSurface` underneath, so + the punch-out is consistent across surfaces. +- [x] **M-Agent-Builder** (feature 30) — **shipped.** An always-on **right-hand + dock** ("Agent Builder") across all of `/code/agents` that chats with the + deployed `agent-concierge` (the meta-agent's slug is unchanged; only the + product/UI name is "Agent Builder"). It inspects/debugs agents and authors/edits + them via consent-gated **staged draft revisions** — the agent does the spec + edits server-side through its `@posthog/agent-applications-*` tools; code renders + the chat, drives the UI, and handles secrets. 
Reuses the live-chat stack + (`useAgentChat` / `agentChatStore` / mapper / `ConversationView` / region-derived + ingress). Lives in `features/agent-applications/agent-builder/`. All stages done: + - [x] **C1 — Dock shell.** Global resizable right rail in the `/code/agents` + layout (`AgentBuilderDockLayout`, react-resizable-panels + `autoSaveId`), + toggled via an edge affordance / hide button / **Cmd-Ctrl+I**; open + follow + mode persisted (`useAgentBuilderStore`). The core `agentChatStore` was + generalized to hold multiple chats keyed by `chatId` (dock `"agent-builder"` + + preview `"preview:"`); `useAgentChat` takes a `chatId`. Shared + `AgentChatSurface` extracted (now on the Quill `InputGroup` composer). + - [x] **C2 — Page-context registry.** `useSetAgentBuilderPage` wired in + `AgentDetailLayout`, `AgentsTabLayout`, and the session transcript; the context + is prepended as a stripped/deduped `[console-context]{…}` envelope on message + one and answers the `get_context` tool. + - [x] **C3 — `focus_*` UI-driving tools.** `useAgentBuilderClientTools` navigates + code's agent routes (`focus_tab/file/revision/spec_section/session`), gated by + the follow-mode toggle; `toast` wired. + - [x] **C4 — `set_secret` punch-out.** Interactive client tool: the server-side + tool returns `{queued, interactive}` and parks; the handler defers + (`{defer:true}`) and stores a `pendingSecret`; the dock renders + `AgentBuilderSecretForm` above the composer; on submit it `PUT`s the env key + straight to the API (raw value never reaches the agent) and posts the outcome + via `POST /send` (`sendAgentInteractiveToolResult` → `client_tool_result` + marker) to wake the parked session. Verified live: env_keys PUT 200 + the + session resumed confirming the set. + - [x] **C5 — "Edit with AI" seeds.** `EditWithAIButton` seeds the dock with a + prompt; an "Ask the agent builder about this agent" entry point on the agent + overview. 
(A "start fresh / continue" confirm dialog when a chat is already + active is a possible refinement — currently it sends into the active chat.) + - [ ] **Message-format deep dive (optional).** The acute issue — assistant prose + hidden inside a collapsed tool-call chip — is fixed via the `collapseMode` + override. A deeper side-by-side audit of our pi-ai conversation/part shape + vs. what `buildConversationItems` expects (turn bracketing, content-block + shapes) could still tighten `chat/conversationToAcp.ts` + `acpEnvelope.ts` + for pixel-faithful rendering. + +## What's demoable today + +Requires the app to be authenticated against a PostHog backend that has the +`agent_platform` app deployed (i.e. the `ass` branch). Against production Cloud +the data endpoints 404, so the data surfaces show error/empty states — the UI +shell, tabs, and navigation still work. + +With a backend that has deployed agents + sessions: + +- `/code/agents` → **Scouts / Applications tabs**. +- **Applications tab**: observability KPIs (spend / sessions / failure rate / + p95 with 14-day sparkline trends + WoW deltas) blended on top of the agent + list, with per-agent rollups on each row. +- **Per-agent detail**: **Overview** (observability KPIs + recent sessions) and + an **Observability** tab (KPIs + cost-by-model + tool reliability), each with + an "Open in AI observability" deep link. +- **Session transcript**: a stored session rendered read-only through code's + native chat UI (streaming text, thinking, tool calls + results). +- **Chat preview** (chat-trigger agents): the per-agent **Chat** tab runs a live + session against the agent's ingress — send/cancel, optimistic echo, a + recent-chats rail with resume. (Try it against `agent-approval-demo`.) +- **Operational strip + live-now panel** on the landing: "X live now · Y pending + approvals →"; click pending → fleet-wide approvals master/detail at + `/code/agents/applications/approvals`. 
Live-now lists cross-agent in-flight + sessions and links each row to the per-agent session detail. + +- **In-chat approvals** in the per-agent Chat tab: when the agent proposes an + approval-gated tool call, an inline card appears below the conversation with + the proposed args, approve / reject controls (with optional "approve with + edits" and a reason), and a deep link to the full Approvals tab. The composer + parks until decided; the SSE follow-up resumes the chat naturally. + +Not yet built: everything in the parity map still marked 🟡 / ⬜ / 🔴 — most notably draft +preview (feature 29). Authoring stays the Agent Builder's job. diff --git a/apps/code/src/main/di/bindings.ts b/apps/code/src/main/di/bindings.ts index 0ad4eccdd8..90b3b39cca 100644 --- a/apps/code/src/main/di/bindings.ts +++ b/apps/code/src/main/di/bindings.ts @@ -45,7 +45,9 @@ import type { SLACK_INTEGRATION_SERVICE, } from "@posthog/core/integrations/identifiers"; import type { SlackIntegrationService } from "@posthog/core/integrations/slack"; +import type { ApprovalLinkService } from "@posthog/core/links/approval-link"; import type { + APPROVAL_LINK_SERVICE, INBOX_LINK_SERVICE, NEW_TASK_LINK_SERVICE, SCOUT_LINK_SERVICE, @@ -234,6 +236,7 @@ import type { WorkspaceServerService } from "../services/workspace-server/servic import type { rendererStore } from "../utils/store"; import type { APP_LIFECYCLE_SERVICE as MAIN_APP_LIFECYCLE_SERVICE, + APPROVAL_LINK_SERVICE as MAIN_APPROVAL_LINK_SERVICE, ARCHIVE_REPOSITORY as MAIN_ARCHIVE_REPOSITORY, AUTH_PREFERENCE_REPOSITORY as MAIN_AUTH_PREFERENCE_REPOSITORY, AUTH_SERVICE as MAIN_AUTH_SERVICE, @@ -401,10 +404,12 @@ export interface MainBindings { [MAIN_INBOX_LINK_SERVICE]: InboxLinkService; [MAIN_SCOUT_LINK_SERVICE]: ScoutLinkService; [MAIN_NEW_TASK_LINK_SERVICE]: NewTaskLinkService; + [MAIN_APPROVAL_LINK_SERVICE]: ApprovalLinkService; [TASK_LINK_SERVICE]: TaskLinkService; [INBOX_LINK_SERVICE]: InboxLinkService; [SCOUT_LINK_SERVICE]: ScoutLinkService; 
[NEW_TASK_LINK_SERVICE]: NewTaskLinkService; + [APPROVAL_LINK_SERVICE]: ApprovalLinkService; // Watcher registry [MAIN_WATCHER_REGISTRY_SERVICE]: WatcherRegistryService; diff --git a/apps/code/src/main/di/container.ts b/apps/code/src/main/di/container.ts index b1bc3cfaae..224454a293 100644 --- a/apps/code/src/main/di/container.ts +++ b/apps/code/src/main/di/container.ts @@ -47,7 +47,9 @@ import { GIT_DIFF_SOURCE } from "@posthog/core/git-pr/identifiers"; import { handoffModule } from "@posthog/core/handoff/handoff.module"; import { HANDOFF_HOST } from "@posthog/core/handoff/identifiers"; import { integrationsModule } from "@posthog/core/integrations/integrations.module"; +import { ApprovalLinkService } from "@posthog/core/links/approval-link"; import { + APPROVAL_LINK_SERVICE, INBOX_LINK_SERVICE, NEW_TASK_LINK_SERVICE, SCOUT_LINK_SERVICE, @@ -246,6 +248,7 @@ import { rendererStore } from "../utils/store"; import type { MainBindings } from "./bindings"; import { APP_LIFECYCLE_SERVICE as MAIN_APP_LIFECYCLE_SERVICE, + APPROVAL_LINK_SERVICE as MAIN_APPROVAL_LINK_SERVICE, ARCHIVE_REPOSITORY as MAIN_ARCHIVE_REPOSITORY, AUTH_PREFERENCE_REPOSITORY as MAIN_AUTH_PREFERENCE_REPOSITORY, AUTH_SERVICE as MAIN_AUTH_SERVICE, @@ -620,6 +623,10 @@ container.bind(MAIN_SCOUT_LINK_SERVICE).to(ScoutLinkService); container.bind(SCOUT_LINK_SERVICE).toService(MAIN_TOKENS.ScoutLinkService); container.bind(MAIN_NEW_TASK_LINK_SERVICE).to(NewTaskLinkService); container.bind(NEW_TASK_LINK_SERVICE).toService(MAIN_TOKENS.NewTaskLinkService); +container.bind(MAIN_APPROVAL_LINK_SERVICE).to(ApprovalLinkService); +container + .bind(APPROVAL_LINK_SERVICE) + .toService(MAIN_TOKENS.ApprovalLinkService); container.load(watcherRegistryModule); container .bind(MAIN_WATCHER_REGISTRY_SERVICE) diff --git a/apps/code/src/main/di/tokens.ts b/apps/code/src/main/di/tokens.ts index 80f0e6c773..e5de0d2eec 100644 --- a/apps/code/src/main/di/tokens.ts +++ b/apps/code/src/main/di/tokens.ts @@ -103,6 +103,9 @@ export 
const SCOUT_LINK_SERVICE = Symbol.for( export const NEW_TASK_LINK_SERVICE = Symbol.for( "posthog.host.main.new-task-link.service", ); +export const APPROVAL_LINK_SERVICE = Symbol.for( + "posthog.host.main.approval-link.service", +); export const WATCHER_REGISTRY_SERVICE = Symbol.for( "posthog.host.main.watcher-registry.service", ); @@ -155,6 +158,7 @@ export const MAIN_TOKENS = Object.freeze({ InboxLinkService: INBOX_LINK_SERVICE, ScoutLinkService: SCOUT_LINK_SERVICE, NewTaskLinkService: NEW_TASK_LINK_SERVICE, + ApprovalLinkService: APPROVAL_LINK_SERVICE, WatcherRegistryService: WATCHER_REGISTRY_SERVICE, ProvisioningService: PROVISIONING_SERVICE, WorkspaceService: WORKSPACE_SERVICE, diff --git a/apps/code/src/main/index.ts b/apps/code/src/main/index.ts index 4530152d9e..702ec0b536 100644 --- a/apps/code/src/main/index.ts +++ b/apps/code/src/main/index.ts @@ -22,6 +22,7 @@ import { SLACK_INTEGRATION_SERVICE, } from "@posthog/core/integrations/identifiers"; import type { SlackIntegrationService } from "@posthog/core/integrations/slack"; +import type { ApprovalLinkService } from "@posthog/core/links/approval-link"; import type { InboxLinkService } from "@posthog/core/links/inbox-link"; import type { NewTaskLinkService } from "@posthog/core/links/new-task-link"; import type { ScoutLinkService } from "@posthog/core/links/scout-link"; @@ -226,6 +227,7 @@ async function initializeServices(): Promise { container.get(MAIN_TOKENS.InboxLinkService); container.get(MAIN_TOKENS.ScoutLinkService); container.get(MAIN_TOKENS.NewTaskLinkService); + container.get(MAIN_TOKENS.ApprovalLinkService); container.get(GITHUB_INTEGRATION_SERVICE); container.get(SLACK_INTEGRATION_SERVICE); container.get(MAIN_TOKENS.ExternalAppsService); diff --git a/docs/DEEP-LINKS.md b/docs/DEEP-LINKS.md index bf5aea1999..0e145f19ff 100644 --- a/docs/DEEP-LINKS.md +++ b/docs/DEEP-LINKS.md @@ -10,7 +10,7 @@ PostHog Code registers custom URL schemes so the desktop app can be opened with | Development | 
`posthog-code-dev://` | | Legacy (production only) | `twig://`, `array://` | -All schemes route through the same dispatcher. The host portion of the URL selects the handler (`task`, `inbox`, `scout`, `new`, `plan`, `issue`, `callback`, `integration`, `slack-integration`, `mcp-oauth-complete`). +All schemes route through the same dispatcher. The host portion of the URL selects the handler (`task`, `inbox`, `scout`, `approval`, `new`, `plan`, `issue`, `callback`, `integration`, `slack-integration`, `mcp-oauth-complete`). If the app is not running, the OS launches it and the link is queued until the renderer is ready. If the app is minimised, it is restored and focused before the link is handled. @@ -116,6 +116,20 @@ posthog-code://scout/error-tracking posthog-code://scout/error-tracking?finding=abc123 ``` +### `posthog-code://approval/<request_id>` + +Open the agent fleet approvals inbox focused on a specific tool-approval request. +Emitted by the agent-runner on a gated tool call so non-PostHog-Code clients +(Slack, MCP) can land on the approval; the request id alone resolves it. + +| Segment / Parameter | Required | Description | +|---|---|---| +| `<request_id>` | Yes | Agent tool-approval request id (e.g. `ar_...`). | + +``` +posthog-code://approval/ar_abc123 +``` + ## OAuth callback links These are issued by external services and consumed by the app. You should not need to construct them yourself, but they are documented for completeness. 
@@ -180,6 +194,7 @@ In development the same payload is delivered to `http://localhost:8238/mcp-oauth | `task` | [packages/core/src/links/task-link.ts](../packages/core/src/links/task-link.ts) | | `inbox` | [packages/core/src/links/inbox-link.ts](../packages/core/src/links/inbox-link.ts) | | `scout` | [packages/core/src/links/scout-link.ts](../packages/core/src/links/scout-link.ts) | +| `approval` | [packages/core/src/links/approval-link.ts](../packages/core/src/links/approval-link.ts) | | `new`, `plan`, `issue` | [packages/core/src/links/new-task-link.ts](../packages/core/src/links/new-task-link.ts) | | `callback` | [packages/core/src/oauth/oauth.ts](../packages/core/src/oauth/oauth.ts) | | `integration` | [packages/core/src/integrations/github.ts](../packages/core/src/integrations/github.ts) | diff --git a/docs/LOCAL-DEVELOPMENT.md b/docs/LOCAL-DEVELOPMENT.md index 15b4f94dc6..f8dfbd7972 100644 --- a/docs/LOCAL-DEVELOPMENT.md +++ b/docs/LOCAL-DEVELOPMENT.md @@ -95,6 +95,35 @@ Open devtools in the dev build and type: Source: `apps/code/src/renderer/features/inbox/devtools/inboxDemoConsole.ts`. +## Feature flags in local dev + +Feature flags are read through posthog-js, configured by the `VITE_POSTHOG_*` +vars in `.env`. By default these point at PostHog's internal analytics instance, +so flags you create locally never resolve in the dev build (and flag-gated UI — +e.g. the agent-platform surface behind the `agent-platform` flag — stays hidden). + +To point the flags/analytics client at your local PostHog so locally-synced +flags take effect: + +```bash +# In your PostHog repo: create + enable all frontend-defined flags locally +python manage.py sync_feature_flags + +# In this repo: rewrite VITE_POSTHOG_* to your local instance, then restart dev +pnpm posthog:local +pnpm dev +``` + +`pnpm posthog:local` auto-reads the project API key from a sibling `../posthog` +checkout (or pass it: `pnpm posthog:local phc_xxx`, or set `POSTHOG_DIR`). 
This +only affects the analytics/flags client — the data API still uses the **Dev** +region you pick at login. + +> One-off override without changing `.env`: the dev build exposes the client on +> `window.posthog`, so you can run +> `posthog.featureFlags.override({ "agent-platform": true })` in the renderer +> console (clear with `posthog.featureFlags.override(false)`). + ## Troubleshooting ### "Invalid client_id" error during OAuth diff --git a/package.json b/package.json index 3cc027e2f1..eb5dea2f51 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "dev:git": "pnpm --filter @posthog/git dev", "dev:code": "pnpm --filter code start", "app:cdp": "node scripts/electron-cdp.mjs", + "posthog:local": "node scripts/use-local-posthog.mjs", "build": "turbo build", "build:deps": "turbo build --filter=@posthog/code^...", "package": "turbo build && pnpm --filter code package", diff --git a/packages/api-client/src/agent-analytics.test.ts b/packages/api-client/src/agent-analytics.test.ts new file mode 100644 index 0000000000..e17d3b06d3 --- /dev/null +++ b/packages/api-client/src/agent-analytics.test.ts @@ -0,0 +1,155 @@ +import { describe, expect, it } from "vitest"; +import { + type AgentAnalyticsRaw, + buildAgentAnalyticsQueries, + EMPTY_AGENT_ANALYTICS, + type HogQLGrid, + shapeAgentAnalytics, +} from "./agent-analytics"; + +const grid = (results: unknown[][]): HogQLGrid => ({ results, columns: [] }); + +// A 14-day daily series where every day is identical, so prior(7) === recent(7) +// → zero deltas. Columns: [day, cost, sessions, errors, generations]. 
+function flatDaily(): unknown[][] { + return Array.from({ length: 14 }, (_, i) => [ + `2026-06-${String(i + 1).padStart(2, "0")}T00:00:00`, + 2, // cost + 5, // sessions + 1, // errors + 10, // generations + ]); +} + +describe("buildAgentAnalyticsQueries", () => { + it("scopes to agent-platform origin only when no application id", () => { + const q = buildAgentAnalyticsQueries(); + expect(q.kpi).toContain("$ai_origin = 'agent_platform_runner'"); + expect(q.kpi).not.toContain("$agent_application_id ="); + expect(q.kpi).toContain("event = '$ai_generation'"); + expect(q.toolErrors).toContain("event = '$ai_span'"); + }); + + it("narrows to a single application id when given", () => { + const q = buildAgentAnalyticsQueries("app-uuid-123"); + expect(q.kpi).toContain( + "properties.$agent_application_id = 'app-uuid-123'", + ); + expect(q.byModel).toContain( + "properties.$agent_application_id = 'app-uuid-123'", + ); + }); +}); + +describe("shapeAgentAnalytics", () => { + it("returns an empty board for empty grids", () => { + const out = shapeAgentAnalytics({}); + expect(out.empty).toBe(true); + expect(out.kpis).toEqual(EMPTY_AGENT_ANALYTICS.kpis); + expect(out.byAgent).toEqual([]); + expect(out.deltas).toEqual({ + spend: null, + sessions: null, + failureRatePoints: null, + }); + }); + + it("derives KPIs incl. 
failure rate from generations", () => { + const raw: Partial = { + // cost, sessions, errors, generations, p95 + kpi: grid([[12.5, 8, 3, 12, 4.2]]), + }; + const out = shapeAgentAnalytics(raw); + expect(out.kpis.spendUsd).toBe(12.5); + expect(out.kpis.sessions).toBe(8); + expect(out.kpis.failureRate).toBeCloseTo(3 / 12); + expect(out.kpis.p95LatencyS).toBe(4.2); + expect(out.empty).toBe(false); + }); + + it("coerces numeric strings (HogQL returns decimals as strings)", () => { + const out = shapeAgentAnalytics({ + kpi: grid([["1.50", "4", "0", "4", "2"]]), + }); + expect(out.kpis.spendUsd).toBe(1.5); + expect(out.kpis.sessions).toBe(4); + expect(out.kpis.failureRate).toBe(0); + }); + + it("builds a 14-day daily series with zero deltas for a flat trend", () => { + const out = shapeAgentAnalytics({ daily: grid(flatDaily()) }); + expect(out.daily.labels).toHaveLength(14); + expect(out.daily.spend).toHaveLength(14); + expect(out.daily.failureRate.every((r) => r === 0.1)).toBe(true); + // prior 7 === recent 7 → 0% change, and failure-rate delta is 0pp. + expect(out.deltas.spend).toBe(0); + expect(out.deltas.sessions).toBe(0); + expect(out.deltas.failureRatePoints).toBe(0); + }); + + it("computes a positive spend delta when recent exceeds prior", () => { + // 7 days at cost 1, then 7 days at cost 3 → +200%. + const days = Array.from({ length: 14 }, (_, i) => [ + `2026-06-${String(i + 1).padStart(2, "0")}T00:00:00`, + i < 7 ? 
1 : 3, + 1, + 0, + 1, + ]); + const out = shapeAgentAnalytics({ daily: grid(days) }); + expect(out.deltas.spend).toBeCloseTo(200); + }); + + it("maps per-agent rows and resolves names via the id→name map", () => { + const raw: Partial = { + // agent_id, sessions, generations, cost, tokens, errors, p95 + perAgent: grid([ + ["11111111-2222-3333-4444-555566667777", 5, 10, 4, 2000, 2, 1.5], + ["aaaa", 1, 4, 0.5, 100, 0, 0.2], + ]), + }; + const names = new Map([ + ["11111111-2222-3333-4444-555566667777", "Support Bot"], + ]); + const out = shapeAgentAnalytics(raw, names); + expect(out.byAgent[0]).toMatchObject({ + name: "Support Bot", + sessions: 5, + spendUsd: 4, + tokens: 2000, + p95LatencyS: 1.5, + }); + expect(out.byAgent[0].failureRate).toBeCloseTo(2 / 10); + // Unknown id falls back to a short id. + expect(out.byAgent[1].name).toBe("aaaa"); + }); + + it("maps model spend and tool error rates", () => { + const out = shapeAgentAnalytics({ + byModel: grid([["claude-opus-4-8", 9.99, 42]]), + toolErrors: grid([ + ["search", 20, 4], + ["fetch", 5, 0], + ]), + }); + expect(out.byModel[0]).toEqual({ + model: "claude-opus-4-8", + spendUsd: 9.99, + calls: 42, + }); + expect(out.toolErrors[0].errorRate).toBeCloseTo(4 / 20); + expect(out.toolErrors[1].errorRate).toBe(0); + }); + + it("ignores non-array rows defensively", () => { + const out = shapeAgentAnalytics({ + kpi: grid([[1, 1, 0, 1, 1]]), + perAgent: { + results: [null, "oops"] as unknown as unknown[][], + columns: [], + }, + }); + expect(out.byAgent).toEqual([]); + expect(out.empty).toBe(false); + }); +}); diff --git a/packages/api-client/src/agent-analytics.ts b/packages/api-client/src/agent-analytics.ts new file mode 100644 index 0000000000..7e1afdb814 --- /dev/null +++ b/packages/api-client/src/agent-analytics.ts @@ -0,0 +1,260 @@ +// Agent observability analytics — rolls up the agents' `$ai_*` AI-observability +// events (the runner captures them into the team's OWN PostHog project) into a +// cross-agent / 
// per-agent dashboard. Read-only HogQL via the `/query/` endpoint.
//
// Everything is scoped to `$ai_origin = 'agent_platform_runner'` so a team's
// *other* LLM usage (their own posthog-ai apps) never bleeds into the agent
// view. The runner stamps these props in `agent-shared`'s `analytics-sink`
// (`$ai_origin`, `$agent_application_id`, `$ai_trace_id`, `$ai_total_cost_usd`,
// `$ai_latency` in seconds, `$ai_is_error`, `$ai_model`, token counts; tool
// spans carry `$ai_span_name`).
//
// The query builders + shaping are kept here (pure, unit-tested) so the client
// method stays a thin "fire queries, shape result" passthrough.

import type {
  AgentAnalyticsData,
  AgentAnalyticsModelRow,
  AgentAnalyticsToolRow,
} from "@posthog/shared/agent-platform-types";

/** A raw HogQL `/query/` result grid: rows of cells plus column names. */
export interface HogQLGrid {
  results: unknown[][];
  columns: string[];
}

/** The five panels' raw grids, keyed by panel. */
export interface AgentAnalyticsRaw {
  kpi: HogQLGrid;
  daily: HogQLGrid;
  perAgent: HogQLGrid;
  byModel: HogQLGrid;
  toolErrors: HogQLGrid;
}

/** Only the agents' own traffic — not the team's other LLM events. */
const AGENT_ORIGIN = "properties.$ai_origin = 'agent_platform_runner'";

/**
 * Escape a value for embedding inside a single-quoted HogQL string literal
 * (backslash first, then the quote, so the added backslashes aren't doubled).
 */
function escapeHogQLString(value: string): string {
  return value.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
}

/**
 * Shared WHERE scope. `applicationId` narrows the board to a single agent for
 * the per-agent Observability tab; an empty/omitted id leaves the fleet-wide
 * origin filter only. The id is expected to be a UUID from the agent record,
 * but it is escaped anyway so a stray quote can never break out of the literal.
 */
function scope(applicationId?: string): string {
  const agent = applicationId
    ? ` AND properties.$agent_application_id = '${escapeHogQLString(applicationId)}'`
    : "";
  return `${AGENT_ORIGIN}${agent}`;
}

/** Headline KPIs over the last 7 days: cost, sessions, errors, generations, p95. */
const kpiQuery = (id?: string): string => `
SELECT
  coalesce(sum(toFloat(properties.$ai_total_cost_usd)), 0) AS cost,
  uniq(properties.$ai_trace_id) AS sessions,
  countIf(toString(properties.$ai_is_error) = 'true') AS errors,
  count() AS generations,
  coalesce(quantile(0.95)(toFloat(properties.$ai_latency)), 0) AS p95
FROM events
WHERE event = '$ai_generation' AND ${scope(id)}
  AND timestamp > now() - INTERVAL 7 DAY
`;

/** Per-day rollup over the last 14 days: day, cost, sessions, errors, generations. */
const dailyQuery = (id?: string): string => `
SELECT
  toStartOfDay(timestamp) AS day,
  coalesce(sum(toFloat(properties.$ai_total_cost_usd)), 0) AS cost,
  uniq(properties.$ai_trace_id) AS sessions,
  countIf(toString(properties.$ai_is_error) = 'true') AS errors,
  count() AS generations
FROM events
WHERE event = '$ai_generation' AND ${scope(id)}
  AND timestamp > now() - INTERVAL 14 DAY
GROUP BY day ORDER BY day
`;

/** Per-agent rollup (7 days): agent_id, sessions, generations, cost, tokens, errors, p95. */
const perAgentQuery = (id?: string): string => `
SELECT
  properties.$agent_application_id AS agent_id,
  uniq(properties.$ai_trace_id) AS sessions,
  count() AS generations,
  coalesce(sum(toFloat(properties.$ai_total_cost_usd)), 0) AS cost,
  coalesce(sum(toInt(properties.$ai_input_tokens)), 0)
    + coalesce(sum(toInt(properties.$ai_output_tokens)), 0) AS tokens,
  countIf(toString(properties.$ai_is_error) = 'true') AS errors,
  coalesce(quantile(0.95)(toFloat(properties.$ai_latency)), 0) AS p95
FROM events
WHERE event = '$ai_generation' AND ${scope(id)}
  AND timestamp > now() - INTERVAL 7 DAY AND notEmpty(properties.$agent_application_id)
GROUP BY agent_id ORDER BY cost DESC LIMIT 50
`;

/** Spend and call count per model (7 days), top 8 by cost. */
const byModelQuery = (id?: string): string => `
SELECT
  properties.$ai_model AS model,
  coalesce(sum(toFloat(properties.$ai_total_cost_usd)), 0) AS cost,
  count() AS calls
FROM events
WHERE event = '$ai_generation' AND ${scope(id)}
  AND timestamp > now() - INTERVAL 7 DAY AND notEmpty(properties.$ai_model)
GROUP BY model ORDER BY cost DESC LIMIT 8
`;

/** Calls and errors per tool span (7 days), top 8 by error count. */
const toolErrorsQuery = (id?: string): string => `
SELECT
  properties.$ai_span_name AS tool,
  count() AS calls,
  countIf(toString(properties.$ai_is_error) = 'true') AS errors
FROM events
WHERE event = '$ai_span' AND ${scope(id)}
  AND timestamp > now() - INTERVAL 7 DAY AND notEmpty(properties.$ai_span_name)
GROUP BY tool ORDER BY errors DESC, calls DESC LIMIT 8
`;

/**
 * Build the five panel queries. `applicationId` scopes them to a single agent
 * (the per-agent Observability tab); omit it for the fleet-wide board.
 */
export function buildAgentAnalyticsQueries(applicationId?: string): {
  kpi: string;
  daily: string;
  perAgent: string;
  byModel: string;
  toolErrors: string;
} {
  return {
    kpi: kpiQuery(applicationId),
    daily: dailyQuery(applicationId),
    perAgent: perAgentQuery(applicationId),
    byModel: byModelQuery(applicationId),
    toolErrors: toolErrorsQuery(applicationId),
  };
}

const EMPTY_GRID: HogQLGrid = { results: [], columns: [] };

/** Zeroed placeholder rendered while the first load is in flight. */
export const EMPTY_AGENT_ANALYTICS: AgentAnalyticsData = {
  kpis: { spendUsd: 0, sessions: 0, failureRate: 0, p95LatencyS: 0 },
  daily: { labels: [], spend: [], sessions: [], failureRate: [] },
  deltas: { spend: null, sessions: null, failureRatePoints: null },
  byAgent: [],
  byModel: [],
  toolErrors: [],
  empty: true,
};

/** Coerce a cell to a finite number (numeric strings included); anything else is 0. */
function num(v: unknown): number {
  const n = typeof v === "number" ? v : Number(v);
  return Number.isFinite(n) ? n : 0;
}

/** Percent change from `prior` to `recent`; null when there is no positive baseline. */
function pctChange(recent: number, prior: number): number | null {
  if (prior <= 0) {
    return null;
  }
  return ((recent - prior) / prior) * 100;
}

/**
 * Short display label for an id with no known name: the first 8 chars of the
 * last dash-separated segment. Falls back to the head of the whole id when that
 * segment is empty (id ends with "-") so the label is never blank for a
 * non-empty id.
 */
function shortId(id: string): string {
  const segments = id.split("-");
  const tail = segments[segments.length - 1];
  return (tail || id).slice(0, 8);
}

/** Locale-formatted "month day" label; unparseable input falls back to chars 5..10. */
function formatDay(iso: string): string {
  const d = new Date(iso);
  if (Number.isNaN(d.getTime())) {
    return iso.slice(5, 10);
  }
  return d.toLocaleDateString(undefined, { month: "short", day: "numeric" });
}

/** Coerce a raw HogQL grid into rows of cells, dropping non-array rows. */
function rows(grid: HogQLGrid | undefined): unknown[][] {
  return (grid?.results ?? []).filter((r): r is unknown[] => Array.isArray(r));
}

/**
 * Fold the five raw HogQL grids into the analytics dashboard shape. Pure: the
 * caller fires the queries (and resolves `nameById` from the agent list).
 *
 * @param raw - Any subset of the panel grids; a missing grid reads as empty.
 * @param nameById - Agent id → display name; unknown ids get a short-id label.
 * @returns The dashboard data; `empty` is true when there are no sessions, no
 *   generations and no per-agent rows.
 */
export function shapeAgentAnalytics(
  raw: Partial<AgentAnalyticsRaw>,
  nameById: Map<string, string> = new Map(),
): AgentAnalyticsData {
  // KPIs (single row): cost, sessions, errors, generations, p95
  const k = rows(raw.kpi)[0] ?? [0, 0, 0, 0, 0];
  const generations = num(k[3]);
  const kpis = {
    spendUsd: num(k[0]),
    sessions: num(k[1]),
    failureRate: generations > 0 ? num(k[2]) / generations : 0,
    p95LatencyS: num(k[4]),
  };

  // Daily 14-day series → sparklines + prior-vs-recent deltas.
  const dayRows = rows(raw.daily);
  const labels = dayRows.map((r) => formatDay(String(r[0])));
  const spend = dayRows.map((r) => num(r[1]));
  const sessionsByDay = dayRows.map((r) => num(r[2]));
  const errorsByDay = dayRows.map((r) => num(r[3]));
  const genByDay = dayRows.map((r) => num(r[4]));
  const failureRate = dayRows.map((_, i) =>
    genByDay[i] > 0 ? errorsByDay[i] / genByDay[i] : 0,
  );

  // NOTE(review): the daily query is a plain GROUP BY day with no fill, so days
  // without events produce no row. These windows are therefore "last 7 rows" vs
  // "everything before them", not necessarily calendar weeks — confirm intent.
  const recent = (arr: number[]): number =>
    arr.slice(-7).reduce((s, v) => s + v, 0);
  const prior = (arr: number[]): number =>
    arr.slice(0, Math.max(0, arr.length - 7)).reduce((s, v) => s + v, 0);
  const recentGen = recent(genByDay);
  const priorGen = prior(genByDay);
  const recentRate = recentGen > 0 ? recent(errorsByDay) / recentGen : 0;
  const priorRate = priorGen > 0 ? prior(errorsByDay) / priorGen : 0;
  const deltas = {
    spend: pctChange(recent(spend), prior(spend)),
    sessions: pctChange(recent(sessionsByDay), prior(sessionsByDay)),
    // Percentage-point difference; null without a prior-window baseline.
    failureRatePoints: priorGen > 0 ? (recentRate - priorRate) * 100 : null,
  };

  // Per-agent columns: agent_id, sessions, generations, cost, tokens, errors, p95
  const byAgent = rows(raw.perAgent).map((r) => {
    const id = String(r[0]);
    const gens = num(r[2]);
    return {
      id,
      name: nameById.get(id) ?? shortId(id),
      sessions: num(r[1]),
      spendUsd: num(r[3]),
      tokens: num(r[4]),
      failureRate: gens > 0 ? num(r[5]) / gens : 0,
      p95LatencyS: num(r[6]),
    };
  });

  const byModel: AgentAnalyticsModelRow[] = rows(raw.byModel).map((r) => ({
    model: String(r[0]),
    spendUsd: num(r[1]),
    calls: num(r[2]),
  }));

  const toolErrors: AgentAnalyticsToolRow[] = rows(raw.toolErrors).map((r) => {
    const calls = num(r[1]);
    const errors = num(r[2]);
    return {
      tool: String(r[0]),
      calls,
      errors,
      errorRate: calls > 0 ? errors / calls : 0,
    };
  });

  return {
    kpis,
    daily: { labels, spend, sessions: sessionsByDay, failureRate },
    deltas,
    byAgent,
    byModel,
    toolErrors,
    empty: kpis.sessions === 0 && byAgent.length === 0 && generations === 0,
  };
}

export { EMPTY_GRID };
AgentSlackManifest, + BundleFile, + DecideApprovalRequest, +} from "@posthog/shared/agent-platform-types"; import type { ActionabilityJudgmentArtefact, AvailableSuggestedReviewer, @@ -42,6 +64,11 @@ import type { Task, TaskRun, } from "@posthog/shared/domain-types"; +import { + buildAgentAnalyticsQueries, + type HogQLGrid, + shapeAgentAnalytics, +} from "./agent-analytics"; import { buildApiFetcher } from "./fetcher"; import { createApiClient, type Schemas } from "./generated"; import type { SpendAnalysisResponse } from "./spend-analysis"; @@ -3969,4 +3996,729 @@ export class PostHogAPIClient { } return (await response.json()) as LlmSkillFile; } + + // --- Agent platform ------------------------------------------------------ + // Deployed agents (`agent_platform` Django app). These routes aren't in the + // generated OpenAPI client, so they use the raw fetcher. Applications are + // addressable by UUID or slug in the `{idOrSlug}` segment. + + private agentApplicationsPath(teamId: number): string { + return `/api/projects/${teamId}/agent_applications/`; + } + + /** Lists non-archived agent applications for the current team. */ + async listAgentApplications(): Promise { + const MAX_PAGES = 50; + const teamId = await this.getTeamId(); + const all: AgentApplication[] = []; + let urlPath = `${this.agentApplicationsPath(teamId)}?limit=100`; + for (let i = 0; i < MAX_PAGES; i++) { + const url = new URL(`${this.api.baseUrl}${urlPath}`); + const response = await this.api.fetcher.fetch({ + method: "get", + url, + path: urlPath, + }); + const page = (await response.json()) as { + results?: AgentApplication[]; + next?: string | null; + }; + all.push(...(page.results ?? [])); + if (!page.next) return all; + const nextUrl = new URL(page.next); + urlPath = `${nextUrl.pathname}${nextUrl.search}`; + } + return all; + } + + /** Fetches a single agent application by UUID or slug; null if not found. 
*/ + async getAgentApplication( + idOrSlug: string, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/`; + const url = new URL(`${this.api.baseUrl}${path}`); + try { + const response = await this.api.fetcher.fetch({ + method: "get", + url, + path, + }); + return (await response.json()) as AgentApplication; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + if (msg.includes("[404]") || msg.includes("[403]")) { + return null; + } + throw error; + } + } + + /** Lists sessions for an application (paginated, filterable by state). */ + async listAgentApplicationSessions( + idOrSlug: string, + params?: AgentSessionsListParams, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/sessions/`; + const url = new URL(`${this.api.baseUrl}${path}`); + if (params?.limit != null) { + url.searchParams.set("limit", String(params.limit)); + } + if (params?.offset != null) { + url.searchParams.set("offset", String(params.offset)); + } + if (params?.state?.length) { + url.searchParams.set("state", params.state.join(",")); + } + if (params?.revision_id) { + url.searchParams.set("revision_id", params.revision_id); + } + if (params?.created_after) { + url.searchParams.set("created_after", params.created_after); + } + if (params?.created_before) { + url.searchParams.set("created_before", params.created_before); + } + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { + results?: AgentApplicationSessionsListResponse["results"]; + count?: number; + }; + return { + results: data.results ?? [], + count: data.count ?? data.results?.length ?? 0, + }; + } + + /** Full session detail incl. transcript; `lastN` trims to trailing messages. 
*/ + async getAgentApplicationSession( + idOrSlug: string, + sessionId: string, + lastN?: number, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/sessions/${encodeURIComponent(sessionId)}/`; + const url = new URL(`${this.api.baseUrl}${path}`); + if (lastN != null) { + url.searchParams.set("last_n", String(lastN)); + } + try { + const response = await this.api.fetcher.fetch({ + method: "get", + url, + path, + }); + return (await response.json()) as AgentApplicationSessionDetail; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + if (msg.includes("[404]") || msg.includes("[403]")) { + return null; + } + throw error; + } + } + + /** Structured runtime logs for one session (ClickHouse log_entries). */ + async getAgentApplicationSessionLogs( + idOrSlug: string, + sessionId: string, + params?: AgentSessionLogsParams, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/sessions/${encodeURIComponent(sessionId)}/logs/`; + const url = new URL(`${this.api.baseUrl}${path}`); + if (params?.limit != null) { + url.searchParams.set("limit", String(params.limit)); + } + if (params?.level?.length) { + url.searchParams.set("level", params.level.join(",")); + } + if (params?.search) { + url.searchParams.set("search", params.search); + } + if (params?.after) { + url.searchParams.set("after", params.after); + } + if (params?.before) { + url.searchParams.set("before", params.before); + } + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { + results?: AgentSessionLogEntry[]; + }; + return data.results ?? []; + } + + /** Lists tool-approval requests for an application (team-admin only). 
*/ + async listAgentApplicationApprovals( + idOrSlug: string, + params?: AgentApprovalsListParams, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/approvals/`; + const url = new URL(`${this.api.baseUrl}${path}`); + if (params?.state) { + url.searchParams.set("state", params.state); + } + if (params?.limit != null) { + url.searchParams.set("limit", String(params.limit)); + } + if (params?.offset != null) { + url.searchParams.set("offset", String(params.offset)); + } + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { + results?: AgentApprovalRequest[]; + }; + return data.results ?? []; + } + + /** Approve or reject a queued tool-approval request. */ + async decideAgentApproval( + idOrSlug: string, + approvalId: string, + body: DecideApprovalRequest, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/approvals/${encodeURIComponent(approvalId)}/decide/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ + method: "post", + url, + path, + overrides: { body: JSON.stringify(body) }, + }); + return (await response.json()) as AgentApprovalRequest; + } + + /** Lists revisions for an application (newest first, paginated). */ + async listAgentRevisions(idOrSlug: string): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/`; + const url = new URL(`${this.api.baseUrl}${path}?limit=100`); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { results?: AgentRevision[] }; + return data.results ?? []; + } + + /** Fetches a single revision by id; null if not found. 
*/ + async getAgentRevision( + idOrSlug: string, + revisionId: string, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/${encodeURIComponent(revisionId)}/`; + const url = new URL(`${this.api.baseUrl}${path}`); + try { + const response = await this.api.fetcher.fetch({ + method: "get", + url, + path, + }); + return (await response.json()) as AgentRevision; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + if (msg.includes("[404]") || msg.includes("[403]")) { + return null; + } + throw error; + } + } + + /** Run a revision lifecycle transition: freeze (draft→ready), promote + * (ready→live, demoting the old live), or archive. Returns the updated revision. */ + async transitionAgentRevision( + idOrSlug: string, + revisionId: string, + action: "freeze" | "promote" | "archive", + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/${encodeURIComponent(revisionId)}/${action}/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ + method: "post", + url, + path, + }); + return (await response.json()) as AgentRevision; + } + + /** + * A revision's bundle, flattened to per-file rows. The server returns a typed + * `{ bundle: { agent_md, skills[], tools[] } }`; we expand it to the canonical + * file paths the explorer renders (agent.md, skills//SKILL.md, + * tools//source.ts, tools//schema.json). 
+ */ + async getAgentRevisionBundle( + idOrSlug: string, + revisionId: string, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/${encodeURIComponent(revisionId)}/bundle/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { + bundle?: { + agent_md?: string; + skills?: { id: string; description?: string; body: string }[]; + tools?: { + id: string; + description?: string; + args_schema?: Record; + source: string; + }[]; + }; + }; + const bundle = data.bundle ?? {}; + const out: BundleFile[] = []; + if (bundle.agent_md !== undefined) { + out.push({ + path: "agent.md", + content: bundle.agent_md, + language: "markdown", + }); + } + for (const skill of bundle.skills ?? []) { + out.push({ + path: `skills/${skill.id}/SKILL.md`, + content: skill.body, + language: "markdown", + }); + } + for (const tool of bundle.tools ?? []) { + out.push({ + path: `tools/${tool.id}/source.ts`, + content: tool.source, + language: "typescript", + }); + out.push({ + path: `tools/${tool.id}/schema.json`, + content: JSON.stringify( + { description: tool.description, args_schema: tool.args_schema }, + null, + 2, + ), + language: "json", + }); + } + out.sort((a, b) => a.path.localeCompare(b.path)); + return out; + } + + /** + * The Slack app manifest derived from a revision's slack trigger + tools, + * plus the live Event/Interactivity request URLs and setup notes. 
+ */ + async getAgentSlackManifest( + idOrSlug: string, + revisionId: string, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/${encodeURIComponent(revisionId)}/slack_manifest/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + return (await response.json()) as AgentSlackManifest; + } + + /** Fire a cron trigger out-of-band; returns the created session id. */ + async fireAgentCron( + idOrSlug: string, + revisionId: string, + cronName: string, + requestId?: string, + ): Promise<{ session_id: string }> { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/${encodeURIComponent(revisionId)}/cron/fire/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ + method: "post", + url, + path, + overrides: { + body: JSON.stringify({ + cron_name: cronName, + ...(requestId ? { request_id: requestId } : {}), + }), + }, + }); + return (await response.json()) as { session_id: string }; + } + + /** The names of env keys currently set on an agent (values never returned). */ + async listAgentEnvKeys(idOrSlug: string): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/env_keys/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { + keys?: string[]; + results?: string[]; + }; + return data.keys ?? data.results ?? []; + } + + /** Set or rotate one encrypted env key. The value is write-only. 
*/ + async setAgentEnvKey( + idOrSlug: string, + key: string, + value: string, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/env_keys/${encodeURIComponent(key)}/`; + const url = new URL(`${this.api.baseUrl}${path}`); + await this.api.fetcher.fetch({ + method: "put", + url, + path, + overrides: { body: JSON.stringify({ value }) }, + }); + } + + /** Clear one encrypted env key. No-op server-side if it isn't set. */ + async clearAgentEnvKey(idOrSlug: string, key: string): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/env_keys/${encodeURIComponent(key)}/`; + const url = new URL(`${this.api.baseUrl}${path}`); + await this.api.fetcher.fetch({ method: "delete", url, path }); + } + + private agentMemoryPath(teamId: number, idOrSlug: string): string { + return `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/memory`; + } + + /** Pre-aggregated folder tree of the agent's memory store. */ + async getAgentMemoryTree(idOrSlug: string): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentMemoryPath(teamId, idOrSlug)}/tree/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { root?: AgentMemoryTreeNode }; + return data.root ?? { name: "root", type: "folder", children: [] }; + } + + /** Read one memory file (header + content). 
*/ + async readAgentMemoryFile( + idOrSlug: string, + filePath: string, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentMemoryPath(teamId, idOrSlug)}/files/by_path/`; + const url = new URL(`${this.api.baseUrl}${path}`); + url.searchParams.set("path", filePath); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + return (await response.json()) as AgentMemoryFile; + } + + /** BM25 full-text search across the agent's memory. */ + async searchAgentMemory( + idOrSlug: string, + query: string, + limit?: number, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentMemoryPath(teamId, idOrSlug)}/search/`; + const url = new URL(`${this.api.baseUrl}${path}`); + url.searchParams.set("q", query); + if (limit != null) url.searchParams.set("limit", String(limit)); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { + results?: AgentMemorySearchResult[]; + }; + return data.results ?? []; + } + + /** List the agent's JSONL reference tables. */ + async listAgentMemoryTables( + idOrSlug: string, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentMemoryPath(teamId, idOrSlug)}/tables/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + const data = (await response.json()) as { + tables?: AgentMemoryTableHeader[]; + }; + return data.tables ?? []; + } + + /** Read rows from one memory table. 
*/
+  async readAgentMemoryTable(
+    idOrSlug: string,
+    name: string,
+    limit?: number,
+  ): Promise<AgentMemoryTableRows> {
+    const teamId = await this.getTeamId();
+    const path = `${this.agentMemoryPath(teamId, idOrSlug)}/tables/${encodeURIComponent(name)}/`;
+    const url = new URL(`${this.api.baseUrl}${path}`);
+    if (limit != null) url.searchParams.set("limit", String(limit));
+    const response = await this.api.fetcher.fetch({ method: "get", url, path });
+    return (await response.json()) as AgentMemoryTableRows;
+  }
+
+  // --- Live chat (agent-ingress) -------------------------------------------
+  // These hit the agent's ingress host (`ingress_base_url`, which already
+  // includes `/agents/`), not the PostHog API. The shared fetcher
+  // attaches the same bearer regardless of host, so no proxy is needed (unlike
+  // the console, which proxied only because browser EventSource can't set
+  // an Authorization header — `fetch` can).
+
+  /** Start a chat session; returns the new session id. */
+  async runAgentSession(
+    ingressBaseUrl: string,
+    message: string,
+  ): Promise<{ session_id: string; resumed?: boolean }> {
+    // Drop a single trailing slash so the joined URL has no `//`.
+    const url = new URL(`${ingressBaseUrl.replace(/\/$/, "")}/run`);
+    const response = await this.api.fetcher.fetch({
+      method: "post",
+      url,
+      path: url.pathname,
+      overrides: { body: JSON.stringify({ message }) },
+    });
+    return (await response.json()) as { session_id: string; resumed?: boolean };
+  }
+
+  /** Send a follow-up user message to an open session. */
+  async sendAgentMessage(
+    ingressBaseUrl: string,
+    sessionId: string,
+    message: string,
+  ): Promise<void> {
+    const url = new URL(`${ingressBaseUrl.replace(/\/$/, "")}/send`);
+    await this.api.fetcher.fetch({
+      method: "post",
+      url,
+      path: url.pathname,
+      overrides: { body: JSON.stringify({ session_id: sessionId, message }) },
+    });
+  }
+
+  /** Return a client-tool result to an open session.
*/
+  async sendAgentClientToolResult(
+    ingressBaseUrl: string,
+    sessionId: string,
+    callId: string,
+    outcome: { result?: unknown; error?: string },
+  ): Promise<void> {
+    const url = new URL(
+      `${ingressBaseUrl.replace(/\/$/, "")}/client_tool_result`,
+    );
+    await this.api.fetcher.fetch({
+      method: "post",
+      url,
+      path: url.pathname,
+      overrides: {
+        // `result` / `error` are flattened into the top level of the payload.
+        body: JSON.stringify({
+          session_id: sessionId,
+          call_id: callId,
+          ...outcome,
+        }),
+      },
+    });
+  }
+
+  /**
+   * Return an *interactive* client-tool outcome (e.g. `set_secret`). Unlike the
+   * sync `/client_tool_result` path, the server-side tool returned `queued` and
+   * parked the session; posting the outcome via `/send` (as a `client_tool_result`
+   * marker) wakes it on a fresh turn. Exactly one of `result` / `error` is set.
+   */
+  async sendAgentInteractiveToolResult(
+    ingressBaseUrl: string,
+    sessionId: string,
+    callId: string,
+    outcome: { result: Record<string, unknown> } | { error: string },
+  ): Promise<void> {
+    const url = new URL(`${ingressBaseUrl.replace(/\/$/, "")}/send`);
+    // Narrow the union so the payload carries only the member that is present.
+    const clientToolResult =
+      "error" in outcome
+        ? { call_id: callId, error: outcome.error }
+        : { call_id: callId, result: outcome.result };
+    await this.api.fetcher.fetch({
+      method: "post",
+      url,
+      path: url.pathname,
+      overrides: {
+        body: JSON.stringify({
+          session_id: sessionId,
+          client_tool_result: clientToolResult,
+        }),
+      },
+    });
+  }
+
+  /** Cancel an open session (terminal). */
+  async cancelAgentSession(
+    ingressBaseUrl: string,
+    sessionId: string,
+  ): Promise<void> {
+    const url = new URL(`${ingressBaseUrl.replace(/\/$/, "")}/cancel`);
+    await this.api.fetcher.fetch({
+      method: "post",
+      url,
+      path: url.pathname,
+      overrides: { body: JSON.stringify({ session_id: sessionId }) },
+    });
+  }
+
+  /**
+   * Stream a session's SSE events as an async iterator. Reads the raw response
+   * body and parses `text/event-stream` frames into `AgentSessionEvent`s.
+   */
+  async *streamAgentSession(
+    ingressBaseUrl: string,
+    sessionId: string,
+    signal?: AbortSignal,
+  ): AsyncGenerator<AgentSessionEvent> {
+    const url = new URL(`${ingressBaseUrl.replace(/\/$/, "")}/listen`);
+    url.searchParams.set("session_id", sessionId);
+    // NB: only `signal` in overrides. Passing `headers` here would replace the
+    // fetcher's Authorization header (it spreads overrides over the built
+    // headers), which 401s the stream. /listen streams SSE without an explicit
+    // Accept header.
+    const response = await this.api.fetcher.fetch({
+      method: "get",
+      url,
+      path: url.pathname,
+      overrides: { signal },
+    });
+    // No body means nothing to stream: finish without yielding.
+    if (!response.body) return;
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = "";
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        // SSE allows CRLF line endings; fold them to LF so the blank-line
+        // separator below matches either form. Normalizing the whole buffer
+        // also covers a CRLF pair split across two chunks.
+        buffer = (buffer + decoder.decode(value, { stream: true })).replace(
+          /\r\n/g,
+          "\n",
+        );
+        // Frames are separated by a blank line.
+        let sep = buffer.indexOf("\n\n");
+        while (sep !== -1) {
+          const frame = buffer.slice(0, sep);
+          buffer = buffer.slice(sep + 2);
+          // Only `data:` lines carry the payload; multi-line data is rejoined.
+          const data = frame
+            .split("\n")
+            .filter((line) => line.startsWith("data:"))
+            .map((line) => line.slice(5).trimStart())
+            .join("\n");
+          if (data) {
+            try {
+              yield JSON.parse(data) as AgentSessionEvent;
+            } catch {
+              // Skip unparseable frames (keep-alives, comments).
+            }
+          }
+          sep = buffer.indexOf("\n\n");
+        }
+      }
+    } finally {
+      // Cancel rather than only unlock: if the consumer stops iterating early,
+      // this closes the response stream instead of leaving it open. A stream
+      // that already closed or errored makes this a no-op / ignored rejection.
+      await reader.cancel().catch(() => {});
+      reader.releaseLock();
+    }
+  }
+
+  /** Live (non-terminal) sessions across every agent on the team.
*/
+  async listAgentFleetLiveSessions(
+    limit?: number,
+  ): Promise<AgentFleetLiveSessionsResponse> {
+    const teamId = await this.getTeamId();
+    const path = `/api/projects/${teamId}/agent_fleet/live_sessions/`;
+    const url = new URL(`${this.api.baseUrl}${path}`);
+    if (limit != null) {
+      url.searchParams.set("limit", String(limit));
+    }
+    const response = await this.api.fetcher.fetch({ method: "get", url, path });
+    const data = (await response.json()) as {
+      results?: AgentFleetLiveSessionsResponse["results"];
+    };
+    // Always hand back a `results` array, even when the field is absent.
+    return { results: data.results ?? [] };
+  }
+
+  /** All tool-approval requests across the team (team-admin only). */
+  async listAgentFleetApprovals(
+    params?: AgentApprovalsListParams,
+  ): Promise<AgentApprovalRequest[]> {
+    const teamId = await this.getTeamId();
+    const path = `/api/projects/${teamId}/agent_fleet/approvals/`;
+    const url = new URL(`${this.api.baseUrl}${path}`);
+    // String filters are sent only when non-empty; numeric ones use `!= null`
+    // so that 0 is still forwarded.
+    if (params?.state) {
+      url.searchParams.set("state", params.state);
+    }
+    if (params?.agent_id) {
+      url.searchParams.set("agent_id", params.agent_id);
+    }
+    if (params?.limit != null) {
+      url.searchParams.set("limit", String(params.limit));
+    }
+    if (params?.offset != null) {
+      url.searchParams.set("offset", String(params.offset));
+    }
+    const response = await this.api.fetcher.fetch({ method: "get", url, path });
+    const data = (await response.json()) as {
+      results?: AgentApprovalRequest[];
+    };
+    return data.results ?? [];
+  }
+
+  /**
+   * Runs a read-only HogQL query against the team's project and returns the raw
+   * result grid. Backs the agent observability rollups (`$ai_*` events the
+   * runner captures into this team's own project). The endpoint can answer 200
+   * with an `error` field; that's surfaced as a throw.
+   */
+  async runHogQLQuery(query: string): Promise<HogQLGrid> {
+    const teamId = await this.getTeamId();
+    const path = `/api/projects/${teamId}/query/`;
+    const url = new URL(`${this.api.baseUrl}${path}`);
+    const response = await this.api.fetcher.fetch({
+      method: "post",
+      url,
+      path,
+      overrides: {
+        body: JSON.stringify({ query: { kind: "HogQLQuery", query } }),
+      },
+    });
+    const data = (await response.json()) as {
+      results?: unknown[][];
+      columns?: string[];
+      error?: string | null;
+    };
+    // A 200 response can still carry an error; turn it into a rejection.
+    if (data.error) {
+      throw new Error(data.error);
+    }
+    return { results: data.results ?? [], columns: data.columns ?? [] };
+  }
+
+  /**
+   * Agent observability rollup over the agents' `$ai_*` events — KPIs (spend,
+   * sessions, failure rate, p95), a 14-day daily trend + WoW deltas, and
+   * spend-by-agent / cost-by-model / tool-reliability breakdowns. Pass an
+   * `applicationId` (the agent's UUID) to scope it to a single agent; omit it
+   * for the fleet-wide board.
+   *
+   * The five panels are independent HogQL round-trips fired in parallel. The
+   * KPI query is the gate — a systemic failure (auth, bad query) rejects the
+   * whole call so the UI shows an error rather than a silently-empty board; the
+   * secondary panels degrade to empty individually. The fleet board also reads
+   * the agent list to label per-agent rows by name.
+   */
+  async getAgentAnalytics(
+    applicationId?: string,
+  ): Promise<Awaited<ReturnType<typeof shapeAgentAnalytics>>> {
+    const queries = buildAgentAnalyticsQueries(applicationId);
+    const empty: HogQLGrid = { results: [], columns: [] };
+    const [agents, kpi, daily, perAgent, byModel, toolErrors] =
+      await Promise.all([
+        // The agent list is only needed for the fleet-wide board.
+        applicationId
+          ? Promise.resolve<AgentApplication[]>([])
+          : this.listAgentApplications().catch(() => [] as AgentApplication[]),
+        // No `.catch` on the KPI query: its failure rejects the whole call.
+        this.runHogQLQuery(queries.kpi),
+        this.runHogQLQuery(queries.daily).catch(() => empty),
+        this.runHogQLQuery(queries.perAgent).catch(() => empty),
+        this.runHogQLQuery(queries.byModel).catch(() => empty),
+        this.runHogQLQuery(queries.toolErrors).catch(() => empty),
+      ]);
+    const nameById = new Map<string, string>(agents.map((a) => [a.id, a.name]));
+    return shapeAgentAnalytics(
+      { kpi, daily, perAgent, byModel, toolErrors },
+      nameById,
+    );
+  }
 }
diff --git a/packages/core/src/agent-chat/agentChatStore.ts b/packages/core/src/agent-chat/agentChatStore.ts
new file mode 100644
index 0000000000..9f4bbfca89
--- /dev/null
+++ b/packages/core/src/agent-chat/agentChatStore.ts
@@ -0,0 +1,88 @@
+import type { AcpMessage } from "@posthog/shared";
+import { createStore } from "zustand/vanilla";
+
+/**
+ * Domain state for deployed-agent live chats. Keyed by an opaque `chatId` so
+ * several chats can be live at once — e.g. the always-on agent builder dock
+ * (`"agent-builder"`) and a per-agent preview (`"preview:"`) side by side.
+ * The UI hook (`useAgentChat`) owns the transport (run/send/cancel + the SSE
+ * loop, via the api-client) and pumps mapped `AcpMessage`s in here; components
+ * read one chat by id and render it through `ConversationView`.
+ */
+
+export type AgentChatStatus =
+  | "idle"
+  | "starting"
+  | "streaming"
+  | "awaiting_input"
+  | "completed"
+  | "failed"
+  | "cancelled";
+
+export interface AgentChatState {
+  /** Which agent this chat targets (slug), or null when idle. */
+  agentKey: string | null;
+  sessionId: string | null;
+  status: AgentChatStatus;
+  /** Accumulated ACP messages (mapper output) for ConversationView. */
+  messages: AcpMessage[];
+  error: string | null;
+}
+
+export const EMPTY_CHAT: AgentChatState = {
+  agentKey: null,
+  sessionId: null,
+  status: "idle",
+  messages: [],
+  error: null,
+};
+
+interface AgentChatStore {
+  /** All live chats, keyed by `chatId`. */
+  chats: Record<string, AgentChatState>;
+
+  /** Reset `chatId` for a brand-new chat against `agentKey`. */
+  begin: (chatId: string, agentKey: string) => void;
+  setSessionId: (chatId: string, sessionId: string) => void;
+  setStatus: (chatId: string, status: AgentChatStatus) => void;
+  appendMessages: (chatId: string, messages: AcpMessage[]) => void;
+  setError: (chatId: string, error: string | null) => void;
+  reset: (chatId: string) => void;
+}
+
+export const agentChatStore = createStore<AgentChatStore>((set) => {
+  // Updater factory: shallow-merges `next` into one chat's state, starting
+  // from EMPTY_CHAT when that chat has no entry yet.
+  const patch = (
+    chatId: string,
+    next: Partial<AgentChatState>,
+  ): ((s: AgentChatStore) => AgentChatStore) => {
+    return (s) => ({
+      ...s,
+      chats: {
+        ...s.chats,
+        [chatId]: { ...(s.chats[chatId] ?? EMPTY_CHAT), ...next },
+      },
+    });
+  };
+
+  return {
+    chats: {},
+    begin: (chatId, agentKey) =>
+      set(patch(chatId, { ...EMPTY_CHAT, agentKey, status: "starting" })),
+    setSessionId: (chatId, sessionId) => set(patch(chatId, { sessionId })),
+    setStatus: (chatId, status) => set(patch(chatId, { status })),
+    appendMessages: (chatId, messages) =>
+      set((s) => {
+        // Nothing to add: hand back the same state object unchanged.
+        if (messages.length === 0) return s;
+        const cur = s.chats[chatId] ?? EMPTY_CHAT;
+        return {
+          ...s,
+          chats: {
+            ...s.chats,
+            // Build a new array; the existing one is never mutated.
+            [chatId]: { ...cur, messages: [...cur.messages, ...messages] },
+          },
+        };
+      }),
+    setError: (chatId, error) => set(patch(chatId, { error })),
+    reset: (chatId) => set(patch(chatId, { ...EMPTY_CHAT })),
+  };
+});
diff --git a/packages/core/src/auth/auth.ts b/packages/core/src/auth/auth.ts
index 28045f2efe..bdfd59847e 100644
--- a/packages/core/src/auth/auth.ts
+++ b/packages/core/src/auth/auth.ts
@@ -555,17 +555,49 @@ export class AuthService extends TypedEventEmitter {
     options: TokenResponseOptions,
   ): Promise {
     const scopedOrgIds = tokenResponse.scoped_organizations ?? [];
-    const { accountKey, currentOrgId } = await this.fetchUserContext(
-      tokenResponse.access_token,
-      options.cloudRegion,
+    const { accountKey, currentOrgId, currentOrgProjects } =
+      await this.fetchUserContext(
+        tokenResponse.access_token,
+        options.cloudRegion,
+      );
+    // If /api/users/@me/ already carried `organization.teams[]` for the
+    // current org, use that as a seed for the org map. This unblocks Local
+    // Development OAuth, where tokens are project-scoped and can't hit
+    // /api/organizations/{id}/ at all (returns 403 "API keys with scoped
+    // projects are only supported on project-based endpoints."), so the
+    // /organizations fetch in buildOrgProjectsMap would otherwise leave the
+    // map empty. Only seed when @me/ actually returned projects — otherwise
+    // we'd clobber previously-known data on refresh.
+    const seedMap: OrgProjectsMap = {
+      ...(this.session?.orgProjectsMap ?? {}),
+    };
+    const haveCurrentOrgFromMe = !!(
+      currentOrgId &&
+      currentOrgProjects &&
+      currentOrgProjects.projects.length > 0
     );
+    if (haveCurrentOrgFromMe && currentOrgId && currentOrgProjects) {
+      seedMap[currentOrgId] = currentOrgProjects;
+    }
+    // When the backend grants no scoped orgs but @me/ already gave us the
+    // current org's projects, skip the org fetch entirely (the seed has what
+    // we need).
Without @me/ projects and without scoped orgs, fall through + // to the existing "do nothing" behavior so users with genuinely no orgs + // aren't paranoid-fetched. + const orgIdsToFetch = + scopedOrgIds.length > 0 ? scopedOrgIds : haveCurrentOrgFromMe ? [] : []; const { map: orgProjectsMap, incomplete: orgProjectsIncomplete } = await this.buildOrgProjectsMap( tokenResponse.access_token, options.cloudRegion, - scopedOrgIds, - this.session?.orgProjectsMap ?? {}, + orgIdsToFetch, + seedMap, ); + // Make sure the @me/-seeded org is in the final map even when no fetch + // ran (buildOrgProjectsMap only returns entries for orgIds it processed). + if (haveCurrentOrgFromMe && currentOrgId && !orgProjectsMap[currentOrgId]) { + orgProjectsMap[currentOrgId] = seedMap[currentOrgId]; + } const lastPrefs = accountKey ? this.authPreference.get(accountKey, options.cloudRegion) : null; @@ -806,7 +838,11 @@ export class AuthService extends TypedEventEmitter { private async fetchUserContext( accessToken: string, cloudRegion: CloudRegion, - ): Promise<{ accountKey: string | null; currentOrgId: string | null }> { + ): Promise<{ + accountKey: string | null; + currentOrgId: string | null; + currentOrgProjects: OrgProjects | null; + }> { try { const response = await this.executeAuthenticatedFetch( fetch, @@ -816,14 +852,22 @@ export class AuthService extends TypedEventEmitter { ); if (!response.ok) { - return { accountKey: null, currentOrgId: null }; + return { + accountKey: null, + currentOrgId: null, + currentOrgProjects: null, + }; } const data = (await response.json().catch(() => ({}))) as { uuid?: unknown; distinct_id?: unknown; email?: unknown; - organization?: { id?: unknown } | null; + organization?: { + id?: unknown; + name?: unknown; + teams?: unknown; + } | null; }; let accountKey: string | null = null; @@ -842,10 +886,39 @@ export class AuthService extends TypedEventEmitter { const currentOrgId = typeof orgId === "string" && orgId.length > 0 ? 
orgId : null;
-      return { accountKey, currentOrgId };
+      // @me/ returns the current org's `organization.teams[]` with the same
+      // {id, name} shape as /api/organizations/{id}/, so we can populate the
+      // org map from it. Lets the dev app work against project-scoped OAuth
+      // tokens (Local Development) where /api/organizations/{id}/ 403s.
+      let currentOrgProjects: OrgProjects | null = null;
+      if (currentOrgId) {
+        const teamsRaw = data.organization?.teams;
+        const orgName = data.organization?.name;
+        const projects = Array.isArray(teamsRaw)
+          ? teamsRaw
+              .map((t) => t as { id?: unknown; name?: unknown })
+              .filter(
+                (t) => typeof t.id === "number" && typeof t.name === "string",
+              )
+              .map((t) => ({ id: t.id as number, name: t.name as string }))
+          : [];
+        currentOrgProjects = {
+          orgName:
+            typeof orgName === "string" && orgName.length > 0
+              ? orgName
+              : "(unknown)",
+          projects,
+        };
+      }
+
+      return { accountKey, currentOrgId, currentOrgProjects };
     } catch (error) {
       this.logger.warn("Failed to resolve user context", { error });
-      return { accountKey: null, currentOrgId: null };
+      return {
+        accountKey: null,
+        currentOrgId: null,
+        currentOrgProjects: null,
+      };
     }
   }
   private requireSession(): InMemorySession {
diff --git a/packages/core/src/links/approval-link.test.ts b/packages/core/src/links/approval-link.test.ts
new file mode 100644
index 0000000000..ed5ef370e0
--- /dev/null
+++ b/packages/core/src/links/approval-link.test.ts
@@ -0,0 +1,148 @@
+import type {
+  DeepLinkHandler,
+  IDeepLinkRegistry,
+} from "@posthog/platform/deep-link";
+import type { IMainWindow } from "@posthog/platform/main-window";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import {
+  ApprovalLinkEvent,
+  type ApprovalLinkPayload,
+  ApprovalLinkService,
+} from "./approval-link";
+
+/** Mock logger whose `scope()` returns the same mock. */
+function makeLogger() {
+  const logger = {
+    info: vi.fn(),
+    warn: vi.fn(),
+    error: vi.fn(),
+    debug: vi.fn(),
+    scope: vi.fn(() => logger),
+  };
+  return logger;
+}
+
+/** Fake deep-link registry that records handlers and can `trigger` them. */
+function makeDeepLinkService() {
+  const handlers = new Map<string, DeepLinkHandler>();
+  const service = {
+    registerHandler: vi.fn((key: string, handler: DeepLinkHandler) => {
+      handlers.set(key, handler);
+    }),
+    trigger: (key: string, path: string, search = "") => {
+      const handler = handlers.get(key);
+      if (!handler) throw new Error(`No handler for ${key}`);
+      return handler(path, new URLSearchParams(search));
+    },
+  };
+  return service as unknown as IDeepLinkRegistry & {
+    trigger: (key: string, path: string, search?: string) => boolean;
+  };
+}
+
+/** Mock main window; reports not-minimized unless a test overrides it. */
+function makeMainWindow() {
+  return {
+    focus: vi.fn(),
+    restore: vi.fn(),
+    isMinimized: vi.fn().mockReturnValue(false),
+  } as unknown as IMainWindow & {
+    focus: ReturnType<typeof vi.fn>;
+    restore: ReturnType<typeof vi.fn>;
+    isMinimized: ReturnType<typeof vi.fn>;
+  };
+}
+
+describe("ApprovalLinkService", () => {
+  let deepLinkService: ReturnType<typeof makeDeepLinkService>;
+  let mainWindow: ReturnType<typeof makeMainWindow>;
+  let service: ApprovalLinkService;
+
+  beforeEach(() => {
+    deepLinkService = makeDeepLinkService();
+    mainWindow = makeMainWindow();
+    service = new ApprovalLinkService(
+      deepLinkService,
+      mainWindow,
+      makeLogger(),
+    );
+  });
+
+  it("registers an 'approval' handler on the DeepLinkService", () => {
+    expect(deepLinkService.registerHandler).toHaveBeenCalledWith(
+      "approval",
+      expect.any(Function),
+    );
+  });
+
+  it.each<{
+    name: string;
+    path: string;
+    expected: ApprovalLinkPayload;
+  }>([
+    {
+      name: "emits OpenApproval with the request id",
+      path: "ar_abc123",
+      expected: { requestId: "ar_abc123" },
+    },
+    {
+      name: "takes only the first path segment as the request id",
+      path: "ar_abc123/extra/segments",
+      expected: { requestId: "ar_abc123" },
+    },
+    {
+      name: "decodes a percent-encoded request id",
+      path: "ar_abc%2D123",
+      expected: { requestId: "ar_abc-123" },
+    },
+  ])("$name", ({ path, expected }) => {
+    const listener = vi.fn();
+    service.on(ApprovalLinkEvent.OpenApproval, listener);
+
+    const result = deepLinkService.trigger("approval", path);
+
+    expect(result).toBe(true);
+    expect(listener).toHaveBeenCalledWith(expected);
+  });
+
+  it("queues a pending deep link when no listener is attached", () => {
+    deepLinkService.trigger("approval", "ar_xyz789");
+
+    const pending = service.consumePendingDeepLink();
+    expect(pending).toEqual({ requestId: "ar_xyz789" });
+
+    // Draining clears it
+    expect(service.consumePendingDeepLink()).toBeNull();
+  });
+
+  it("returns false and does not emit when the path is empty", () => {
+    const listener = vi.fn();
+    service.on(ApprovalLinkEvent.OpenApproval, listener);
+
+    const result = deepLinkService.trigger("approval", "");
+
+    expect(result).toBe(false);
+    expect(listener).not.toHaveBeenCalled();
+  });
+
+  it.each<{ name: string; minimized: boolean; expectRestore: boolean }>([
+    {
+      name: "focuses the main window on link arrival",
+      minimized: false,
+      expectRestore: false,
+    },
+    {
+      name: "restores then focuses the main window when it is minimized",
+      minimized: true,
+      expectRestore: true,
+    },
+  ])("$name", ({ minimized, expectRestore }) => {
+    mainWindow.isMinimized.mockReturnValue(minimized);
+
+    deepLinkService.trigger("approval", "ar_abc123");
+
+    expect(mainWindow.focus).toHaveBeenCalledTimes(1);
+    if (expectRestore) {
+      expect(mainWindow.restore).toHaveBeenCalledTimes(1);
+    } else {
+      expect(mainWindow.restore).not.toHaveBeenCalled();
+    }
+  });
+});
diff --git a/packages/core/src/links/approval-link.ts b/packages/core/src/links/approval-link.ts
new file mode 100644
index 0000000000..7cbf38b567
--- /dev/null
+++ b/packages/core/src/links/approval-link.ts
@@ -0,0 +1,108 @@
+import { ROOT_LOGGER, type RootLogger } from "@posthog/di/logger";
+import {
+  DEEP_LINK_SERVICE,
+  type IDeepLinkRegistry,
+} from "@posthog/platform/deep-link";
+import {
+  type IMainWindow,
+  MAIN_WINDOW_SERVICE,
+} from "@posthog/platform/main-window";
+import { TypedEventEmitter } from "@posthog/shared";
+import { inject, injectable } from "inversify";
+import type { LinkLogger } from "./identifiers";
+
+export const
ApprovalLinkEvent = {
+  OpenApproval: "openApproval",
+} as const;
+
+export interface ApprovalLinkPayload {
+  /** Agent tool-approval request id to open in the fleet approvals inbox. */
+  requestId: string;
+}
+
+export interface ApprovalLinkEvents {
+  [ApprovalLinkEvent.OpenApproval]: ApprovalLinkPayload;
+}
+
+/**
+ * Handles agent approval deep links (`<scheme>://approval/{requestId}`, e.g.
+ * `posthog-code://approval/ar_...` in production and `posthog-code-dev://…` in
+ * local dev). The agent-runner emits these on a gated tool call so non-PostHog-Code
+ * clients (Slack, MCP) can open the approval in the desktop app. The request id
+ * alone resolves the approval in the fleet inbox, so the link carries nothing else.
+ *
+ * Mirrors `ScoutLinkService`: queues a link that arrived before the renderer was
+ * ready, and emits for links delivered while the app is already running.
+ */
+@injectable()
+export class ApprovalLinkService extends TypedEventEmitter<ApprovalLinkEvents> {
+  private pendingDeepLink: ApprovalLinkPayload | null = null;
+  private readonly log: LinkLogger;
+
+  constructor(
+    @inject(DEEP_LINK_SERVICE)
+    private readonly deepLinkService: IDeepLinkRegistry,
+    @inject(MAIN_WINDOW_SERVICE)
+    private readonly mainWindow: IMainWindow,
+    @inject(ROOT_LOGGER)
+    rootLogger: RootLogger,
+  ) {
+    super();
+    this.log = rootLogger.scope("approval-link-service");
+
+    this.deepLinkService.registerHandler("approval", (path) =>
+      this.handleApprovalLink(path),
+    );
+  }
+
+  /**
+   * Handle one `approval` deep link. Returns false (and does nothing else)
+   * when the first path segment is empty; otherwise emits or queues the
+   * payload, brings the main window forward, and returns true.
+   */
+  private handleApprovalLink(path: string): boolean {
+    // Only the first path segment is the request id; anything after is ignored.
+    const requestId = decodeSegment(path.split("/")[0]);
+
+    if (!requestId) {
+      this.log.warn("Approval link missing request id");
+      return false;
+    }
+
+    const payload: ApprovalLinkPayload = { requestId };
+
+    const hasListeners = this.listenerCount(ApprovalLinkEvent.OpenApproval) > 0;
+
+    if (hasListeners) {
+      this.log.info(`Emitting approval link event: requestId=${requestId}`);
+      this.emit(ApprovalLinkEvent.OpenApproval, payload);
+    } else {
+      // Single slot: a later link replaces an earlier one not yet consumed.
+      this.log.info(
+        `Queueing approval link (renderer not ready): requestId=${requestId}`,
+      );
+      this.pendingDeepLink = payload;
+    }
+
+    this.log.info("Deep link focusing window", { requestId });
+    if (this.mainWindow.isMinimized()) {
+      this.mainWindow.restore();
+    }
+    this.mainWindow.focus();
+
+    return true;
+  }
+
+  /** Return the queued link, if any, and clear it so it is delivered once. */
+  public consumePendingDeepLink(): ApprovalLinkPayload | null {
+    const pending = this.pendingDeepLink;
+    this.pendingDeepLink = null;
+    if (pending) {
+      this.log.info(
+        `Consumed pending approval link: requestId=${pending.requestId}`,
+      );
+    }
+    return pending;
+  }
+}
+
+/** Percent-decode a path segment; malformed escapes yield the raw segment. */
+function decodeSegment(segment: string | undefined): string {
+  if (!segment) return "";
+  try {
+    return decodeURIComponent(segment);
+  } catch {
+    return segment;
+  }
+}
diff --git a/packages/core/src/links/identifiers.ts b/packages/core/src/links/identifiers.ts
index e6b67bc9f8..053e283004 100644
--- a/packages/core/src/links/identifiers.ts
+++ b/packages/core/src/links/identifiers.ts
@@ -11,3 +11,6 @@ export const SCOUT_LINK_SERVICE = Symbol.for("posthog.core.scoutLinkService");
 export const NEW_TASK_LINK_SERVICE = Symbol.for(
   "posthog.core.newTaskLinkService",
 );
+export const APPROVAL_LINK_SERVICE = Symbol.for(
+  "posthog.core.approvalLinkService",
+);
diff --git a/packages/host-router/src/routers/deep-link.router.ts b/packages/host-router/src/routers/deep-link.router.ts
index 96bb311317..64324ff5dd 100644
--- a/packages/host-router/src/routers/deep-link.router.ts
+++ b/packages/host-router/src/routers/deep-link.router.ts
@@ -1,4 +1,10 @@
 import {
+  ApprovalLinkEvent,
+  type ApprovalLinkPayload,
+  type ApprovalLinkService,
+} from "@posthog/core/links/approval-link";
+import {
+  APPROVAL_LINK_SERVICE,
   INBOX_LINK_SERVICE,
   NEW_TASK_LINK_SERVICE,
   SCOUT_LINK_SERVICE,
@@ -102,4 +108,24 @@ export const deepLinkRouter = router({
         .consumePendingLink();
     },
   ),
+
+  onOpenApproval: publicProcedure.subscription(async function* (opts) {
+    const service = opts.ctx.container.get<ApprovalLinkService>(
+      APPROVAL_LINK_SERVICE,
+    );
+
const iterable = service.toIterable(ApprovalLinkEvent.OpenApproval, { + signal: opts.signal, + }); + for await (const data of iterable) { + yield data; + } + }), + + getPendingApprovalLink: publicProcedure.query( + ({ ctx }): ApprovalLinkPayload | null => { + return ctx.container + .get(APPROVAL_LINK_SERVICE) + .consumePendingDeepLink(); + }, + ), }); diff --git a/packages/shared/package.json b/packages/shared/package.json index 81656c5b26..3f0a2c8d02 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json @@ -12,6 +12,10 @@ "types": "./dist/analytics-events.d.ts", "import": "./dist/analytics-events.js" }, + "./agent-platform-types": { + "types": "./dist/agent-platform-types.d.ts", + "import": "./dist/agent-platform-types.js" + }, "./domain-types": { "types": "./dist/domain-types.d.ts", "import": "./dist/domain-types.js" diff --git a/packages/shared/src/agent-platform-types.ts b/packages/shared/src/agent-platform-types.ts new file mode 100644 index 0000000000..d987f05a71 --- /dev/null +++ b/packages/shared/src/agent-platform-types.ts @@ -0,0 +1,621 @@ +// Domain types for the agent_platform product surface (deployed agents, +// their revisions, sessions, approvals, and fleet rollups). These mirror the +// PostHog Cloud REST serializers (Django app `agent_platform`) and are the wire +// shapes returned by the corresponding PostHogAPIClient methods. Field names +// stay snake_case to match the JSON exactly, as with the other shared wire +// types (see inbox-types.ts). 
+ +// --- Enums ----------------------------------------------------------------- + +export type AgentSessionState = + | "queued" + | "running" + | "completed" + | "closed" + | "cancelled" + | "failed"; + +export type AgentSessionPrincipalKind = + | "anonymous" + | "service" + | "internal" + | "shared_secret" + | "slack"; + +export type AgentRevisionState = "draft" | "ready" | "live" | "archived"; + +export type AgentApprovalRequestState = + | "queued" + | "approving" + | "dispatched" + | "dispatched_failed" + | "rejected" + | "expired"; + +export type AgentApprovalDecision = "approve" | "reject"; + +// --- Applications ---------------------------------------------------------- + +/** Resolved creator (from `created_by_id`), or null if unset/deleted. */ +export interface AgentApplicationCreator { + id?: number; + first_name?: string; + email?: string; +} + +export interface AgentApplication { + id: string; + team_id: number; + name: string; + /** Globally-unique URL identifier; server-minted unless explicitly allowed. */ + slug?: string; + description?: string; + live_revision: string | null; + archived?: boolean; + archived_at: string | null; + created_by_id: number | null; + created_by: AgentApplicationCreator | null; + created_at: string; + updated_at: string; + /** Slack Event Subscriptions request URL; null without a public ingress URL. */ + slack_events_url: string | null; + /** Slack Interactivity request URL; null without a public ingress URL. */ + slack_interactivity_url: string | null; + /** Mode-aware base URL the agent's trigger routes hang off; null without ingress. */ + ingress_base_url: string | null; +} + +// --- Revisions ------------------------------------------------------------- + +/** + * The agent spec carried on a revision. Fully typed elaboration (triggers, + * tools, mcps, skills, limits) lands with the config editor milestone; for now + * the known top-level fields are surfaced and the rest passes through. 
+ */ +export interface AgentSpec { + model: string; + triggers?: unknown[]; + tools?: unknown[]; + mcps?: unknown[]; + skills?: unknown[]; + integrations?: string[]; + secrets?: string[]; + limits?: { + max_turns?: number; + max_tool_calls?: number; + max_wall_seconds?: number; + }; + entrypoint?: string; + reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh"; + [key: string]: unknown; +} + +export interface AgentRevision { + id: string; + application: string; + parent_revision?: string | null; + state: AgentRevisionState; + bundle_uri?: string; + bundle_sha256: string | null; + spec?: AgentSpec; + created_by_id: number | null; + created_by: AgentApplicationCreator | null; + created_at: string; + updated_at: string; +} + +// --- Bundle files ---------------------------------------------------------- +// `…/revisions/{id}/bundle/` returns a typed bundle ({ agent_md, skills, tools }); +// the client flattens it into these per-file rows keyed by canonical path +// (agent.md, skills/{name}/SKILL.md, tools/{name}/source.ts, tools/{name}/schema.json). + +export type BundleFileLanguage = "markdown" | "typescript" | "json" | "text"; + +export interface BundleFile { + path: string; + content: string; + language: BundleFileLanguage; +} + +// --- Slack setup ----------------------------------------------------------- +// `…/revisions/{id}/slack_manifest/` derives the Slack app manifest from the +// revision's slack trigger + tools (scopes + event subscriptions computed). + +export interface AgentSlackManifest { + revision_id: string; + /** Opaque Slack app manifest JSON to paste into "create from manifest". */ + manifest: Record; + notes: string[]; + events_url: string | null; + interactivity_url: string | null; +} + +// --- Memory ---------------------------------------------------------------- +// The agent's S3-backed memory store: markdown files (`…/memory/…`) plus the +// JSONL reference tables the @posthog/table-* tools write. 
+ +export interface AgentMemoryHeader { + path: string; + description: string; + tags: string[]; + created_at: string | null; + updated_at: string | null; +} + +export interface AgentMemoryFile extends AgentMemoryHeader { + content: string; +} + +/** Pre-aggregated folder tree from `…/memory/tree/`. */ +export interface AgentMemoryTreeNode { + name: string; + type: "folder" | "file"; + path?: string; + description?: string; + tags?: string[]; + children?: AgentMemoryTreeNode[]; +} + +export interface AgentMemorySearchResult { + path: string; + description: string; + tags: string[]; + score: number; + snippet?: string | null; +} + +export interface AgentMemoryTableHeader { + name: string; + size: number; +} + +export interface AgentMemoryTableRows { + name: string; + total: number; + returned: number; + limit: number; + rows: Record[]; +} + +// --- Sessions -------------------------------------------------------------- + +export interface AgentSessionUsageTotal { + tokens_in: number; + tokens_out: number; + cache_read: number; + cache_write: number; + cost_input: number; + cost_output: number; + cost_cache_read: number; + cost_cache_write: number; + cost_total: number; +} + +export interface AgentSessionPrincipal { + kind: AgentSessionPrincipalKind; + /** Stable principal id (PAT id, slack user id, …); absent for anonymous. */ + id?: string; + team_id?: number; +} + +/** Trigger-specific metadata stamped at session creation; shape varies by kind. */ +export type AgentSessionTriggerMetadata = Record; + +export interface AgentSessionSummary { + id: string; + application_id: string; + revision_id: string; + state: AgentSessionState; + external_key: string | null; + trigger_metadata?: AgentSessionTriggerMetadata | null; + principal: AgentSessionPrincipal | null; + /** Count of messages in the conversation. */ + turns: number; + /** Last assistant text (~120 chars); null before any assistant turn. 
*/ + preview: string | null; + usage_total: AgentSessionUsageTotal; + retry_count: number; + created_at: string; + updated_at: string; +} + +export interface AgentApplicationSessionsListResponse { + results: AgentSessionSummary[]; + count: number; +} + +// --- Conversation transcript (stored shape on a session) ------------------- +// The runtime persists pi-ai's `conversation` array. The SSE→ACP adapter and +// the session-detail transcript both narrow these `content` parts at runtime. +// Part shapes mirror what the agent-console apiClient narrows (text/thinking/ +// toolCall for assistants; text/image for users; text for tool results). + +export interface AgentTextPart { + type: "text"; + text: string; +} + +export interface AgentThinkingPart { + type: "thinking"; + thinking: string; +} + +export interface AgentImagePart { + type: "image"; + [key: string]: unknown; +} + +export interface AgentToolCallPart { + type: "toolCall"; + id: string; + name: string; + arguments: Record; +} + +export type AgentAssistantContentPart = + | AgentTextPart + | AgentThinkingPart + | AgentToolCallPart; + +export type AgentUserContentPart = AgentTextPart | AgentImagePart; + +export interface AgentConversationUserMessage { + role: "user"; + /** String shorthand, or an array of text/image parts. */ + content: string | AgentUserContentPart[]; + /** Epoch milliseconds. */ + timestamp: number; +} + +export interface AgentConversationAssistantMessage { + role: "assistant"; + /** Array of text/thinking/toolCall parts. */ + content: AgentAssistantContentPart[]; + timestamp: number; + api?: string; + provider?: string; + model?: string; + usage?: Record; + stopReason?: string; + errorMessage?: string; +} + +export interface AgentConversationToolResultMessage { + /** Wire value is `toolResult` (NOT `tool`) — matches the runtime serializer. */ + role: "toolResult"; + toolCallId: string; + toolName: string; + /** Array of text parts (image parts are dropped on render). 
*/ + content: AgentTextPart[]; + isError: boolean; + timestamp: number; +} + +export type AgentConversationMessage = + | AgentConversationUserMessage + | AgentConversationAssistantMessage + | AgentConversationToolResultMessage; + +export interface AgentApplicationSessionDetail { + id: string; + application_id: string; + revision_id: string; + team_id: number; + state: AgentSessionState; + external_key: string | null; + trigger_metadata?: AgentSessionTriggerMetadata | null; + principal: AgentSessionPrincipal | null; + usage_total: AgentSessionUsageTotal; + conversation: AgentConversationMessage[]; + /** Messages that arrived while a turn was in flight. */ + pending_inputs: AgentConversationMessage[]; + retry_count: number; + created_at: string; + updated_at: string; + /** True when `last_n` was supplied AND the full conversation exceeded it. */ + conversation_trimmed: boolean; + /** Total messages in the untrimmed conversation; present only when trimmed. */ + conversation_total_turns?: number; +} + +// --- Session logs ---------------------------------------------------------- +// `…/sessions/{id}/logs/` returns rows from the shared ClickHouse `log_entries` +// table via `fetch_log_entries` — the same flat shape hog_function logs use. + +export type AgentLogLevel = "DEBUG" | "LOG" | "INFO" | "WARN" | "ERROR"; + +export interface AgentSessionLogEntry { + log_source_id: string; + instance_id: string; + /** ISO timestamp. */ + timestamp: string; + /** One of AgentLogLevel, but server may emit other casings — keep it open. */ + level: string; + message: string; +} + +export interface AgentSessionLogsParams { + limit?: number; + /** Comma-separated levels server-side; pass an array, joined by the client. 
*/ + level?: AgentLogLevel[]; + search?: string; + after?: string; + before?: string; +} + +// --- Fleet ----------------------------------------------------------------- + +export interface AgentFleetLiveSessionSummary { + id: string; + application_id: string; + revision_id: string; + team_id: number; + state: AgentSessionState; + external_key: string | null; + trigger_metadata?: AgentSessionTriggerMetadata | null; + principal: AgentSessionPrincipal | null; + turns: number; + preview: string | null; + usage_total: AgentSessionUsageTotal; + created_at: string; + updated_at: string; +} + +export interface AgentFleetLiveSessionsResponse { + results: AgentFleetLiveSessionSummary[]; +} + +// --- Approvals ------------------------------------------------------------- + +export interface AgentApprovalRequest { + id: string; + session_id: string; + application_id: string; + team_id: number; + revision_id: string; + turn: number; + tool_call_id: string; + tool_name: string; + proposed_args: Record; + decided_args: Record | null; + assistant_message: Record; + approver_scope: Record; + state: AgentApprovalRequestState; + decision_by: string | null; + decision_at: string | null; + decision_reason: string | null; + dispatch_outcome: Record | null; + created_at: string; + expires_at: string; +} + +/** Body for POST …/approvals/{id}/decide/. */ +export interface DecideApprovalRequest { + decision: AgentApprovalDecision; + /** Honoured only when the tool's approval_policy.allow_edit is true. */ + edited_args?: Record; + reason?: string; +} + +// --- Query params ---------------------------------------------------------- + +export interface AgentSessionsListParams { + limit?: number; + offset?: number; + /** Comma-separated states accepted server-side; pass an array, joined by the client. 
*/ + state?: AgentSessionState[]; + revision_id?: string; + created_after?: string; + created_before?: string; +} + +export interface AgentApprovalsListParams { + state?: AgentApprovalRequestState; + agent_id?: string; + limit?: number; + offset?: number; +} + +// --- Live session events (agent-ingress SSE stream) ------------------------ +// The chat trigger's `/listen` endpoint streams these as `text/event-stream` +// JSON frames. The SSE→ACP adapter folds them into ACP messages the native +// ConversationView renders. The `kind` discriminator and `data` payloads come +// from `agent-ingress/src/triggers/chat.ts` + `agent-runner/src/loop/bus.ts`. + +interface AgentSessionEventBase { + session_id: string; + /** ISO timestamp the runner stamped on the frame. */ + ts: string; +} + +/** Session accepted and the runner started — `{ team_id, agent, rev }`. */ +export type AgentSessionStartedEvent = AgentSessionEventBase & { + kind: "session_started"; + data: { team_id?: number; agent?: string; rev?: string }; +}; + +/** Server-confirmed user message, echoed when drained from `pending_inputs`. */ +export type AgentUserMessageEvent = AgentSessionEventBase & { + kind: "user_message"; + data: { text: string; timestamp?: string }; +}; + +/** A new assistant turn began — `{ turn }` is the turn index. */ +export type AgentTurnStartedEvent = AgentSessionEventBase & { + kind: "turn_started"; + data: { turn?: number }; +}; + +/** Streaming assistant text fragment. */ +export type AgentAssistantTextDeltaEvent = AgentSessionEventBase & { + kind: "assistant_text_delta"; + data: { turn?: number; text: string }; +}; + +/** Streaming assistant thinking fragment. */ +export type AgentAssistantThinkingDeltaEvent = AgentSessionEventBase & { + kind: "assistant_thinking_delta"; + data: { turn?: number; thinking: string }; +}; + +/** A tool call appeared (name known, args still streaming). 
*/ +export type AgentToolCallStartEvent = AgentSessionEventBase & { + kind: "tool_call_start"; + data: { turn?: number; id: string; name: string }; +}; + +/** Incremental tool-call args — string fragment or partial object. */ +export type AgentToolCallArgsDeltaEvent = AgentSessionEventBase & { + kind: "tool_call_args_delta"; + data: { turn?: number; id: string; argsDelta: unknown }; +}; + +/** Turn-end snapshot of the full assistant text (deltas already filled it). */ +export type AgentAssistantTextEvent = AgentSessionEventBase & { + kind: "assistant_text"; + data: { text: string }; +}; + +/** Canonical tool call with finalized args. */ +export type AgentToolCallEvent = AgentSessionEventBase & { + kind: "tool_call"; + data: { id: string; name: string; args?: Record }; +}; + +/** Tool result — `ok` plus `output` on success, `error` on failure. */ +export type AgentToolResultEvent = AgentSessionEventBase & { + kind: "tool_result"; + data: { + id: string; + tool?: string; + ok?: boolean; + output?: unknown; + error?: string; + }; +}; + +/** Turn finished; session stays open for more input. */ +export type AgentCompletedEvent = AgentSessionEventBase & { + kind: "completed"; + data: { turns?: number; summary?: unknown }; +}; + +/** Session parked for a steering message (`@posthog/meta-ask-for-input`). */ +export type AgentWaitingEvent = AgentSessionEventBase & { + kind: "waiting"; + data: { turns?: number; prompt?: string }; +}; + +/** Terminal failure — `reason` is for owners/logs, not end users. */ +export type AgentFailedEvent = AgentSessionEventBase & { + kind: "failed"; + data: { reason?: string; turns?: number }; +}; + +/** Session sealed (terminal); no further `/send`s accepted. */ +export type AgentClosedEvent = AgentSessionEventBase & { + kind: "closed"; + data: Record; +}; + +/** Model invoked a client-fulfilled tool; the host runs it and posts back. 
*/ +export type AgentClientToolCallEvent = AgentSessionEventBase & { + kind: "client_tool_call"; + data: { call_id: string; tool_id: string; args?: Record }; +}; + +/** A client tool's outcome landed (sync POST or interactive `/send` wake). */ +export type AgentClientToolResultEvent = AgentSessionEventBase & { + kind: "client_tool_result"; + data: { call_id: string; result?: unknown; error?: string }; +}; + +export type AgentSessionEvent = + | AgentSessionStartedEvent + | AgentUserMessageEvent + | AgentTurnStartedEvent + | AgentAssistantTextDeltaEvent + | AgentAssistantThinkingDeltaEvent + | AgentToolCallStartEvent + | AgentToolCallArgsDeltaEvent + | AgentAssistantTextEvent + | AgentToolCallEvent + | AgentToolResultEvent + | AgentCompletedEvent + | AgentWaitingEvent + | AgentFailedEvent + | AgentClosedEvent + | AgentClientToolCallEvent + | AgentClientToolResultEvent; + +/** Discriminator values for {@link AgentSessionEvent}. */ +export type AgentSessionEventKind = AgentSessionEvent["kind"]; + +// --- Observability / analytics -------------------------------------------- +// The runner captures `$ai_*` AI-observability events into the team's OWN +// PostHog project (tagged `$ai_origin = 'agent_platform_runner'` and +// `$agent_application_id`). The observability surface rolls those up via HogQL +// (`/query/`) into the shapes below. These are the *derived* analytics shapes +// the client produces from raw HogQL grids — not a backend wire serializer — +// but they live here so the UI hooks can import them alongside the other +// agent-platform types. + +export interface AgentAnalyticsKpis { + spendUsd: number; + sessions: number; + /** 0..1 — share of generations that errored. */ + failureRate: number; + /** p95 model latency, seconds. */ + p95LatencyS: number; +} + +export interface AgentAnalyticsDaily { + /** Short date labels, oldest → newest (14 days). */ + labels: string[]; + spend: number[]; + sessions: number[]; + /** 0..1 per day. 
*/ + failureRate: number[]; +} + +export interface AgentAnalyticsDeltas { + /** Percent change vs the prior 7 days (e.g. 12 = +12%). `null` when undefined. */ + spend: number | null; + sessions: number | null; + /** Change in failure rate, in percentage points. `null` when undefined. */ + failureRatePoints: number | null; +} + +export interface AgentAnalyticsAgentRow { + id: string; + name: string; + sessions: number; + spendUsd: number; + failureRate: number; + p95LatencyS: number; + tokens: number; +} + +export interface AgentAnalyticsModelRow { + model: string; + spendUsd: number; + calls: number; +} + +export interface AgentAnalyticsToolRow { + tool: string; + calls: number; + errors: number; + errorRate: number; +} + +export interface AgentAnalyticsData { + kpis: AgentAnalyticsKpis; + daily: AgentAnalyticsDaily; + deltas: AgentAnalyticsDeltas; + byAgent: AgentAnalyticsAgentRow[]; + byModel: AgentAnalyticsModelRow[]; + toolErrors: AgentAnalyticsToolRow[]; + /** True when there is no agent AI activity in the window — drives the empty state. 
*/ + empty: boolean; +} diff --git a/packages/shared/src/oauth.test.ts b/packages/shared/src/oauth.test.ts index 0c94e061d4..b402d8c379 100644 --- a/packages/shared/src/oauth.test.ts +++ b/packages/shared/src/oauth.test.ts @@ -8,9 +8,10 @@ describe("OAUTH_SCOPES guard", () => { scopes: OAUTH_SCOPES, }).toMatchInlineSnapshot(` { - "scopeVersion": 5, + "scopeVersion": 6, "scopes": [ "*", + "agent_approvals:write", ], } `); diff --git a/packages/shared/src/oauth.ts b/packages/shared/src/oauth.ts index b25eb5f6ac..002753c244 100644 --- a/packages/shared/src/oauth.ts +++ b/packages/shared/src/oauth.ts @@ -4,10 +4,17 @@ export const POSTHOG_US_CLIENT_ID = "HCWoE0aRFMYxIxFNTTwkOORn5LBjOt2GVDzwSw5W"; export const POSTHOG_EU_CLIENT_ID = "AIvijgMS0dxKEmr5z6odvRd8Pkh5vts3nPTzgzU9"; export const POSTHOG_DEV_CLIENT_ID = "DC5uRLVbGI02YQ82grxgnK6Qn12SXWpCqdPb60oZ"; -// Bump OAUTH_SCOPE_VERSION below whenever OAUTH_SCOPES changes to force re-authentication -export const OAUTH_SCOPES = ["*"]; +// Bump OAUTH_SCOPE_VERSION below whenever OAUTH_SCOPES changes to force re-authentication. +// +// `agent_approvals:write` is enumerated alongside `*` because the Django decide +// gate on `allow_agent_approver: false` approvals (see agent_platform's +// presentation/views.py:approvals_decide) does not accept `*` as a substitute — +// it's modeled on the same hardening as INTERNAL scopes (posthog/permissions.py). +// Without the explicit scope, the in-chat approval card + the per-agent +// Approvals tab 404 when deciding human-only approvals. 
+export const OAUTH_SCOPES = ["*", "agent_approvals:write"]; -export const OAUTH_SCOPE_VERSION = 5; +export const OAUTH_SCOPE_VERSION = 6; // Token refresh settings export const TOKEN_REFRESH_BUFFER_MS = 30 * 60 * 1000; // 30 minutes before expiry diff --git a/packages/shared/tsup.config.ts b/packages/shared/tsup.config.ts index f1685d8101..742102c8e1 100644 --- a/packages/shared/tsup.config.ts +++ b/packages/shared/tsup.config.ts @@ -3,6 +3,7 @@ import { defineConfig } from "tsup"; export default defineConfig({ entry: [ "src/index.ts", + "src/agent-platform-types.ts", "src/analytics-events.ts", "src/constants.ts", "src/deeplink.ts", diff --git a/packages/ui/src/features/agent-applications/agent-applications.module.ts b/packages/ui/src/features/agent-applications/agent-applications.module.ts new file mode 100644 index 0000000000..a64f2448cc --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-applications.module.ts @@ -0,0 +1,9 @@ +import { ContainerModule } from "inversify"; + +/** + * UI module for the agent-applications feature (deployed agent_platform + * agents). Currently holds no bindings — the chat/agent builder contributions and + * any view-state slices are added in later milestones. Registered in + * apps/code/src/renderer/desktop-contributions.ts once it binds a CONTRIBUTION. 
+ */ +export const agentApplicationsUiModule = new ContainerModule(() => {}); diff --git a/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderDock.tsx b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderDock.tsx new file mode 100644 index 0000000000..d7408db794 --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderDock.tsx @@ -0,0 +1,342 @@ +import { + ArrowRightIcon, + NavigationArrowIcon, + PlusIcon, + SidebarSimpleIcon, + SparkleIcon, +} from "@phosphor-icons/react"; +import { useAuthenticatedClient } from "@posthog/ui/features/auth/authClient"; +import { Button } from "@posthog/ui/primitives/Button"; +import { Flex, Text, Tooltip } from "@radix-ui/themes"; +import { useEffect, useRef, useState } from "react"; +import { useAuthStateValue } from "../../auth/store"; +import { AgentChatPendingApprovalCard } from "../components/AgentChatPendingApprovalCard"; +import { AgentChatSurface } from "../components/AgentChatSurface"; +import { AgentDetailEmptyState } from "../components/AgentDetailLayout"; +import { useAgentApplication } from "../hooks/useAgentApplication"; +import { useAgentChat } from "../hooks/useAgentChat"; +import { useAgentChatPendingApproval } from "../hooks/useAgentChatPendingApproval"; +import { resolveIngressBaseUrl } from "../utils/ingress"; +import { AgentBuilderSecretForm } from "./AgentBuilderSecretForm"; +import { AgentBuilderSeedDialog } from "./AgentBuilderSeedDialog"; +import { + AGENT_BUILDER_CHAT_ID, + AGENT_BUILDER_SLUG, + type AgentBuilderPageContext, + useAgentBuilderStore, +} from "./agentBuilderStore"; +import { suggestionsForPage } from "./agentBuilderSuggestions"; +import { useAgentBuilderClientTools } from "./useAgentBuilderClientTools"; + +const CHAT_ID = AGENT_BUILDER_CHAT_ID; + +/** A rotating pool of composer placeholders — picked once per dock mount. 
*/ +const BUILDER_PLACEHOLDERS = [ + "Build me an agent that…", + "What should we build today?", + "Ask me to inspect, debug, or edit an agent…", + "Describe an agent and I'll wire it up…", + "Spin up a new agent, or fix an existing one…", + "What's broken? Let's debug a session…", + "Audit the fleet, tweak a prompt, ship an agent…", + "Tell me what to change…", +]; + +/** The "what am I looking at" object sent to the agent builder (envelope + get_context). */ +function buildAgentBuilderContext( + page: AgentBuilderPageContext, + followEnabled: boolean, + project: { id: number | null; name: string | null; orgId: string | null }, +): Record { + const agent = "slug" in page ? page.slug : undefined; + const sessionId = page.kind === "agent-session" ? page.sessionId : undefined; + return { + page: page.kind, + agent, + session_id: sessionId, + follow_enabled: followEnabled, + // The project the user is currently in — the agent threads this into the + // `project_id` arg of every `@posthog/*` tool (it's tenant-neutral and acts + // on whatever project we report here). + project_id: project.id ?? undefined, + project_name: project.name ?? undefined, + org_id: project.orgId ?? undefined, + client: { kind: "posthog-code", version: "1" }, + }; +} + +/** + * The agent builder chat — an always-on dock talking to the deployed + * `agent-concierge`. Streams through the shared `useAgentChat`/`AgentChatSurface` + * stack, prepends the current `/code/agents` page context to the first message, + * answers `get_context`, and lets the agent drive the UI via `focus_*`. 
+ */ +export function AgentBuilderDock() { + const { data: application } = useAgentApplication(AGENT_BUILDER_SLUG); + const cloudRegion = useAuthStateValue((s) => s.cloudRegion); + const ingressBaseUrl = resolveIngressBaseUrl( + application?.ingress_base_url, + cloudRegion, + ); + + const client = useAuthenticatedClient(); + const currentProjectId = useAuthStateValue((s) => s.currentProjectId); + const currentOrgId = useAuthStateValue((s) => s.currentOrgId); + const orgProjectsMap = useAuthStateValue((s) => s.orgProjectsMap); + const projectName = + currentOrgId != null && currentProjectId != null + ? (orgProjectsMap[currentOrgId]?.projects.find( + (p) => p.id === currentProjectId, + )?.name ?? null) + : null; + const page = useAgentBuilderStore((s) => s.page); + const followMode = useAgentBuilderStore((s) => s.followMode); + const setFollowMode = useAgentBuilderStore((s) => s.setFollowMode); + const setVisible = useAgentBuilderStore((s) => s.setVisible); + const seed = useAgentBuilderStore((s) => s.seed); + const consumeSeed = useAgentBuilderStore((s) => s.consumeSeed); + const pendingSecret = useAgentBuilderStore((s) => s.pendingSecret); + const setPendingSecret = useAgentBuilderStore((s) => s.setPendingSecret); + const [secretBusy, setSecretBusy] = useState(false); + const [placeholder] = useState( + () => + BUILDER_PLACEHOLDERS[ + Math.floor(Math.random() * BUILDER_PLACEHOLDERS.length) + ], + ); + + const clientTools = useAgentBuilderClientTools(); + const chat = useAgentChat({ + chatId: CHAT_ID, + agentSlug: AGENT_BUILDER_SLUG, + ingressBaseUrl, + contextProvider: () => + buildAgentBuilderContext(page, followMode, { + id: currentProjectId, + name: projectName, + orgId: currentOrgId, + }), + clientTools, + }); + const { data: pendingApproval } = useAgentChatPendingApproval( + AGENT_BUILDER_SLUG, + chat.sessionId, + ); + + // Resolve a pending set_secret: PUT the value straight to the env-keys API + // (never through the agent), then wake the parked session with 
the outcome. + async function submitSecret(value: string) { + if (!pendingSecret) return; + setSecretBusy(true); + try { + await client.setAgentEnvKey( + pendingSecret.agentSlug, + pendingSecret.secret, + value, + ); + await chat.resolveInteractiveTool(pendingSecret.callId, { + result: { + key: pendingSecret.secret, + action: pendingSecret.mode ?? "set", + }, + }); + setPendingSecret(null); + } catch (err) { + await chat.resolveInteractiveTool(pendingSecret.callId, { + error: err instanceof Error ? err.message : "set_secret_failed", + }); + setPendingSecret(null); + } finally { + setSecretBusy(false); + } + } + + function cancelSecret() { + if (!pendingSecret) return; + void chat.resolveInteractiveTool(pendingSecret.callId, { + error: "user_cancelled", + }); + setPendingSecret(null); + } + + // Edit-with-AI hand-offs: send the seeded prompt once when a new seed lands. + // An empty dock starts immediately; if a chat is already in progress, confirm + // whether to start fresh or continue (so a deliberate "New agent" / "Edit with + // AI" doesn't silently wipe or append onto an unrelated conversation). + const lastSeedRef = useRef(0); + const [seedConfirm, setSeedConfirm] = useState(null); + useEffect(() => { + if (!seed || seed.seq === lastSeedRef.current) return; + lastSeedRef.current = seed.seq; + consumeSeed(seed.seq); + if (chat.messages.length === 0) { + chat.send(seed.prompt); + } else { + setSeedConfirm(seed.prompt); + } + }, [seed, chat, consumeSeed]); + + function seedStartFresh() { + if (!seedConfirm) return; + setPendingSecret(null); + chat.newChat(); + chat.send(seedConfirm); + setSeedConfirm(null); + } + function seedContinue() { + if (!seedConfirm) return; + chat.send(seedConfirm); + setSeedConfirm(null); + } + + return ( + + + + + Agent Builder + + + + + + + + + + + + + + + {!ingressBaseUrl ? ( +
+ +
+ ) : ( + } + emptyHint="Ask the agent builder to inspect, debug, or edit your agents. It can see what you're looking at and walk you there." + belowConversation={ + pendingApproval ? ( + + ) : null + } + aboveComposer={ + pendingSecret ? ( + + ) : null + } + composerDisabledReason={ + pendingApproval ? "Waiting on your approval decision" : undefined + } + onSend={chat.send} + onCancel={chat.cancel} + /> + )} + + setSeedConfirm(null)} + /> +
+ ); +} + +/** Empty-dock state: a short prompt plus page-aware starter suggestions. */ +function AgentBuilderEmptyState({ + page, + onPick, +}: { + page: AgentBuilderPageContext; + onPick: (prompt: string) => void; +}) { + const suggestions = suggestionsForPage(page); + return ( + + + Ask the agent builder to inspect, debug, or edit your agents — it can + see what you're looking at and walk you there. Try: + + + {suggestions.map((s) => ( + + ))} + + + ); +} diff --git a/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderDockLayout.tsx b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderDockLayout.tsx new file mode 100644 index 0000000000..f92599cb72 --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderDockLayout.tsx @@ -0,0 +1,78 @@ +import { useFeatureFlag } from "@posthog/ui/features/feature-flags/useFeatureFlag"; +import { type ReactNode, useEffect } from "react"; +import { Panel, PanelGroup, PanelResizeHandle } from "react-resizable-panels"; +import { AGENT_PLATFORM_FLAG } from "../featureFlag"; +import { AgentBuilderDock } from "./AgentBuilderDock"; +import { useAgentBuilderStore } from "./agentBuilderStore"; + +/** + * Wraps the `/code/agents` content in a resizable split with the always-on + * agent builder dock pinned right. Gated behind the `agent-platform` flag — when + * disabled, the content renders unchanged with no dock or affordance. Hidden by + * default; toggled via the show button in the agents page headers, the dock's + * hide button, or Cmd/Ctrl+Shift+I (Cmd+I alone is taken by the inbox). Panel + * sizes persist (`autoSaveId`). + */ +export function AgentBuilderDockLayout({ children }: { children: ReactNode }) { + const enabled = useFeatureFlag(AGENT_PLATFORM_FLAG); + const visible = useAgentBuilderStore((s) => s.visible); + const toggleVisible = useAgentBuilderStore((s) => s.toggleVisible); + + useEffect(() => { + if (!enabled) return; + const onKey = (e: KeyboardEvent) => { + // Cmd/Ctrl+Shift+I — Cmd+I alone is taken by the inbox. 
+ if (!(e.metaKey || e.ctrlKey) || e.altKey || !e.shiftKey) return; + if (e.key.toLowerCase() !== "i") return; + const t = e.target as HTMLElement | null; + if ( + t?.tagName === "INPUT" || + t?.tagName === "TEXTAREA" || + t?.isContentEditable + ) { + return; + } + e.preventDefault(); + toggleVisible(); + }; + window.addEventListener("keydown", onKey); + return () => window.removeEventListener("keydown", onKey); + }, [enabled, toggleVisible]); + + // Flag off → no agent builder anywhere in /code/agents. + if (!enabled) { + return <>{children}; + } + + // Collapsed: render content unchanged. The open affordance lives in the + // agents page headers (AgentBuilderHeaderControls); Cmd/Ctrl+Shift+I toggles. + if (!visible) { + return <>{children}; + } + + return ( + + + {children} + + + + + + + ); +} diff --git a/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderHeaderControls.tsx b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderHeaderControls.tsx new file mode 100644 index 0000000000..6112ebc01c --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderHeaderControls.tsx @@ -0,0 +1,77 @@ +import { NavigationArrowIcon, SparkleIcon } from "@phosphor-icons/react"; +import { agentChatStore } from "@posthog/core/agent-chat/agentChatStore"; +import { useFeatureFlag } from "@posthog/ui/features/feature-flags/useFeatureFlag"; +import { Button } from "@posthog/ui/primitives/Button"; +import { Badge, Flex, Tooltip } from "@radix-ui/themes"; +import { useStore } from "zustand"; +import { AGENT_PLATFORM_FLAG } from "../featureFlag"; +import { headerActionForPage } from "./agentBuilderActions"; +import { + AGENT_BUILDER_CHAT_ID, + type AgentBuilderPageContext, + useAgentBuilderStore, +} from "./agentBuilderStore"; +import { EditWithAIButton } from "./EditWithAIButton"; + +/** + * The agents-header control cluster — identical across every agents view. 
Driven + * by the view's `context`, it renders: + * - a "Following" indicator while the agent builder is mid-turn and follow mode + * is on (so it's clear the builder is steering navigation), + * - a contextual AI button (New agent / Explain this session / …), + * - a "show" button that opens the dock, ONLY when it's hidden (the inverse of + * the hide button inside the dock header). + * All buttons are small. Renders nothing unless the `agent-platform` flag is on. + */ +export function AgentBuilderHeaderControls({ + context, +}: { + context: AgentBuilderPageContext; +}) { + const enabled = useFeatureFlag(AGENT_PLATFORM_FLAG); + const visible = useAgentBuilderStore((s) => s.visible); + const setVisible = useAgentBuilderStore((s) => s.setVisible); + const followMode = useAgentBuilderStore((s) => s.followMode); + const status = useStore( + agentChatStore, + (s) => s.chats[AGENT_BUILDER_CHAT_ID]?.status, + ); + + if (!enabled) return null; + + const running = status === "streaming" || status === "starting"; + const action = headerActionForPage(context); + + return ( + + {running && followMode ? ( + + + + Following + + + ) : null} + {action ? ( + + ) : null} + {!visible ? ( + + + + ) : null} + + ); +} diff --git a/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderSecretForm.tsx b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderSecretForm.tsx new file mode 100644 index 0000000000..3d0da2dc2c --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderSecretForm.tsx @@ -0,0 +1,87 @@ +import { KeyIcon } from "@phosphor-icons/react"; +import { Button } from "@posthog/ui/primitives/Button"; +import { Flex, Text, TextField } from "@radix-ui/themes"; +import { type KeyboardEvent, useState } from "react"; +import type { PendingSecret } from "./agentBuilderStore"; + +/** + * Inline punch-out for the agent builder's `set_secret` tool. 
The agent never sees + * the raw value: the form PUTs it straight to the env-keys API and only the + * `{ key, action }` outcome is posted back to wake the session. Shown above the + * dock composer while a secret is pending. + */ +export function AgentBuilderSecretForm({ + pending, + busy, + onSubmit, + onCancel, +}: { + pending: PendingSecret; + busy: boolean; + onSubmit: (value: string) => void; + onCancel: () => void; +}) { + const [value, setValue] = useState(""); + const action = pending.mode === "rotate" ? "Rotate" : "Set"; + + function submit() { + const trimmed = value.trim(); + if (!trimmed || busy) return; + onSubmit(trimmed); + } + function onKeyDown(e: KeyboardEvent) { + if (e.key === "Enter") { + e.preventDefault(); + submit(); + } + } + + return ( + + + + + {action} secret{" "} + {pending.secret} + + + {pending.purpose ? ( + + {pending.purpose} + + ) : null} + + The value is sent straight to your agent's secrets — the agent builder + never sees it. + + + setValue(e.target.value)} + onKeyDown={onKeyDown} + placeholder={`Value for ${pending.secret}`} + className="flex-1 text-[13px]" + autoFocus + /> + + + + + ); +} diff --git a/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderSeedDialog.tsx b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderSeedDialog.tsx new file mode 100644 index 0000000000..d069a04b1d --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/AgentBuilderSeedDialog.tsx @@ -0,0 +1,58 @@ +import { SparkleIcon } from "@phosphor-icons/react"; +import { AlertDialog, Button, Flex, Text } from "@radix-ui/themes"; + +/** + * Confirm dialog shown when an "Edit with AI" / "New agent" seed lands while an + * Agent Builder chat is already in progress: start a fresh chat for the seed, or + * send it into the current conversation. Mirrors the console's seed dialog. 
+ */ +export function AgentBuilderSeedDialog({ + open, + prompt, + onStartFresh, + onContinue, + onCancel, +}: { + open: boolean; + prompt: string; + onStartFresh: () => void; + onContinue: () => void; + onCancel: () => void; +}) { + return ( + { + if (!isOpen) onCancel(); + }} + > + + + + + Start a new chat? + + + + You have an Agent Builder chat in progress. Start a fresh chat for + this, or continue the current one? + + + “{prompt}” + + + + + + + + + + ); +} diff --git a/packages/ui/src/features/agent-applications/agent-builder/EditWithAIButton.tsx b/packages/ui/src/features/agent-applications/agent-builder/EditWithAIButton.tsx new file mode 100644 index 0000000000..a23400bb7a --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/EditWithAIButton.tsx @@ -0,0 +1,35 @@ +import { SparkleIcon } from "@phosphor-icons/react"; +import { Button } from "@posthog/ui/primitives/Button"; +import { useAgentBuilderStore } from "./agentBuilderStore"; + +/** + * Opens the agent builder dock and seeds it with a prompt — the render surfaces' + * hand-off into authoring ("edit with AI"). The agent builder does the actual edits + * server-side via staged draft revisions; this just starts the conversation + * with the right context. 
+ */ +export function EditWithAIButton({ + prompt, + agentSlug, + label = "Ask the agent builder", + variant = "soft", + size = "1", +}: { + prompt: string; + agentSlug?: string | null; + label?: string; + variant?: "soft" | "ghost" | "outline"; + size?: "1" | "2"; +}) { + const startAgentBuilder = useAgentBuilderStore((s) => s.startAgentBuilder); + return ( + + ); +} diff --git a/packages/ui/src/features/agent-applications/agent-builder/agentBuilderActions.ts b/packages/ui/src/features/agent-applications/agent-builder/agentBuilderActions.ts new file mode 100644 index 0000000000..f9cc0b4d43 --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/agentBuilderActions.ts @@ -0,0 +1,76 @@ +import type { AgentBuilderPageContext } from "./agentBuilderStore"; + +export interface AgentBuilderAction { + /** Short button label, e.g. "New agent" / "Explain this session". */ + label: string; + /** Seed prompt sent to the agent builder when clicked. */ + prompt: string; + /** Subject agent for the context envelope (null for fleet-level actions). */ + agentSlug: string | null; +} + +/** + * The contextual agent-builder action for a given view — the AI button's + * content. Drives the abstract header controls so every agents view gets a + * button that fits what you're looking at. Returns null for views with no + * obvious action (just the show/following affordances remain). 
/** Builds a header action whose subject is a single agent. */
function agentScopedAction(
  agentSlug: string,
  label: string,
  prompt: string,
): AgentBuilderAction {
  return { label, prompt, agentSlug };
}

/**
 * Picks the contextual AI button for the agents header from the page the user
 * is on.
 *
 * The fleet-level list gets a "New agent" action with no subject agent; every
 * per-agent view gets an action carrying that agent's slug. Page kinds without
 * a mapping here (e.g. `scouts`, `agent-chat`, `unknown`) yield `null`, so the
 * header shows no contextual button for them.
 *
 * @param page - The page context registered by the current route.
 * @returns The label/prompt/subject for the button, or `null` when the view
 *   has no contextual action.
 */
export function headerActionForPage(
  page: AgentBuilderPageContext,
): AgentBuilderAction | null {
  if (page.kind === "agent-list") {
    return {
      label: "New agent",
      prompt:
        "Help me create a new agent — walk me through what it should do, then set it up.",
      agentSlug: null,
    };
  }
  if (page.kind === "agent") {
    return agentScopedAction(
      page.slug,
      "Ask about this agent",
      "Explain what this agent does and how it's configured.",
    );
  }
  if (page.kind === "agent-config") {
    return agentScopedAction(
      page.slug,
      "Edit configuration",
      "Help me change this agent's configuration.",
    );
  }
  if (page.kind === "agent-sessions") {
    return agentScopedAction(
      page.slug,
      "Review sessions",
      "Review this agent's recent sessions and surface anything notable.",
    );
  }
  if (page.kind === "agent-session") {
    return agentScopedAction(
      page.slug,
      "Explain this session",
      "Explain what happened in this session, step by step.",
    );
  }
  if (page.kind === "agent-approvals") {
    return agentScopedAction(
      page.slug,
      "Review approvals",
      "Review the pending approval requests for this agent.",
    );
  }
  if (page.kind === "agent-memory") {
    return agentScopedAction(
      page.slug,
      "Ask about memory",
      "Summarize what's stored in this agent's memory.",
    );
  }
  if (page.kind === "agent-observability") {
    return agentScopedAction(
      page.slug,
      "Ask about performance",
      "Summarize this agent's spend, volume, and failure rate, and call out anything notable.",
    );
  }
  // No contextual action for this view.
  return null;
}
/** The deployed meta-agent the agent builder dock always talks to. */
export const AGENT_BUILDER_SLUG = "agent-concierge";

/** Key for the agent builder's chat in the core `agentChatStore`. */
export const AGENT_BUILDER_CHAT_ID = "agent-builder";

/**
 * What the user is currently looking at in `/code/agents`. Mirrors the console's
 * `AgentBuilderPageContext` so the agent builder can resolve deictic references
 * ("this agent", "this session") and drive the right `focus_*` target. Each
 * route registers its context on mount via `useSetAgentBuilderPage`.
 */
export type AgentBuilderPageContext =
  | { kind: "agent-list" }
  | { kind: "scouts" }
  | { kind: "agent"; slug: string }
  | { kind: "agent-config"; slug: string; node?: string; revision?: string }
  | { kind: "agent-sessions"; slug: string }
  | { kind: "agent-session"; slug: string; sessionId: string }
  | { kind: "agent-approvals"; slug: string; request?: string }
  | { kind: "agent-memory"; slug: string }
  | { kind: "agent-observability"; slug: string }
  | { kind: "agent-chat"; slug: string }
  | { kind: "unknown" };

/** A pending "Edit with AI" hand-off: open the dock and send `prompt`. */
export interface AgentBuilderSeed {
  /**
   * Monotonic id so a consumer can mark exactly one seed handled. Strictly
   * increasing for the lifetime of the store instance, including across
   * `consumeSeed` calls, so no two seeds ever share an id.
   */
  seq: number;
  prompt: string;
  /** Agent the seed is about, for the context envelope. */
  agentSlug: string | null;
}

/**
 * An in-flight `set_secret` punch-out. The agent parked its turn; the dock
 * renders a form for these, and on submit PUTs the key + wakes the session.
 */
export interface PendingSecret {
  /** The parked tool call to resolve via `/send`. */
  callId: string;
  agentSlug: string;
  /** Env key name, e.g. "ANTHROPIC_KEY". The value is never seen by the agent. */
  secret: string;
  mode?: "set" | "rotate";
  purpose?: string;
}

interface AgentBuilderStore {
  /** Dock open/closed (persisted). */
  visible: boolean;
  /** Whether the agent's `focus_*` tools may navigate the UI (persisted). */
  followMode: boolean;
  /** Current page context (ephemeral — re-registered per route). */
  page: AgentBuilderPageContext;
  /** Pending edit-with-AI hand-off (ephemeral). */
  seed: AgentBuilderSeed | null;
  /** In-flight set_secret punch-out the dock renders a form for (ephemeral). */
  pendingSecret: PendingSecret | null;

  toggleVisible: () => void;
  setVisible: (visible: boolean) => void;
  setFollowMode: (followMode: boolean) => void;
  setPage: (page: AgentBuilderPageContext) => void;
  /** Open the dock and queue a prompt to send. */
  startAgentBuilder: (prompt: string, agentSlug?: string | null) => void;
  /** Mark a seed handled (no-op if a newer seed has since replaced it). */
  consumeSeed: (seq: number) => void;
  setPendingSecret: (pending: PendingSecret | null) => void;
}

/**
 * Zustand store backing the agent builder dock: layout preferences (persisted)
 * plus the ephemeral page context, pending seed, and pending secret punch-out.
 */
export const useAgentBuilderStore = create<AgentBuilderStore>()(
  persist(
    (set) => {
      // Seed ids come from a counter that outlives any single seed. Deriving
      // the next id from the current seed would restart at 1 every time
      // `consumeSeed` clears it, so two different seeds could share an id and
      // a late `consumeSeed` for the first would clear the second.
      let lastSeedSeq = 0;

      return {
        visible: false,
        followMode: true,
        page: { kind: "unknown" },
        seed: null,
        pendingSecret: null,

        toggleVisible: () => set((s) => ({ visible: !s.visible })),
        setVisible: (visible) => set({ visible }),
        setFollowMode: (followMode) => set({ followMode }),
        setPage: (page) => set({ page }),
        startAgentBuilder: (prompt, agentSlug = null) => {
          lastSeedSeq += 1;
          set({
            visible: true,
            seed: { seq: lastSeedSeq, prompt, agentSlug },
          });
        },
        consumeSeed: (seq) =>
          // Only clear the seed the caller actually handled; a newer seed
          // (different seq) is left in place.
          set((s) => (s.seed?.seq === seq ? { seed: null } : s)),
        setPendingSecret: (pendingSecret) => set({ pendingSecret }),
      };
    },
    {
      name: "agent-builder-dock",
      storage: electronStorage,
      // Page + seed are ephemeral; only remember the user's layout prefs.
      partialize: (s) => ({ visible: s.visible, followMode: s.followMode }),
    },
  ),
);
this agent's configuration.", + }, + { + label: "Edit the system prompt", + prompt: "I want to change this agent's system prompt.", + }, + { + label: "Add a tool or skill", + prompt: "Help me add a tool or skill to this agent.", + }, + ]; + case "agent-sessions": + return [ + { + label: "Any failing sessions?", + prompt: + "Look at this agent's recent sessions and surface any failures.", + }, + { + label: "Summarize recent activity", + prompt: "Summarize this agent's recent session activity.", + }, + ]; + case "agent-session": + return [ + { + label: "What happened here?", + prompt: "Explain what happened in this session, step by step.", + }, + { + label: "Debug this session", + prompt: "Debug this session — what went wrong and how do I fix it?", + }, + ]; + case "agent-approvals": + return [ + { + label: "Review pending approvals", + prompt: "Review the pending approval requests for this agent.", + }, + ]; + case "agent-memory": + return [ + { + label: "What's in memory?", + prompt: "Summarize what's stored in this agent's memory.", + }, + ]; + case "agent-observability": + return [ + { + label: "How is this agent performing?", + prompt: + "Summarize this agent's spend, volume, and failure rate, and call out anything notable.", + }, + ]; + default: + return [ + { + label: "Create a new agent", + prompt: "Help me create a new agent.", + }, + { + label: "What's changed in the last week?", + prompt: "Summarize what's changed across my agents in the last week.", + }, + ]; + } +} diff --git a/packages/ui/src/features/agent-applications/agent-builder/useAgentBuilderClientTools.ts b/packages/ui/src/features/agent-applications/agent-builder/useAgentBuilderClientTools.ts new file mode 100644 index 0000000000..4a43c0433d --- /dev/null +++ b/packages/ui/src/features/agent-applications/agent-builder/useAgentBuilderClientTools.ts @@ -0,0 +1,142 @@ +import { useNavigate } from "@tanstack/react-router"; +import { useCallback, useRef } from "react"; +import type { 
/** Narrows an untyped tool argument to a string; any other value becomes `undefined`. */
function stringArg(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * The agent builder's UI-driving client tools. The agent calls these to steer the
 * user's screen (`focus_*`, which navigate code's agent routes and report back
 * `{ focused }`) and to set secrets (`set_secret`, an interactive punch-out:
 * park the call and render a form — see the dock). Returning `null` defers to
 * the built-in toast/get_context handlers.
 *
 * `focus_*` navigations are gated by follow-mode: when off, they report
 * `{ focused: false, reason: "user_paused_follow" }` instead of moving the UI.
 *
 * @returns A handler that resolves `set_secret` and `focus_*` tool calls and
 *   returns `null` for every other tool id.
 */
export function useAgentBuilderClientTools(): ClientToolHandler {
  const navigate = useNavigate();
  const followMode = useAgentBuilderStore((s) => s.followMode);
  const setPendingSecret = useAgentBuilderStore((s) => s.setPendingSecret);
  // Mirror follow-mode into a ref so the memoized handler reads the latest
  // value without listing `followMode` as a dependency.
  const followRef = useRef(followMode);
  followRef.current = followMode;

  return useCallback(
    (data) => {
      const args = (data.args ?? {}) as Record<string, unknown>;

      // set_secret — interactive punch-out. Park the call (defer) and render a
      // form; the dock PUTs the key and wakes the session on submit.
      if (data.tool_id === "set_secret") {
        const agentSlug = stringArg(args.agent_slug);
        const secret = stringArg(args.secret);
        if (!agentSlug) return { error: "missing_arg: agent_slug" };
        if (!secret) return { error: "missing_arg: secret" };
        // Anything other than an explicit "rotate" is treated as "set".
        const mode = args.mode === "rotate" ? "rotate" : "set";
        setPendingSecret({
          callId: data.call_id,
          agentSlug,
          secret,
          mode,
          purpose: stringArg(args.purpose),
        });
        return { defer: true };
      }

      // Everything below handles focus_* only; other tools fall through to the
      // built-in handlers.
      if (!data.tool_id.startsWith("focus_")) return null;
      const slug = stringArg(args.slug);
      // Follow-mode is checked before argument validation, so a paused user
      // always gets "user_paused_follow" regardless of the arguments.
      if (!followRef.current) {
        return { result: { focused: false, reason: "user_paused_follow" } };
      }
      if (!slug) {
        return { result: { focused: false, reason: "missing_slug" } };
      }
      const params = { idOrSlug: slug };

      switch (data.tool_id) {
        case "focus_tab": {
          const tab = stringArg(args.tab) ?? "overview";
          switch (tab) {
            case "configuration":
              navigate({
                to: "/code/agents/applications/$idOrSlug/configuration",
                params,
              });
              break;
            case "sessions":
              navigate({
                to: "/code/agents/applications/$idOrSlug/sessions",
                params,
              });
              break;
            case "memory":
              navigate({
                to: "/code/agents/applications/$idOrSlug/memory",
                params,
              });
              break;
            case "approvals":
              navigate({
                to: "/code/agents/applications/$idOrSlug/approvals",
                params,
              });
              break;
            case "observability":
              navigate({
                to: "/code/agents/applications/$idOrSlug/observability",
                params,
              });
              break;
            case "chat":
              navigate({
                to: "/code/agents/applications/$idOrSlug/chat",
                params,
              });
              break;
            default:
              // "overview" and any unrecognized tab land on the agent's root page.
              navigate({
                to: "/code/agents/applications/$idOrSlug",
                params,
              });
          }
          return { result: { focused: true } };
        }
        case "focus_file":
          navigate({
            to: "/code/agents/applications/$idOrSlug/configuration",
            params,
            search: { node: stringArg(args.path) },
          });
          return { result: { focused: true } };
        case "focus_spec_section":
          navigate({
            to: "/code/agents/applications/$idOrSlug/configuration",
            params,
            search: { node: stringArg(args.section) },
          });
          return { result: { focused: true } };
        case "focus_revision":
          navigate({
            to: "/code/agents/applications/$idOrSlug/configuration",
            params,
            search: { revision: stringArg(args.revisionId) },
          });
          return { result: { focused: true } };
        case "focus_session": {
          const sessionId = stringArg(args.sessionId);
          if (!sessionId) {
            return { result: { focused: false, reason: "missing_session_id" } };
          }
          navigate({
            to: "/code/agents/applications/$idOrSlug/sessions/$sessionId",
            params: { idOrSlug: slug, sessionId },
          });
          return { result: { focused: true } };
        }
        default:
          return { result: { focused: false, reason: "unknown_focus_target" } };
      }
    },
    [navigate, setPendingSecret],
  );
}
/** Wraps a string in an ACP text content block. */
export function textBlock(text: string): ContentBlock {
  const block: ContentBlock = { type: "text", text };
  return block;
}

/** Wraps a string as tool-call content: a `content` item holding a text block. */
export function textToolContent(text: string): ToolCallContent {
  const inner: ContentBlock = { type: "text", text };
  return { type: "content", content: inner };
}

/**
 * Builds a `session/prompt` JSON-RPC request envelope.
 *
 * @param id - JSON-RPC request id; callers should keep it unique within a
 *   conversation.
 * @param text - The user's message, sent as a single text block.
 * @param ts - Timestamp stored on the envelope.
 * @returns The `acp_message` envelope carrying the request.
 */
export function promptRequestMessage(
  id: number,
  text: string,
  ts: number,
): AcpMessage {
  const params = { prompt: [textBlock(text)] };
  return {
    type: "acp_message",
    ts,
    message: { jsonrpc: "2.0", id, method: "session/prompt", params },
  };
}
/**
 * Wraps a `SessionUpdate` in a `session/update` JSON-RPC notification envelope.
 *
 * @param update - The update payload to deliver.
 * @param ts - Timestamp stored on the envelope.
 */
export function sessionUpdateMessage(
  update: SessionUpdate,
  ts: number,
): AcpMessage {
  const params = { update };
  return {
    type: "acp_message",
    ts,
    message: { jsonrpc: "2.0", method: "session/update", params },
  };
}

/**
 * Builds the `_posthog/turn_complete` notification envelope.
 *
 * @param ts - Timestamp stored on the envelope.
 * @param stopReason - Optional stop reason; included in `params` only when it
 *   is a non-empty string.
 */
export function turnCompleteMessage(
  ts: number,
  stopReason?: string,
): AcpMessage {
  const params: { stopReason?: string } = {};
  if (stopReason) {
    params.stopReason = stopReason;
  }
  return {
    type: "acp_message",
    ts,
    message: {
      jsonrpc: "2.0",
      method: POSTHOG_NOTIFICATIONS.TURN_COMPLETE,
      params,
    },
  };
}

// --- SessionUpdate builders -------------------------------------------------

/** An `agent_message_chunk` update carrying assistant text. */
export function agentTextUpdate(text: string): SessionUpdate {
  const content = textBlock(text);
  return { sessionUpdate: "agent_message_chunk", content };
}

/** An `agent_thought_chunk` update carrying assistant thinking text. */
export function agentThoughtUpdate(text: string): SessionUpdate {
  const content = textBlock(text);
  return { sessionUpdate: "agent_thought_chunk", content };
}

/**
 * A `tool_call` update announcing a new tool call.
 *
 * `rawInput` is attached unless it is exactly `undefined` (so `null` is kept);
 * `status` is attached only when truthy. Omitting `status` leaves it for a
 * later `tool_call_update` to set.
 */
export function toolCallStartUpdate(
  toolCallId: string,
  title: string,
  rawInput?: unknown,
  status?: ToolCallStatus,
): SessionUpdate {
  const update: Extract<SessionUpdate, { sessionUpdate: "tool_call" }> = {
    sessionUpdate: "tool_call",
    toolCallId,
    title,
  };
  if (rawInput !== undefined) {
    update.rawInput = rawInput;
  }
  if (status) {
    update.status = status;
  }
  return update;
}

/** A `tool_call_update` that sets the title and raw input of an existing call. */
export function toolCallArgsUpdate(
  toolCallId: string,
  title: string,
  rawInput: unknown,
): SessionUpdate {
  return {
    sessionUpdate: "tool_call_update",
    toolCallId: toolCallId,
    title: title,
    rawInput: rawInput,
  };
}
*/ +export function toolResultUpdate( + toolCallId: string, + text: string, + isError: boolean, + rawOutput?: unknown, +): SessionUpdate { + return { + sessionUpdate: "tool_call_update", + toolCallId, + status: isError ? "failed" : "completed", + content: [textToolContent(text)], + ...(rawOutput === undefined ? {} : { rawOutput }), + }; +} diff --git a/packages/ui/src/features/agent-applications/chat/chatHistoryStore.ts b/packages/ui/src/features/agent-applications/chat/chatHistoryStore.ts new file mode 100644 index 0000000000..3473ef7c6d --- /dev/null +++ b/packages/ui/src/features/agent-applications/chat/chatHistoryStore.ts @@ -0,0 +1,56 @@ +import { electronStorage } from "@posthog/ui/shell/rendererStorage"; +import { create } from "zustand"; +import { persist } from "zustand/middleware"; + +/** + * Locally-persisted index of preview chats the user started against an agent + * *from this app*. These are the only sessions surfaced in the chat pane's + * rail — deliberately NOT the agent's full server session list, which can + * include real customer conversations. Keyed by agent slug; each entry is just + * enough to re-attach (`/listen` replays the transcript) and label the rail. + */ +export interface PreviewChatEntry { + sessionId: string; + /** First user message of the chat, for the rail label. */ + title: string; + /** Epoch ms when the chat was started here. */ + startedAt: number; +} + +interface ChatHistoryState { + byAgent: Record; + /** Record (or move-to-top) a preview chat the user started here. */ + record: (agentKey: string, entry: PreviewChatEntry) => void; + remove: (agentKey: string, sessionId: string) => void; +} + +/** Per-agent cap; preview chats are throwaway, so an old tail is fine to drop. */ +const MAX_PER_AGENT = 50; + +export const useChatHistoryStore = create()( + persist( + (set) => ({ + byAgent: {}, + record: (agentKey, entry) => + set((s) => { + const existing = s.byAgent[agentKey] ?? 
[]; + // Newest first, de-duped by sessionId, capped. + const next = [ + entry, + ...existing.filter((e) => e.sessionId !== entry.sessionId), + ].slice(0, MAX_PER_AGENT); + return { byAgent: { ...s.byAgent, [agentKey]: next } }; + }), + remove: (agentKey, sessionId) => + set((s) => ({ + byAgent: { + ...s.byAgent, + [agentKey]: (s.byAgent[agentKey] ?? []).filter( + (e) => e.sessionId !== sessionId, + ), + }, + })), + }), + { name: "agent-preview-chats", storage: electronStorage }, + ), +); diff --git a/packages/ui/src/features/agent-applications/chat/consoleContext.test.ts b/packages/ui/src/features/agent-applications/chat/consoleContext.test.ts new file mode 100644 index 0000000000..d2482a0274 --- /dev/null +++ b/packages/ui/src/features/agent-applications/chat/consoleContext.test.ts @@ -0,0 +1,33 @@ +import { describe, expect, it } from "vitest"; +import { + buildConsoleContextEnvelope, + stripConsoleContext, +} from "./consoleContext"; + +describe("console context envelope", () => { + it("round-trips: strip removes a prepended envelope", () => { + const envelope = buildConsoleContextEnvelope({ page: "agent", agent: "x" }); + const wire = `${envelope}\n\nHello there`; + expect(stripConsoleContext(wire)).toBe("Hello there"); + }); + + it("leaves plain text untouched", () => { + expect(stripConsoleContext("just a message")).toBe("just a message"); + }); + + it("only strips a leading envelope, not one mid-message", () => { + const text = + "please render [console-context]{}[/console-context] literally"; + expect(stripConsoleContext(text)).toBe(text); + }); + + it("tolerates leading whitespace before the envelope", () => { + const wire = ` ${buildConsoleContextEnvelope({ page: "agent-list" })}\n\nhi`; + expect(stripConsoleContext(wire)).toBe("hi"); + }); + + it("is a no-op when the closing delimiter is missing", () => { + const text = "[console-context]{oops"; + expect(stripConsoleContext(text)).toBe(text); + }); +}); diff --git 
/**
 * The agent builder's "what am I looking at" envelope. Mirrors the console: the
 * current page context is prepended to the *first* user message of a session,
 * delimited so the agent can resolve deictic references ("this agent", "this
 * session") without asking — and so the client can strip it before display.
 */

const OPEN = "[console-context]";
const CLOSE = "[/console-context]";

/**
 * Wrap a context object in the delimited envelope (prepended to msg one).
 *
 * The payload is compact JSON on its own line between the two delimiters.
 *
 * @param context - Any JSON-serializable value describing the current page.
 * @returns `OPEN`, the serialized payload and `CLOSE`, newline-separated.
 */
export function buildConsoleContextEnvelope(context: unknown): string {
  // JSON.stringify returns `undefined` (not a string) for `undefined`,
  // functions and symbols; fall back to `null` so the payload is always JSON.
  const payload: string = JSON.stringify(context) ?? "null";
  return `${OPEN}\n${payload}\n${CLOSE}`;
}

/** True when `candidate` parses as JSON (surrounding whitespace allowed). */
function isJsonPayload(candidate: string): boolean {
  try {
    JSON.parse(candidate);
    return true;
  } catch {
    return false;
  }
}

/**
 * Strip a leading console-context block (and the blank line after it) so the
 * envelope never shows in the rendered transcript. No-op when absent.
 *
 * The closing delimiter is chosen as the first `CLOSE` whose preceding payload
 * parses as JSON, so a context value that happens to contain the delimiter
 * text (e.g. an agent name) does not truncate the envelope early. When no
 * candidate parses, the first `CLOSE` is used.
 *
 * @param text - The raw user message as sent on the wire.
 * @returns The message without its leading envelope, or `text` unchanged when
 *   there is no leading envelope or no closing delimiter.
 */
export function stripConsoleContext(text: string): string {
  const start = text.indexOf(OPEN);
  if (start === -1) return text;
  // Only strip a *leading* envelope (allow leading whitespace before it).
  if (text.slice(0, start).trim() !== "") return text;

  const payloadStart = start + OPEN.length;
  const firstClose = text.indexOf(CLOSE, payloadStart);
  if (firstClose === -1) return text;

  let end = firstClose;
  for (
    let candidate = firstClose;
    candidate !== -1;
    candidate = text.indexOf(CLOSE, candidate + 1)
  ) {
    if (isJsonPayload(text.slice(payloadStart, candidate))) {
      end = candidate;
      break;
    }
  }
  return text.slice(end + CLOSE.length).replace(/^\s*\n/, "");
}
}); + + it("emits thinking as an agent_thought_chunk", () => { + const out = conversationToAcpMessages([ + { role: "user", content: "think", timestamp: 1 }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "hmm" }, + { type: "text", text: "answer" }, + ], + timestamp: 2, + }, + ]); + expect(updateOf(out[1])).toEqual({ + sessionUpdate: "agent_thought_chunk", + content: { type: "text", text: "hmm" }, + }); + expect(updateOf(out[2]).sessionUpdate).toBe("agent_message_chunk"); + }); + + it("maps a tool call + its result onto the same toolCallId", () => { + const out = conversationToAcpMessages([ + { role: "user", content: "run it", timestamp: 1 }, + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_1", + name: "@posthog/query", + arguments: { sql: "select 1" }, + }, + ], + timestamp: 2, + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "@posthog/query", + content: [{ type: "text", text: "42" }], + isError: false, + timestamp: 3, + }, + ]); + + const call = updateOf(out[1]); + expect(call).toMatchObject({ + sessionUpdate: "tool_call", + toolCallId: "call_1", + title: "@posthog/query", + rawInput: { sql: "select 1" }, + }); + // No premature status on the call itself. 
+ expect("status" in call).toBe(false); + + const result = updateOf(out[2]); + expect(result).toMatchObject({ + sessionUpdate: "tool_call_update", + toolCallId: "call_1", + status: "completed", + content: [{ type: "content", content: { type: "text", text: "42" } }], + }); + }); + + it("marks an errored tool result as failed", () => { + const out = conversationToAcpMessages([ + { role: "user", content: "x", timestamp: 1 }, + { + role: "assistant", + content: [{ type: "toolCall", id: "c1", name: "t", arguments: {} }], + timestamp: 2, + }, + { + role: "toolResult", + toolCallId: "c1", + toolName: "t", + content: [{ type: "text", text: "boom" }], + isError: true, + timestamp: 3, + }, + ]); + expect(updateOf(out[2])).toMatchObject({ status: "failed" }); + }); + + it("closes the prior turn before each new user prompt and gives unique ids", () => { + const out = conversationToAcpMessages([ + { role: "user", content: "first", timestamp: 1 }, + { + role: "assistant", + content: [{ type: "text", text: "a" }], + timestamp: 2, + }, + { role: "user", content: "second", timestamp: 3 }, + { + role: "assistant", + content: [{ type: "text", text: "b" }], + timestamp: 4, + }, + ]); + + expect(out.map(methodOf)).toEqual([ + "session/prompt", + "session/update", + "_posthog/turn_complete", + "session/prompt", + "session/update", + "_posthog/turn_complete", + ]); + expect((out[0].message as JsonRpcRequest).id).toBe(1); + expect((out[3].message as JsonRpcRequest).id).toBe(2); + }); + + it("returns nothing for an empty conversation", () => { + expect(conversationToAcpMessages([])).toEqual([]); + }); + + it("flattens array-form user content", () => { + expect( + userMessageText([ + { type: "text", text: "a" }, + { type: "image", url: "x" }, + { type: "text", text: "b" }, + ]), + ).toBe("ab"); + expect(userMessageText("plain")).toBe("plain"); + }); +}); diff --git a/packages/ui/src/features/agent-applications/chat/conversationToAcp.ts 
/**
 * Flatten a user message's content (string shorthand or text/image parts).
 * Non-text parts contribute nothing; text parts are concatenated in order.
 */
export function userMessageText(
  content: string | AgentUserContentPart[],
): string {
  if (typeof content === "string") {
    return content;
  }
  return content
    .map((part) => (part.type === "text" ? part.text : ""))
    .join("");
}

/** Concatenate the text parts of a tool result's content. */
function toolResultText(content: { type: "text"; text: string }[]): string {
  return content.map((part) => part.text).join("");
}

/**
 * Translate one assistant content part into a `session/update` message, or
 * `null` for a part type this mapper does not render.
 */
function assistantPartToMessage(
  part: AgentAssistantContentPart,
  ts: number,
): AcpMessage | null {
  switch (part.type) {
    case "text":
      return sessionUpdateMessage(agentTextUpdate(part.text), ts);
    case "thinking":
      return sessionUpdateMessage(agentThoughtUpdate(part.thinking), ts);
    case "toolCall":
      return sessionUpdateMessage(
        toolCallStartUpdate(part.id, part.name, part.arguments),
        ts,
      );
    default:
      return null;
  }
}

/**
 * Map a stored conversation transcript into the `AcpMessage[]` that
 * `ConversationView` renders. Pure and order-preserving.
 *
 * Each `user` message opens a new turn (prompt request with a 1-based,
 * monotonically increasing id). Assistant parts and `toolResult` messages are
 * emitted as session updates inside the open turn; tool results are keyed by
 * `toolCallId`. Every turn is closed with a turn-complete message stamped with
 * the timestamp of the *last message of that turn* — both for intermediate
 * turns and for the final one — so a turn's bracket does not absorb the idle
 * time before the user's next prompt.
 *
 * @param messages - Transcript messages in chronological order.
 * @returns The equivalent ACP messages; empty for an empty transcript.
 */
export function conversationToAcpMessages(
  messages: AgentConversationMessage[],
): AcpMessage[] {
  const out: AcpMessage[] = [];
  let promptId = 0;
  let turnOpen = false;
  // Timestamp of the most recent message already folded into `out`.
  let lastTs = 0;

  for (const message of messages) {
    const ts = message.timestamp;

    if (message.role === "user") {
      // A new user prompt starts a new turn — close the prior one first, at
      // the time of its own last message rather than at this prompt's time.
      if (turnOpen) {
        out.push(turnCompleteMessage(lastTs));
      }
      promptId += 1;
      out.push(
        promptRequestMessage(promptId, userMessageText(message.content), ts),
      );
      turnOpen = true;
    } else if (message.role === "assistant") {
      for (const part of message.content) {
        const acp = assistantPartToMessage(part, ts);
        if (acp) {
          out.push(acp);
        }
      }
    } else {
      // toolResult — finalize the matching tool call within the open turn.
      out.push(
        sessionUpdateMessage(
          toolResultUpdate(
            message.toolCallId,
            toolResultText(message.content),
            message.isError,
          ),
          ts,
        ),
      );
    }

    lastTs = ts;
  }

  if (turnOpen) {
    out.push(turnCompleteMessage(lastTs));
  }
  return out;
}
m.message.method : undefined; +} + +function updateOf(m: AcpMessage): SessionUpdate { + const params = (m.message as JsonRpcNotification) + .params as SessionNotification; + return params.update; +} + +describe("createAgentChatMapper", () => { + it("maps user messages to prompt requests with monotonic ids", () => { + const mapper = createAgentChatMapper(); + const first = mapper.apply(ev("user_message", { text: "hi" })); + const second = mapper.apply(ev("user_message", { text: "again" })); + + expect(methodOf(first[0])).toBe("session/prompt"); + expect((first[0].message as JsonRpcRequest).id).toBe(1); + expect((second[0].message as JsonRpcRequest).id).toBe(2); + }); + + it("drops empty user messages", () => { + const mapper = createAgentChatMapper(); + expect(mapper.apply(ev("user_message", { text: "" }))).toEqual([]); + }); + + it("maps text and thinking deltas", () => { + const mapper = createAgentChatMapper(); + expect( + updateOf(mapper.apply(ev("assistant_text_delta", { text: "a" }))[0]), + ).toEqual({ + sessionUpdate: "agent_message_chunk", + content: { type: "text", text: "a" }, + }); + expect( + updateOf( + mapper.apply(ev("assistant_thinking_delta", { thinking: "t" }))[0], + ), + ).toEqual({ + sessionUpdate: "agent_thought_chunk", + content: { type: "text", text: "t" }, + }); + }); + + it("emits tool_call on first sighting, tool_call_update on follow-up", () => { + const mapper = createAgentChatMapper(); + const start = mapper.apply( + ev("tool_call_start", { id: "c1", name: "@posthog/query" }), + ); + expect(updateOf(start[0])).toMatchObject({ + sessionUpdate: "tool_call", + toolCallId: "c1", + title: "@posthog/query", + status: "in_progress", + }); + + // Canonical tool_call for an already-seen id → update (merges args). 
+ const canonical = mapper.apply( + ev("tool_call", { id: "c1", name: "@posthog/query", args: { sql: "x" } }), + ); + expect(updateOf(canonical[0])).toMatchObject({ + sessionUpdate: "tool_call_update", + toolCallId: "c1", + rawInput: { sql: "x" }, + }); + }); + + it("emits tool_call when the canonical event is the first sighting", () => { + const mapper = createAgentChatMapper(); + const out = mapper.apply( + ev("tool_call", { id: "c2", name: "t", args: { a: 1 } }), + ); + expect(updateOf(out[0])).toMatchObject({ + sessionUpdate: "tool_call", + toolCallId: "c2", + rawInput: { a: 1 }, + }); + }); + + it("maps a successful tool_result to a completed update", () => { + const mapper = createAgentChatMapper(); + mapper.apply(ev("tool_call_start", { id: "c1", name: "t" })); + const out = mapper.apply( + ev("tool_result", { id: "c1", ok: true, output: { rows: 1 } }), + ); + expect(updateOf(out[0])).toMatchObject({ + sessionUpdate: "tool_call_update", + toolCallId: "c1", + status: "completed", + }); + }); + + it("maps an errored tool_result to a failed update with the error text", () => { + const mapper = createAgentChatMapper(); + mapper.apply(ev("tool_call_start", { id: "c1", name: "t" })); + const out = mapper.apply( + ev("tool_result", { id: "c1", ok: false, error: "boom" }), + ); + const update = updateOf(out[0]); + expect(update).toMatchObject({ status: "failed" }); + expect(JSON.stringify(update)).toContain("boom"); + }); + + it("synthesizes a call for a tool_result with no prior start", () => { + const mapper = createAgentChatMapper(); + const out = mapper.apply( + ev("tool_result", { + id: "orphan", + tool: "mystery", + ok: true, + output: "ok", + }), + ); + expect(out).toHaveLength(2); + expect(updateOf(out[0])).toMatchObject({ + sessionUpdate: "tool_call", + toolCallId: "orphan", + }); + expect(updateOf(out[1])).toMatchObject({ + sessionUpdate: "tool_call_update", + status: "completed", + }); + }); + + it("drops streaming arg deltas, snapshots, and lifecycle 
frames", () => { + const mapper = createAgentChatMapper(); + expect(mapper.apply(ev("session_started", {}))).toEqual([]); + expect(mapper.apply(ev("turn_started", { turn: 1 }))).toEqual([]); + expect( + mapper.apply(ev("tool_call_args_delta", { id: "c1", argsDelta: '{"a' })), + ).toEqual([]); + expect(mapper.apply(ev("assistant_text", { text: "full" }))).toEqual([]); + expect(mapper.apply(ev("closed", {}))).toEqual([]); + }); + + it("maps completed/waiting/failed to turn_complete", () => { + const mapper = createAgentChatMapper(); + expect(methodOf(mapper.apply(ev("completed", {}))[0])).toBe( + "_posthog/turn_complete", + ); + expect(methodOf(mapper.apply(ev("waiting", {}))[0])).toBe( + "_posthog/turn_complete", + ); + const failed = mapper.apply(ev("failed", { reason: "x" })); + expect((failed[0].message as JsonRpcNotification).params).toEqual({ + stopReason: "failed", + }); + }); + + it("folds a full streaming turn end-to-end", () => { + const out = sessionEventsToAcpMessages([ + ev("session_started", {}), + ev("user_message", { text: "hello" }), + ev("turn_started", { turn: 1 }), + ev("assistant_thinking_delta", { thinking: "let me" }), + ev("assistant_text_delta", { text: "Hi " }), + ev("assistant_text_delta", { text: "there" }), + ev("tool_call_start", { id: "c1", name: "@posthog/query" }), + ev("tool_call", { id: "c1", name: "@posthog/query", args: { sql: "1" } }), + ev("tool_result", { id: "c1", ok: true, output: "done" }), + ev("assistant_text", { text: "Hi there" }), + ev("completed", {}), + ]); + + expect(out.map(methodOf)).toEqual([ + "session/prompt", + "session/update", // thinking + "session/update", // text "Hi " + "session/update", // text "there" + "session/update", // tool_call + "session/update", // tool_call_update (args) + "session/update", // tool_call_update (result) + "_posthog/turn_complete", + ]); + }); +}); diff --git a/packages/ui/src/features/agent-applications/chat/sessionEventToAcp.ts 
/** Parse an ISO timestamp to epoch milliseconds; unparseable input maps to 0. */
function toEpochMs(iso: string): number {
  const ms = Date.parse(iso);
  return Number.isNaN(ms) ? 0 : ms;
}

/**
 * Render a tool result's `output`/`error` payload as display text.
 *
 * `null`/`undefined` render as the empty string, strings pass through, and
 * everything else is pretty-printed as JSON. Values JSON cannot represent fall
 * back to `String(value)` so the result is always a string.
 */
function outputText(value: unknown): string {
  if (value == null) {
    return "";
  }
  if (typeof value === "string") {
    return value;
  }
  try {
    // JSON.stringify returns `undefined` (not a string) for functions and
    // symbols, and throws for BigInt / circular structures.
    return JSON.stringify(value, null, 2) ?? String(value);
  } catch {
    return String(value);
  }
}

export interface AgentChatMapper {
  /**
   * Optimistically emit the user's just-sent message so it renders the instant
   * they hit send, before the network round-trip. The stream echoes the same
   * message back a beat later as a `user_message` event — that echo is swallowed
   * (matched by text, FIFO) so it isn't rendered twice.
   */
  seedUserMessage(text: string, ts?: number): AcpMessage[];
  /**
   * Continue prompt (request) id numbering past `count` restored turns, so a
   * follow-up message on a resumed chat doesn't collide with a turn rebuilt
   * from the stored transcript.
   */
  setPromptIdBase(count: number): void;
  /** Translate one SSE event into zero or more ACP messages. */
  apply(event: AgentSessionEvent): AcpMessage[];
}

/**
 * Create a mapper that incrementally translates the live SSE stream
 * (`AgentSessionEvent`) into ACP messages.
 *
 * Stateful by necessity: user messages need monotonic JSON-RPC request ids,
 * and tool-call lifecycle events arrive by id and must be distinguished
 * between "first sighting" (emit a `tool_call`) and "follow-up" (emit a
 * `tool_call_update`). Create a fresh mapper per session/stream.
 *
 * Streaming tool-call arg deltas (`tool_call_args_delta`) are intentionally
 * dropped: the canonical `tool_call` event carries the full args.
 */
export function createAgentChatMapper(): AgentChatMapper {
  let promptId = 0;
  const seenToolCalls = new Set<string>();
  // Texts shown optimistically and awaiting their echoed `user_message` frame.
  const pendingOptimistic: string[] = [];

  return {
    seedUserMessage(text: string, ts?: number): AcpMessage[] {
      if (!text) {
        return [];
      }
      promptId += 1;
      pendingOptimistic.push(text);
      return [promptRequestMessage(promptId, text, ts ?? Date.now())];
    },

    setPromptIdBase(count: number): void {
      // Never move the counter backwards.
      promptId = Math.max(promptId, count);
    },

    apply(event: AgentSessionEvent): AcpMessage[] {
      const ts = toEpochMs(event.ts);

      switch (event.kind) {
        case "user_message": {
          if (!event.data.text) {
            return [];
          }
          // The first message may carry an agent builder context envelope —
          // strip it so it never shows in the transcript (and so dedup matches
          // the clean optimistic text the composer rendered).
          const text = stripConsoleContext(event.data.text);
          // Already rendered optimistically on send — swallow the echo.
          if (pendingOptimistic[0] === text) {
            pendingOptimistic.shift();
            return [];
          }
          promptId += 1;
          return [promptRequestMessage(promptId, text, ts)];
        }

        case "assistant_text_delta":
          return event.data.text
            ? [sessionUpdateMessage(agentTextUpdate(event.data.text), ts)]
            : [];

        case "assistant_thinking_delta":
          return event.data.thinking
            ? [
                sessionUpdateMessage(
                  agentThoughtUpdate(event.data.thinking),
                  ts,
                ),
              ]
            : [];

        case "tool_call_start": {
          const { id, name } = event.data;
          if (seenToolCalls.has(id)) {
            return [];
          }
          seenToolCalls.add(id);
          return [
            sessionUpdateMessage(
              toolCallStartUpdate(id, name, undefined, "in_progress"),
              ts,
            ),
          ];
        }

        case "tool_call": {
          const { id, name, args } = event.data;
          // Already announced by `tool_call_start` — send the args as an update.
          if (seenToolCalls.has(id)) {
            return [
              sessionUpdateMessage(
                toolCallArgsUpdate(id, name, args ?? {}),
                ts,
              ),
            ];
          }
          seenToolCalls.add(id);
          return [
            sessionUpdateMessage(
              toolCallStartUpdate(id, name, args ?? {}, "in_progress"),
              ts,
            ),
          ];
        }

        case "tool_result": {
          const { id, ok, output, error, tool } = event.data;
          const isError = ok === false;
          // `||` so a missing *or empty* error still renders a readable reason.
          const text = isError ? error || "tool failed" : outputText(output);
          const messages: AcpMessage[] = [];
          // Defensive: a result for a call we never saw start — synthesize the
          // call so the builder has something to attach the result to.
          if (!seenToolCalls.has(id)) {
            seenToolCalls.add(id);
            messages.push(
              sessionUpdateMessage(toolCallStartUpdate(id, tool ?? "tool"), ts),
            );
          }
          messages.push(
            sessionUpdateMessage(
              toolResultUpdate(id, text, isError, output),
              ts,
            ),
          );
          return messages;
        }

        case "client_tool_result": {
          const { call_id, result, error } = event.data;
          // Any string `error` (even empty) marks the result as failed.
          const isError = typeof error === "string";
          const text = isError
            ? error || "client tool failed"
            : outputText(result);
          return [
            sessionUpdateMessage(
              toolResultUpdate(call_id, text, isError, result),
              ts,
            ),
          ];
        }

        case "completed":
        case "waiting":
          return [turnCompleteMessage(ts)];

        case "failed":
          return [turnCompleteMessage(ts, "failed")];

        // Frames that carry no renderable transcript content. `turn_started`
        // is implicit in the prompt request; `assistant_text` is a turn-end
        // snapshot the deltas already produced; `tool_call_args_delta` is
        // dropped (see the function doc); `client_tool_call` is handled by the
        // host transport (M4); the rest are lifecycle bookkeeping.
        default:
          return [];
      }
    },
  };
}

/** Fold a full event buffer through a fresh mapper — handy for replay/tests. */
export function sessionEventsToAcpMessages(
  events: AgentSessionEvent[],
): AcpMessage[] {
  const mapper = createAgentChatMapper();
  return events.flatMap((event) => mapper.apply(event));
}
`$${v.toFixed(0)}` : `$${v.toFixed(2)}`; +const pct = (v: number): string => `${(v * 100).toFixed(1)}%`; +const secs = (v: number): string => `${v.toFixed(v < 10 ? 1 : 0)}s`; +const int = (v: number): string => v.toLocaleString(); + +/** + * The agent observability dashboard: top-line KPIs with 14-day spark trends + + * WoW deltas plus charts. Pure presentation — `useAgentAnalytics` runs the + * HogQL and shapes the data; this renders one of loading / error / empty / + * populated. + * + * `scope` "overview" is the fleet board blended into the Applications landing + * (KPIs + spend-by-agent + cost-by-model; the agent list below carries the + * per-agent breakdown). "agent" is the per-agent Observability tab (KPIs + + * cost-by-model + tool reliability — spend-by-agent is meaningless for one). + */ +export function AgentAnalyticsView({ + data, + title = "Observability", + subtitle = "Last 7 days · 14-day trend", + aiObservabilityUrl, + isLoading, + isError, + errorMessage, +}: { + data: AgentAnalyticsData | undefined; + title?: string; + subtitle?: string; + /** Deep link into the team's AI observability product for trace-level depth. */ + aiObservabilityUrl?: string | null; + isLoading?: boolean; + isError?: boolean; + errorMessage?: string | null; +}) { + return ( + + + {title ? ( + + + {title} + + {subtitle} + + ) : null} + {aiObservabilityUrl ? ( + + ) : null} + + + {isLoading && !data ? ( + + ) : isError ? ( + + ) : !data || data.empty ? ( + + ) : ( + <> + + + + + + + + + )} + + ); +} + +/* ── KPIs ─────────────────────────────────────────────────────────── */ + +/** + * The four top-line KPI tiles (spend / sessions / failure rate / p95) with + * 14-day spark trends + WoW deltas. Reused standalone on the per-agent Overview + * tab, where it owns its own loading / empty rendering. + */ +export function AgentAnalyticsKpiStrip({ + data, + isLoading, +}: { + data: AgentAnalyticsData | undefined; + isLoading?: boolean; +}) { + if (isLoading && !data) { + return ( +
+ {[0, 1, 2, 3].map((i) => ( + + ))} +
+ ); + } + if (!data || data.empty) { + return ; + } + const { kpis, deltas, daily } = data; + return ( + + + + 0} + /> + + + ); +} + +function KpiTile({ + label, + value, + delta, + deltaUnit = "%", + goodDirection, + attention, + trend, + last, +}: { + label: string; + value: string; + delta?: number | null; + deltaUnit?: "%" | "pp"; + goodDirection?: "up" | "down"; + attention?: boolean; + trend?: number[]; + last?: boolean; +}) { + const theme = useChartTheme(); + const hasDelta = + delta != null && Number.isFinite(delta) && goodDirection != null; + const hasTrend = (trend?.length ?? 0) > 1; + return ( + + + {label} + + + + {value} + + {hasDelta ? ( + + ) : null} + + {hasTrend ? ( +
+ +
+ ) : ( + + {hasDelta ? "vs prior 7d" : " "} + + )} +
+ ); +} + +function DeltaChip({ + value, + unit, + goodDirection, +}: { + value: number; + unit: "%" | "pp"; + goodDirection: "up" | "down"; +}) { + const up = value >= 0; + const good = goodDirection === "up" ? up : !up; + const Arrow = up ? ArrowUpIcon : ArrowDownIcon; + const magnitude = + unit === "pp" + ? Math.abs(value).toFixed(1) + : String(Math.abs(Math.round(value))); + return ( + + + {magnitude} + {unit} + + ); +} + +/* ── Charts ───────────────────────────────────────────────────────── */ + +function CostByModelChart({ rows }: { rows: AgentAnalyticsModelRow[] }) { + const theme = useChartTheme(); + if (rows.length === 0) { + return ; + } + const series: Series[] = [ + { key: "cost", label: "Cost (USD)", data: rows.map((r) => r.spendUsd) }, + ]; + return ( +
+ r.model)} + config={{ + axisOrientation: "horizontal", + showGrid: false, + bars: { fitToHeight: true }, + }} + theme={theme} + /> +
+ ); +} + +/* ── Tables ───────────────────────────────────────────────────────── */ + +function ToolTable({ rows }: { rows: AgentAnalyticsToolRow[] }) { + if (rows.length === 0) { + return ; + } + return ( + + + + + + + + + + + {rows.map((r) => ( + + + + + + + ))} + +
ToolCallsErrorsError rate
{r.tool}{int(r.calls)}{int(r.errors)} + 0 ? "text-(--red-11)" : "text-gray-12"} + > + {pct(r.errorRate)} + +
+ ); +} + +/* ── Primitives ───────────────────────────────────────────────────── */ + +function Panel({ title, children }: { title: string; children: ReactNode }) { + return ( +
+
+ + {title} + +
+
{children}
+
+ ); +} + +function Th({ + children, + className, +}: { + children: ReactNode; + className?: string; +}) { + return ( + + {children} + + ); +} + +function Td({ children }: { children: ReactNode }) { + return ( + + {children} + + ); +} + +function EmptyHint({ text }: { text: string }) { + return {text}; +} + +function EmptyState() { + return ( + + + + No AI activity yet + + + Once your agents run, their model calls, tool spans, cost and latency + show up here — and in full detail in AI observability. + + + ); +} + +function ErrorState({ message }: { message?: string | null }) { + return ( + + + + Couldn't load analytics + + + {message ?? "The query endpoint returned an error."} + + + ); +} + +function LoadingSkeleton() { + return ( + +
+ {[0, 1, 2, 3].map((i) => ( + + ))} +
+ + +
+ ); +} + +function Skel({ className }: { className?: string }) { + return ( +
+ ); +} diff --git a/packages/ui/src/features/agent-applications/components/AgentApplicationDetailView.tsx b/packages/ui/src/features/agent-applications/components/AgentApplicationDetailView.tsx new file mode 100644 index 0000000000..9ca31c8d7f --- /dev/null +++ b/packages/ui/src/features/agent-applications/components/AgentApplicationDetailView.tsx @@ -0,0 +1,80 @@ +import { Flex, Text } from "@radix-ui/themes"; +import { Link } from "@tanstack/react-router"; +import { useAgentAnalytics } from "../hooks/useAgentAnalytics"; +import { useAgentApplication } from "../hooks/useAgentApplication"; +import { useAgentApplicationSessions } from "../hooks/useAgentApplicationSessions"; +import { AgentAnalyticsKpiStrip } from "./AgentAnalyticsView"; +import { AgentDetailEmptyState, AgentDetailLayout } from "./AgentDetailLayout"; +import { AgentSessionRow } from "./AgentSessionRow"; + +/** + * Per-agent Overview pane: the top-level observability KPIs (spend / sessions / + * failure rate / p95 over the last 7 days, with trends + WoW deltas — the same + * metrics as the Observability tab) plus recent sessions. Rendered inside the + * shared {@link AgentDetailLayout} tab shell. + */ +export function AgentApplicationDetailView({ idOrSlug }: { idOrSlug: string }) { + const { data: application } = useAgentApplication(idOrSlug); + const { data: analytics, isLoading: analyticsLoading } = useAgentAnalytics( + application?.id, + "agent", + ); + const { data: sessions, isLoading: sessionsLoading } = + useAgentApplicationSessions(idOrSlug, { limit: 25 }); + + return ( + + +
+ + + Activity · last 7 days + + + View observability → + + + +
+ +
+ + Recent sessions + + {sessionsLoading ? ( + + {[0, 1, 2].map((i) => ( +
+ ))} + + ) : !sessions || sessions.results.length === 0 ? ( + + ) : ( + + {sessions.results.map((session) => ( + + ))} + + )} +
+
+
+ ); +} diff --git a/packages/ui/src/features/agent-applications/components/AgentApplicationsListView.tsx b/packages/ui/src/features/agent-applications/components/AgentApplicationsListView.tsx new file mode 100644 index 0000000000..11f38f5ae0 --- /dev/null +++ b/packages/ui/src/features/agent-applications/components/AgentApplicationsListView.tsx @@ -0,0 +1,256 @@ +import { + ArrowSquareOutIcon, + BroadcastIcon, + CaretRightIcon, + LockKeyIcon, + RobotIcon, +} from "@phosphor-icons/react"; +import type { + AgentAnalyticsAgentRow, + AgentApplication, +} from "@posthog/shared/agent-platform-types"; +import { AgentsTabLayout } from "@posthog/ui/features/agents/components/AgentsTabLayout"; +import { Badge } from "@posthog/ui/primitives/Badge"; +import { openExternalUrl } from "@posthog/ui/shell/openExternal"; +import { Flex, Text } from "@radix-ui/themes"; +import { Link } from "@tanstack/react-router"; +import { useMemo } from "react"; +import { useAuthStateValue } from "../../auth/store"; +import { useAgentAnalytics } from "../hooks/useAgentAnalytics"; +import { useAgentApplications } from "../hooks/useAgentApplications"; +import { useAgentFleetApprovals } from "../hooks/useAgentFleetApprovals"; +import { useAgentFleetLiveSessions } from "../hooks/useAgentFleetLiveSessions"; +import { formatSpendUsd } from "../utils/format"; +import { aiObservabilityTracesUrl } from "../utils/observabilityLinks"; +import { AgentAnalyticsKpiStrip } from "./AgentAnalyticsView"; +import { AgentDetailEmptyState } from "./AgentDetailLayout"; +import { AgentFleetLiveSessionsPanel } from "./AgentFleetLiveSessionsPanel"; + +/** + * The Applications tab: the fleet observability KPIs (spend / sessions / + * failure rate / p95 over the team's `$ai_*` events) blended on top of the list + * of deployed agents. The per-agent rollups from the same analytics query are + * merged into each list row as inline stats, so one fetch powers both the KPI + * strip and the rows. 
Each row links to the per-agent detail view. + */ +export function AgentApplicationsListView() { + const region = useAuthStateValue((s) => s.cloudRegion); + const projectId = useAuthStateValue((s) => s.currentProjectId); + + const { + data: applications, + isLoading, + isError, + error, + } = useAgentApplications(); + const { data: analytics, isLoading: analyticsLoading } = useAgentAnalytics(); + const { data: liveSessions } = useAgentFleetLiveSessions(); + const { data: queuedApprovals } = useAgentFleetApprovals({ state: "queued" }); + const aiObservabilityUrl = aiObservabilityTracesUrl(region, projectId); + const liveCount = liveSessions?.results.length ?? 0; + const pendingCount = queuedApprovals?.length ?? 0; + + // Index the per-agent rollups by application id so each row can show its own + // sessions / spend / failure rate without a second request. + const statsById = useMemo(() => { + const map = new Map(); + for (const row of analytics?.byAgent ?? []) { + map.set(row.id, row); + } + return map; + }, [analytics]); + + return ( + + + + +
+ + + Activity · last 7 days + + {aiObservabilityUrl ? ( + + ) : null} + + +
+ + + + + + Agents + + {isLoading ? ( + + ) : isError ? ( + + ) : !applications || applications.length === 0 ? ( + + ) : ( + applications.map((app) => ( + + )) + )} + +
+
+ ); +} + +/** One entry in the agent list. Shows the application's name, a "Live" or "Draft" label (live means `live_revision` is non-null), and the description; when the description is missing or blank it shows the slug instead, falling back to the id. Inline stats are rendered only when `stats` is provided. */ function ApplicationRow({ + application, + stats, +}: { + application: AgentApplication; + stats?: AgentAnalyticsAgentRow; +}) { + const isLive = application.live_revision != null; + return ( + + + + + + + {application.name} + + + {isLive ? "Live" : "Draft"} + + + + {application.description?.trim() + ? application.description + : (application.slug ?? application.id)} + + + + + {stats ? : null} + + + + ); +} + +/** Inline 7-day rollups shown on an agent row, joined from the fleet query. */ +function RowStats({ stats }: { stats: AgentAnalyticsAgentRow }) { + return ( + + + + 0} + /> + + ); +} + +/** A single value-plus-label pair rendered inside an agent row. `attention` is optional; NOTE(review): presumably it highlights the value when set, but the markup that consumes it is not visible here, so confirm. */ function RowStat({ + label, + value, + attention, +}: { + label: string; + value: string; + attention?: boolean; +}) { + return ( + + + {value} + + + {label} + + + ); +} + +/** + * Operational counts strip — restores the "live now / pending approvals" + * signals the M7 analytics KPIs displaced. Live count anchors the live-now + * panel below; pending links to the fleet approvals queue. + */ +function OperationalStrip({ + liveCount, + pendingCount, +}: { + liveCount: number; + pendingCount: number; +}) { + return ( + +
+ + + {liveCount} + + live now +
+ + + 0 ? "text-(--amber-11)" : "text-gray-12"}`} + > + {pendingCount} + + pending approval{pendingCount === 1 ? "" : "s"} + + +
+ ); +} + +/** Repeats its item markup three times, once per index in [0, 1, 2]. NOTE(review): presumably this is the loading placeholder for the agent list, but the call site markup is not visible here, so confirm. */ function ApplicationsSkeleton() { + return ( + + {[0, 1, 2].map((i) => ( +
+ ))} + + ); +} diff --git a/packages/ui/src/features/agent-applications/components/AgentApprovalDecisionForm.tsx b/packages/ui/src/features/agent-applications/components/AgentApprovalDecisionForm.tsx new file mode 100644 index 0000000000..aa33a74832 --- /dev/null +++ b/packages/ui/src/features/agent-applications/components/AgentApprovalDecisionForm.tsx @@ -0,0 +1,120 @@ +import { CheckIcon, XIcon } from "@phosphor-icons/react"; +import type { + AgentApprovalRequest, + DecideApprovalRequest, +} from "@posthog/shared/agent-platform-types"; +import { Button } from "@posthog/ui/primitives/Button"; +import { Checkbox, Flex, Text, TextArea } from "@radix-ui/themes"; +import { useState } from "react"; + +/** + * Presentational approve/reject controls for a queued approval — proposed-args + * editor (when allowed), reason note, error surface, and the two action + * buttons. Owns its own UI state but takes the decision callback from the + * caller, so both the full `AgentApprovalDetail` (Approvals tab + Fleet route) + * and the inline `AgentChatPendingApprovalCard` (live chat preview) can wrap + * it with their own `useDecideAgentApproval` glue. 
+ */ +export function AgentApprovalDecisionForm({ + approval, + busy, + error, + onSubmit, +}: { + approval: AgentApprovalRequest; + busy: boolean; + error: string | null; + onSubmit: (body: DecideApprovalRequest) => void; +}) { + const allowEdit = approval.approver_scope?.allow_edit === true; + const [reason, setReason] = useState(""); + const [editMode, setEditMode] = useState(false); + const [argsText, setArgsText] = useState(() => + JSON.stringify(approval.proposed_args, null, 2), + ); + const [parseError, setParseError] = useState(null); + + function submit(decision: "approve" | "reject") { + const body: DecideApprovalRequest = { decision }; + if (reason.trim()) body.reason = reason.trim(); + if (decision === "approve" && allowEdit && editMode) { + try { + body.edited_args = JSON.parse(argsText); + } catch (err) { + setParseError(err instanceof Error ? err.message : "Invalid JSON"); + return; + } + } + setParseError(null); + onSubmit(body); + } + + return ( + + {allowEdit ? ( + + + setEditMode(c === true)} + /> + Approve with edits + + + ) : null} + + {allowEdit && editMode ? ( +
+