diff --git a/lib/httpapi/claude.go b/lib/httpapi/claude.go index 641efe47..9ccefdd2 100644 --- a/lib/httpapi/claude.go +++ b/lib/httpapi/claude.go @@ -5,6 +5,11 @@ import ( st "github.com/coder/agentapi/lib/screentracker" ) +// formatPaste wraps message in bracketed paste escape sequences. +// These sequences start with ESC (\x1b), which TUI selection +// widgets (e.g. Claude Code's numbered-choice prompt) interpret +// as "cancel". For selection prompts, callers should use +// MessageTypeRaw to send raw keystrokes directly instead. func formatPaste(message string) []st.MessagePart { return []st.MessagePart{ // Bracketed paste mode start sequence diff --git a/lib/msgfmt/message_box.go b/lib/msgfmt/message_box.go index 1ac75c9e..ca86dd77 100644 --- a/lib/msgfmt/message_box.go +++ b/lib/msgfmt/message_box.go @@ -4,15 +4,22 @@ import ( "strings" ) +// containsHorizontalBorder reports whether the line contains a +// horizontal border made of box-drawing characters (─ or ╌). +func containsHorizontalBorder(line string) bool { + return strings.Contains(line, "───────────────") || + strings.Contains(line, "╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌") +} + // Usually something like -// ─────────────── +// ─────────────── (or ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌) // > -// ─────────────── +// ─────────────── (or ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌) // Used by Claude Code, Goose, and Aider. func findGreaterThanMessageBox(lines []string) int { for i := len(lines) - 1; i >= max(len(lines)-6, 0); i-- { if strings.Contains(lines[i], ">") { - if i > 0 && strings.Contains(lines[i-1], "───────────────") { + if i > 0 && containsHorizontalBorder(lines[i-1]) { return i - 1 } return i @@ -22,14 +29,14 @@ func findGreaterThanMessageBox(lines []string) int { } // Usually something like -// ─────────────── +// ─────────────── (or ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌) // | -// ─────────────── +// ─────────────── (or ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌) func findGenericSlimMessageBox(lines []string) int { for i := len(lines) - 3; i >= max(len(lines)-9, 0); i-- { - if strings.Contains(lines[i], "───────────────") && + if containsHorizontalBorder(lines[i]) && (strings.Contains(lines[i+1], "|") || strings.Contains(lines[i+1], "│") || strings.Contains(lines[i+1], "❯")) && - strings.Contains(lines[i+2], "───────────────") { + containsHorizontalBorder(lines[i+2]) { return i } } diff --git a/lib/msgfmt/testdata/initialization/claude/ready/msg_dashed_box.txt b/lib/msgfmt/testdata/initialization/claude/ready/msg_dashed_box.txt new file mode 100644 index 00000000..f7ab0382 --- /dev/null +++ b/lib/msgfmt/testdata/initialization/claude/ready/msg_dashed_box.txt @@ -0,0 +1,8 @@ + 1 function greet() { + 2 - console.log("Hello, World!"); + 2 + console.log("Hello, Claude!"); + 3 } + ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ + > Try "what does this code do?" + ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ + Syntax theme: Monokai Extended (ctrl+t to disable) diff --git a/lib/msgfmt/testdata/initialization/claude/ready/msg_slim_dashed_box.txt b/lib/msgfmt/testdata/initialization/claude/ready/msg_slim_dashed_box.txt new file mode 100644 index 00000000..87e285f5 --- /dev/null +++ b/lib/msgfmt/testdata/initialization/claude/ready/msg_slim_dashed_box.txt @@ -0,0 +1,8 @@ +╭────────────────────────────────────────────╮ +│ ✻ Welcome to Claude Code! │ +│ │ +│ /help for help │ +╰────────────────────────────────────────────╯ + ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ + │ Type your message... + ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go index cce8c677..452c6ae9 100644 --- a/lib/screentracker/pty_conversation.go +++ b/lib/screentracker/pty_conversation.go @@ -3,6 +3,7 @@ package screentracker import ( "context" "encoding/json" + "errors" "fmt" "log/slog" "os" @@ -16,6 +17,26 @@ import ( "golang.org/x/xerrors" ) +const ( + // writeStabilizeEchoTimeout is the timeout for the echo + // detection WaitFor loop in writeStabilize Phase 1. The + // effective ceiling may be slightly longer because the + // stability check inside the condition runs outside + // WaitFor's timeout select. Non-echoing agents (e.g. TUI + // agents using bracketed paste) will hit this timeout, + // which is non-fatal. + // + // TODO: move to PTYConversationConfig if agents need + // different echo detection windows. + writeStabilizeEchoTimeout = 2 * time.Second + + // writeStabilizeProcessTimeout is the maximum time to wait + // for the screen to change after sending a carriage return. + // This detects whether the agent is actually processing the + // input. + writeStabilizeProcessTimeout = 15 * time.Second +) + // A screenSnapshot represents a snapshot of the PTY at a specific time. type screenSnapshot struct { timestamp time.Time @@ -411,7 +432,19 @@ func (c *PTYConversation) sendMessage(ctx context.Context, messageParts ...Messa return nil } -// writeStabilize writes messageParts to the screen and waits for the screen to stabilize after the message is written. +// writeStabilize writes messageParts to the PTY and waits for +// the agent to process them. It operates in two phases: +// +// Phase 1 (echo detection): writes the message text and waits +// for the screen to change and stabilize. This detects agents +// that echo typed input. If the screen doesn't change within +// writeStabilizeEchoTimeout, this is non-fatal. Many TUI +// agents buffer bracketed-paste input without rendering it. +// +// Phase 2 (processing detection): writes a carriage return +// and waits for the screen to change, indicating the agent +// started processing. This phase is fatal on timeout: if the +// agent doesn't react to Enter, it's unresponsive. func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...MessagePart) error { screenBeforeMessage := c.cfg.AgentIO.ReadScreen() for _, part := range messageParts { @@ -419,9 +452,10 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me return xerrors.Errorf("failed to write message part: %w", err) } } - // wait for the screen to stabilize after the message is written + // Phase 1: wait for the screen to stabilize after the + // message is written (echo detection). if err := util.WaitFor(ctx, util.WaitTimeout{ - Timeout: 15 * time.Second, + Timeout: writeStabilizeEchoTimeout, MinInterval: 50 * time.Millisecond, InitialWait: true, Clock: c.cfg.Clock, @@ -441,14 +475,26 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me } return false, nil }); err != nil { - return xerrors.Errorf("failed to wait for screen to stabilize: %w", err) - } - - // wait for the screen to change after the carriage return is written + if !errors.Is(err, util.WaitTimedOut) { + // Context cancellation or condition errors are fatal. + return xerrors.Errorf("failed to wait for screen to stabilize: %w", err) + } + // Phase 1 timeout is non-fatal: the agent may not echo + // input (e.g. TUI agents buffer bracketed-paste content + // internally). Proceed to Phase 2 to send the carriage + // return. + c.cfg.Logger.Info( + "echo detection timed out, sending carriage return", + "timeout", writeStabilizeEchoTimeout, + ) + } + + // Phase 2: wait for the screen to change after the + // carriage return is written (processing detection). screenBeforeCarriageReturn := c.cfg.AgentIO.ReadScreen() lastCarriageReturnTime := time.Time{} if err := util.WaitFor(ctx, util.WaitTimeout{ - Timeout: 15 * time.Second, + Timeout: writeStabilizeProcessTimeout, MinInterval: 25 * time.Millisecond, Clock: c.cfg.Clock, }, func() (bool, error) { @@ -527,6 +573,14 @@ func (c *PTYConversation) statusLocked() ConversationStatus { return ConversationStatusChanging } + // The send loop gates stableSignal on initialPromptReady. + // Report "changing" until readiness is detected so that Send() + // rejects with ErrMessageValidationChanging instead of blocking + // indefinitely on a stableSignal that will never fire. + if !c.initialPromptReady { + return ConversationStatusChanging + } + // Handle initial prompt readiness: report "changing" until the queue is drained // to avoid the status flipping "changing" -> "stable" -> "changing" if len(c.outboundQueue) > 0 || c.sendingMessage { diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go index 0e7d9635..cad58e83 100644 --- a/lib/screentracker/pty_conversation_test.go +++ b/lib/screentracker/pty_conversation_test.go @@ -7,7 +7,9 @@ import ( "io" "log/slog" "os" + "slices" "sync" + "sync/atomic" "testing" "time" @@ -447,6 +449,125 @@ func TestMessages(t *testing.T) { c, _, _ := newConversation(context.Background(), t) assert.ErrorIs(t, c.Send(st.MessagePartText{Content: ""}), st.ErrMessageValidationEmpty) }) + + t.Run("send-message-no-echo-agent-reacts", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + t.Cleanup(cancel) + + // Given: an agent that doesn't echo typed input but + // reacts to carriage return by updating the screen. + c, _, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) { + a := &testAgent{screen: "prompt"} + a.onWrite = func(data []byte) { + if string(data) == "\r" { + a.screen = "processing..." + } + } + cfg.AgentIO = a + }) + advanceFor(ctx, t, mClock, interval*threshold) + + // When: a message is sent. Phase 1 times out (no echo), + // Phase 2 writes \r and the agent reacts. + sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "hello"}) + + // Then: Send succeeds and the user message is recorded. + msgs := c.Messages() + require.True(t, len(msgs) >= 2) + assert.True(t, slices.ContainsFunc(msgs, func(m st.ConversationMessage) bool { + return m.Role == st.ConversationRoleUser && m.Message == "hello" + }), "expected user message 'hello' in conversation") + }) + + t.Run("send-message-no-echo-no-react", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + t.Cleanup(cancel) + + // Given: an agent that is completely unresponsive. It + // neither echoes input nor reacts to carriage return. + c, _, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) { + a := &testAgent{screen: "prompt"} + a.onWrite = func(data []byte) {} + cfg.AgentIO = a + }) + advanceFor(ctx, t, mClock, interval*threshold) + + // When: a message is sent. Both Phase 1 (echo) and + // Phase 2 (processing) time out. + // Note: can't use sendAndAdvance here because it calls + // require.NoError internally. + var sendErr error + var sendDone atomic.Bool + go func() { + sendErr = c.Send(st.MessagePartText{Content: "hello"}) + sendDone.Store(true) + }() + advanceUntil(ctx, t, mClock, func() bool { return sendDone.Load() }) + + // Then: Send fails with a Phase 2 error (not Phase 1). + require.Error(t, sendErr) + assert.Contains(t, sendErr.Error(), "failed to wait for processing to start") + }) + + t.Run("send-message-no-echo-context-cancelled", func(t *testing.T) { + // Given: a non-echoing agent and a cancellable context. + // The onWrite signals when writeStabilize starts writing + // message parts. This is used to synchronize the cancel. + sendCtx, sendCancel := context.WithCancel(context.Background()) + t.Cleanup(sendCancel) + + writeStarted := make(chan struct{}, 1) + c, _, mClock := newConversation(sendCtx, t, func(cfg *st.PTYConversationConfig) { + a := &testAgent{screen: "prompt"} + a.onWrite = func(data []byte) { + select { + case writeStarted <- struct{}{}: + default: + } + } + cfg.AgentIO = a + }) + advanceFor(sendCtx, t, mClock, interval*threshold) + + // When: a message is sent and the context is cancelled + // during Phase 1 (after the message is written to the + // PTY, before echo detection completes). + var sendErr error + var sendDone atomic.Bool + go func() { + sendErr = c.Send(st.MessagePartText{Content: "hello"}) + sendDone.Store(true) + }() + + // Advance tick-by-tick until writeStabilize starts + // (onWrite fires). This gives the send loop goroutine + // scheduling time between advances. + advanceUntil(sendCtx, t, mClock, func() bool { + select { + case <-writeStarted: + return true + default: + return false + } + }) + + // writeStabilize Phase 1 is now running. Its WaitFor is + // blocked on a mock timer sleep select. Cancel: the + // select sees ctx.Done() immediately. + sendCancel() + + // WaitFor returns ctx.Err(). The errors.Is guard in + // Phase 1 propagates it as fatal. Use Eventually since + // the goroutine needs scheduling time. + require.Eventually(t, sendDone.Load, 5*time.Second, 10*time.Millisecond) + + // Then: the error wraps context.Canceled, not a Phase 2 + // timeout. This validates the errors.Is(WaitTimedOut) + // guard. + require.Error(t, sendErr) + assert.ErrorIs(t, sendErr, context.Canceled) + assert.NotContains(t, sendErr.Error(), "failed to wait for processing to start") + }) } func TestStatePersistence(t *testing.T) { @@ -1075,7 +1196,7 @@ func TestStatePersistence(t *testing.T) { func TestInitialPromptReadiness(t *testing.T) { discardLogger := slog.New(slog.NewTextHandler(io.Discard, nil)) - t.Run("agent not ready - status is stable until agent becomes ready", func(t *testing.T) { + t.Run("agent not ready - status is changing until agent becomes ready", func(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) t.Cleanup(cancel) mClock := quartz.NewMock(t) @@ -1098,9 +1219,9 @@ func TestInitialPromptReadiness(t *testing.T) { // Take a snapshot with "loading...". Threshold is 1 (stability 0 / interval 1s = 0 + 1 = 1). advanceFor(ctx, t, mClock, 1*time.Second) - // Screen is stable and agent is not ready, so initial prompt hasn't been enqueued yet. - // Status should be stable. - assert.Equal(t, st.ConversationStatusStable, c.Status()) + // Screen is stable but agent is not ready. Status must be + // "changing" so that Send() rejects instead of blocking. + assert.Equal(t, st.ConversationStatusChanging, c.Status()) }) t.Run("agent becomes ready - prompt enqueued and status changes to changing", func(t *testing.T) { @@ -1123,10 +1244,9 @@ func TestInitialPromptReadiness(t *testing.T) { c := st.NewPTY(ctx, cfg, &testEmitter{}) c.Start(ctx) - // Agent not ready initially, status should be stable + // Agent not ready initially, status should be changing. advanceFor(ctx, t, mClock, 1*time.Second) - assert.Equal(t, st.ConversationStatusStable, c.Status()) - + assert.Equal(t, st.ConversationStatusChanging, c.Status()) // Agent becomes ready, prompt gets enqueued, status becomes "changing" agent.setScreen("ready") advanceFor(ctx, t, mClock, 1*time.Second) @@ -1158,10 +1278,9 @@ func TestInitialPromptReadiness(t *testing.T) { c := st.NewPTY(ctx, cfg, &testEmitter{}) c.Start(ctx) - // Status is "stable" while waiting for readiness (prompt not yet enqueued). + // Status is "changing" while waiting for readiness (prompt not yet enqueued). advanceFor(ctx, t, mClock, 1*time.Second) - assert.Equal(t, st.ConversationStatusStable, c.Status()) - + assert.Equal(t, st.ConversationStatusChanging, c.Status()) // Agent becomes ready. The snapshot loop detects this, enqueues the prompt, // then sees queue + stable + ready and signals the send loop. // writeStabilize runs with onWrite changing the screen, so it completes. @@ -1179,7 +1298,7 @@ func TestInitialPromptReadiness(t *testing.T) { assert.Equal(t, st.ConversationStatusStable, c.Status()) }) - t.Run("no initial prompt - normal status logic applies", func(t *testing.T) { + t.Run("ReadyForInitialPrompt always false - status is changing", func(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) t.Cleanup(cancel) mClock := quartz.NewMock(t) @@ -1200,8 +1319,10 @@ func TestInitialPromptReadiness(t *testing.T) { advanceFor(ctx, t, mClock, 1*time.Second) - // Status should be stable because no initial prompt to wait for. - assert.Equal(t, st.ConversationStatusStable, c.Status()) + // Even without an initial prompt, stableSignal gates on + // initialPromptReady. Status must reflect that Send() + // would block. + assert.Equal(t, st.ConversationStatusChanging, c.Status()) }) t.Run("no initial prompt configured - normal status logic applies", func(t *testing.T) { @@ -1618,3 +1739,38 @@ func TestInitialPromptSent(t *testing.T) { } }) } + +func TestSendRejectsWhenInitialPromptNotReady(t *testing.T) { + // Regression test for https://github.com/coder/agentapi/issues/209. + // Send() used to block forever when ReadyForInitialPrompt never + // returned true, because statusLocked() reported "stable" while + // stableSignal required initialPromptReady. Now statusLocked() + // returns "changing" and Send() rejects immediately. + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + t.Cleanup(cancel) + + mClock := quartz.NewMock(t) + agent := &testAgent{screen: "onboarding screen without message box"} + cfg := st.PTYConversationConfig{ + Clock: mClock, + SnapshotInterval: 100 * time.Millisecond, + ScreenStabilityLength: 200 * time.Millisecond, + AgentIO: agent, + ReadyForInitialPrompt: func(message string) bool { + return false // Simulates failed message box detection. + }, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + } + c := st.NewPTY(ctx, cfg, &testEmitter{}) + c.Start(ctx) + + // Fill snapshot buffer to reach stability. + advanceFor(ctx, t, mClock, 300*time.Millisecond) + + // Status reports "changing" because initialPromptReady is false. + assert.Equal(t, st.ConversationStatusChanging, c.Status()) + + // Send() rejects immediately instead of blocking forever. + err := c.Send(st.MessagePartText{Content: "hello"}) + assert.ErrorIs(t, err, st.ErrMessageValidationChanging) +}