agentapi/lib/screentracker/pty_conversation.go at a5bbf3fa38d3eac56bf8349c420adb3b5d2bb61c · coder/agentapi · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
package screentracker

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/coder/agentapi/lib/msgfmt"
	"github.com/coder/agentapi/lib/util"
	"github.com/coder/quartz"
	"golang.org/x/xerrors"
)

const (
	// writeStabilizeEchoTimeout is the timeout for the echo
	// detection WaitFor loop in writeStabilize Phase 1. The
	// effective ceiling may be slightly longer because the 1s
	// stability check inside the condition runs outside
	// WaitFor's timeout select. Non-echoing agents (e.g. TUI
	// agents using bracketed paste) will hit this timeout,
	// which is non-fatal.
	//
	// TODO: move to PTYConversationConfig if agents need
	// different echo detection windows.
	writeStabilizeEchoTimeout = 2 * time.Second

	// writeStabilizeProcessTimeout is the maximum time to wait
	// for the screen to change after sending a carriage return.
	// This detects whether the agent is actually processing the
	// input. Unlike the echo timeout, exceeding this one makes
	// writeStabilize return an error.
	writeStabilizeProcessTimeout = 15 * time.Second
)

// A screenSnapshot represents a snapshot of the PTY at a specific time.
// timestamp is the clock reading taken when the snapshot was recorded and
// screen is the screen text captured at that moment.
type screenSnapshot struct {
	timestamp time.Time
	screen    string
}

// MessagePartText is a plain-text message part.
//
// Content is the text written to the agent by Do. Alias, when non-empty, is
// what String returns in place of Content. Hidden makes String return an
// empty string regardless of Content and Alias.
type MessagePartText struct {
	Content string
	Alias   string
	Hidden  bool
}

// AgentState is the JSON document written by SaveState and read back by
// loadStateLocked.
//
// Version is the schema version; this file writes 1 and rejects anything
// else on load. Messages is the conversation history. InitialPrompt is the
// initial prompt flattened to a string, and InitialPromptSent records whether
// that prompt had already been enqueued when the state was saved.
type AgentState struct {
	Version           int                   `json:"version"`
	Messages          []ConversationMessage `json:"messages"`
	InitialPrompt     string                `json:"initial_prompt"`
	InitialPromptSent bool                  `json:"initial_prompt_sent"`
}

// LoadStateStatus represents the state of loading persisted conversation state.
// The zero value is LoadStatePending.
type LoadStateStatus int

const (
	// LoadStatePending indicates state loading has not been attempted yet.
	LoadStatePending LoadStateStatus = iota
	// LoadStateSucceeded indicates state was successfully loaded.
	LoadStateSucceeded
	// LoadStateFailed indicates state loading was attempted but failed.
	// The snapshot loop also records this status when the state file does
	// not exist, because loadStateLocked reports that case as an error.
	LoadStateFailed
)

// Compile-time assertion that *MessagePartText implements MessagePart.
var _ MessagePart = &MessagePartText{}

// Do writes the part's Content to the agent as raw bytes and returns the
// write error, if any. Alias and Hidden are not consulted here; they only
// affect String.
func (p MessagePartText) Do(writer AgentIO) error {
	payload := []byte(p.Content)
	_, writeErr := writer.Write(payload)
	return writeErr
}

// String returns the display form of the part: an empty string when the part
// is Hidden, otherwise Alias if one is set, otherwise Content.
func (p MessagePartText) String() string {
	switch {
	case p.Hidden:
		return ""
	case p.Alias != "":
		return p.Alias
	default:
		return p.Content
	}
}

// outboundMessage wraps a message to be sent with its error channel.
// errCh receives the result of the send and is then closed by the send loop.
// It is nil for the initial prompt enqueued by the snapshot loop, in which
// case no result is reported.
type outboundMessage struct {
	parts []MessagePart
	errCh chan error
}

// PTYConversationConfig is the configuration for a PTYConversation.
// AgentType is forwarded to screenDiff when the agent message is extracted,
// and AgentIO is the handle used to read the screen and write input.
type PTYConversationConfig struct {
	AgentType msgfmt.AgentType
	AgentIO   AgentIO
	// Clock provides time operations for the conversation.
	// NewPTY substitutes quartz.NewReal() when it is nil.
	Clock quartz.Clock
	// How often to take a snapshot for the stability check
	SnapshotInterval time.Duration
	// How long the screen should not change to be considered stable
	ScreenStabilityLength time.Duration
	// Function to format the messages received from the agent
	// userInput is the last user message. Optional: skipped when nil.
	FormatMessage func(message string, userInput string) string
	// ReadyForInitialPrompt detects whether the agent has initialized and is ready to accept the initial prompt.
	// When nil, NewPTY installs a function that always reports ready.
	ReadyForInitialPrompt func(message string) bool
	// FormatToolCall removes the coder report_task tool call from the agent message and also returns the array of removed tool calls.
	// Optional: skipped when nil.
	FormatToolCall func(message string) (string, []string)
	// InitialPrompt is the initial prompt to send to the agent once ready
	InitialPrompt          []MessagePart
	Logger                 *slog.Logger
	StatePersistenceConfig StatePersistenceConfig
}

// getStableSnapshotsThreshold returns how many consecutive identical
// snapshots are required before the screen is considered stable: the number
// of SnapshotIntervals needed to cover ScreenStabilityLength, rounded up,
// plus one (presumably because N intervals are delimited by N+1 snapshots).
//
// The division is carried out on the raw durations rather than on whole
// milliseconds. Converting to milliseconds first truncated a sub-millisecond
// SnapshotInterval to zero, which caused an integer divide-by-zero, and it
// also mis-rounded durations that are not millisecond-aligned.
//
// A non-positive SnapshotInterval is a configuration error and panics with a
// descriptive message instead of an opaque runtime arithmetic panic.
func (cfg PTYConversationConfig) getStableSnapshotsThreshold() int {
	length := cfg.ScreenStabilityLength
	interval := cfg.SnapshotInterval
	if interval <= 0 {
		panic(fmt.Sprintf("snapshot interval must be positive, got %s", interval))
	}
	threshold := int(length / interval)
	if length%interval != 0 {
		// Partial trailing interval still needs a full snapshot period.
		threshold++
	}
	return threshold + 1
}

// PTYConversation is a conversation that uses a pseudo-terminal (PTY) for communication.
// It uses a combination of polling and diffs to detect changes in the screen.
//
// lock guards the mutable fields of the struct; methods whose name ends in
// "Locked" expect the caller to already hold it. screenBeforeLastUserMessage
// is the screen captured just before the most recent user message was
// written and is the baseline passed to screenDiff.
type PTYConversation struct {
	cfg     PTYConversationConfig
	emitter Emitter
	// How many stable snapshots are required to consider the screen stable
	stableSnapshotsThreshold    int
	snapshotBuffer              *RingBuffer[screenSnapshot]
	messages                    []ConversationMessage
	screenBeforeLastUserMessage string
	lock                        sync.Mutex

	// outboundQueue holds messages waiting to be sent to the agent.
	// Buffer size is 1. Callers are expected to be serialized (the HTTP
	// layer holds s.mu, and Send blocks until the message is processed),
	// so ordering is preserved.
	outboundQueue chan outboundMessage
	// sendingMessage is true while the send loop is processing a message.
	// Set under lock in the snapshot loop when signaling, cleared under
	// lock in the send loop after sendMessage returns.
	sendingMessage bool
	// writingMessage is true while writeStabilize is executing.
	// When true, updateLastAgentMessageLocked skips updates to avoid capturing terminal echo.
	writingMessage bool
	// stableSignal is used by the snapshot loop to signal the send loop
	// when the agent is stable and there are items in the outbound queue.
	// Buffer size is 1, so at most one signal is pending at a time.
	stableSignal chan struct{}
	// toolCallMessageSet keeps track of the tool calls that have been detected & logged in the current agent message.
	// It is reset whenever a new agent message is started.
	toolCallMessageSet map[string]bool
	// dirty tracks whether the conversation state has changed since the last save
	dirty bool
	// userSentMessageAfterLoadState tracks if the user has sent their first message after we load the state
	userSentMessageAfterLoadState bool
	// loadStateStatus tracks the status of loading conversation state from file.
	loadStateStatus LoadStateStatus
	// initialPromptReady is set to true when ReadyForInitialPrompt returns true.
	// Checked inline in the snapshot loop on each tick.
	initialPromptReady bool
	// initialPromptSent is set to true when the initial prompt has been enqueued to the outbound queue.
	initialPromptSent bool
}

// Compile-time assertion that *PTYConversation implements Conversation.
var _ Conversation = &PTYConversation{}

// noopEmitter is an Emitter whose methods discard everything they are given.
// NewPTY installs it when the caller passes a nil emitter.
type noopEmitter struct{}

// All four methods are intentionally empty.
func (noopEmitter) EmitMessages([]ConversationMessage) {}
func (noopEmitter) EmitStatus(ConversationStatus)      {}
func (noopEmitter) EmitScreen(string)                  {}
func (noopEmitter) EmitError(_ string, _ ErrorLevel)   {}

// NewPTY builds a PTYConversation from cfg. It does not start any goroutines;
// call Start to begin the snapshot and send loops.
//
// Nil optional dependencies are replaced with working defaults so that later
// calls never dereference nil:
//   - cfg.Clock defaults to a real clock,
//   - cfg.Logger defaults to slog.Default(),
//   - cfg.ReadyForInitialPrompt defaults to "always ready",
//   - emitter defaults to a no-op emitter.
//
// The conversation starts with a single empty agent message stamped with the
// current clock time. ctx is accepted for API compatibility and is currently
// unused here.
func NewPTY(ctx context.Context, cfg PTYConversationConfig, emitter Emitter) *PTYConversation {
	if cfg.Clock == nil {
		cfg.Clock = quartz.NewReal()
	}
	if cfg.Logger == nil {
		// The conversation logs unconditionally in several places; a nil
		// *slog.Logger would panic on first use.
		cfg.Logger = slog.Default()
	}
	if cfg.ReadyForInitialPrompt == nil {
		cfg.ReadyForInitialPrompt = func(string) bool { return true }
	}
	if emitter == nil {
		emitter = noopEmitter{}
	}

	// The ring buffer capacity must equal the threshold; statusLocked
	// asserts this invariant.
	threshold := cfg.getStableSnapshotsThreshold()
	return &PTYConversation{
		cfg:                      cfg,
		emitter:                  emitter,
		stableSnapshotsThreshold: threshold,
		snapshotBuffer:           NewRingBuffer[screenSnapshot](threshold),
		messages: []ConversationMessage{
			{
				Message: "",
				Role:    ConversationRoleAgent,
				Time:    cfg.Clock.Now(),
			},
		},
		outboundQueue:      make(chan outboundMessage, 1),
		stableSignal:       make(chan struct{}, 1),
		toolCallMessageSet: make(map[string]bool),
		loadStateStatus:    LoadStatePending,
	}
}

// Start launches the two background activities of the conversation, both of
// which are bound to ctx:
//
//   - the snapshot loop, a ticker callback run every SnapshotInterval that
//     records the screen, detects readiness, loads persisted state, enqueues
//     the initial prompt, signals the send loop and emits events;
//   - the send loop, a goroutine that waits for stableSignal and then sends
//     the queued message via sendMessage.
//
// Start itself does not wait for ctx to be cancelled.
func (c *PTYConversation) Start(ctx context.Context) {
	// Snapshot loop
	c.cfg.Clock.TickerFunc(ctx, c.cfg.SnapshotInterval, func() error {
		c.lock.Lock()
		screen := c.cfg.AgentIO.ReadScreen()
		c.snapshotLocked(screen)
		// status and messages are captured here, before the readiness, load
		// and enqueue steps below, so the values emitted at the end of this
		// tick reflect the state at this point.
		status := c.statusLocked()
		messages := c.messagesLocked()

		// Latch readiness once ReadyForInitialPrompt accepts the screen.
		// We check readiness independently of statusLocked() because
		// statusLocked() returns "changing" when queue has items.
		if !c.initialPromptReady && c.cfg.ReadyForInitialPrompt(screen) {
			c.initialPromptReady = true
		}

		// Persisted state is loaded at most once, after the agent is ready.
		// loadErr is only filled in for failures that should reach the user.
		var loadErr string
		if c.initialPromptReady && c.loadStateStatus == LoadStatePending && c.cfg.StatePersistenceConfig.LoadState {
			if err, shouldEmit := c.loadStateLocked(); err != nil {
				c.loadStateStatus = LoadStateFailed
				if shouldEmit {
					c.cfg.Logger.Error("Failed to load state", "error", err)
					loadErr = fmt.Sprintf("Failed to restore previous session: %v", err)
				}
			} else {
				c.loadStateStatus = LoadStateSucceeded
			}
		}

		if c.initialPromptReady && len(c.cfg.InitialPrompt) > 0 && !c.initialPromptSent {
			// Safe to send under lock: the queue is guaranteed empty here because
			// statusLocked blocks Send until the snapshot buffer fills, which
			// cannot happen before this first enqueue completes.
			c.outboundQueue <- outboundMessage{parts: c.cfg.InitialPrompt, errCh: nil}
			c.initialPromptSent = true
			c.dirty = true
		}

		// Signal the send loop if the agent is ready, the queue has items and
		// the screen is stable. sendingMessage is only set when the signal
		// was actually delivered.
		if c.initialPromptReady && len(c.outboundQueue) > 0 && c.isScreenStableLocked() {
			select {
			case c.stableSignal <- struct{}{}:
				c.sendingMessage = true
			default:
				// Signal already pending
			}
		}
		c.lock.Unlock()

		// Emit after releasing the lock.
		if loadErr != "" {
			c.emitter.EmitError(loadErr, ErrorLevelWarning)
		}
		c.emitter.EmitStatus(status)
		c.emitter.EmitMessages(messages)
		c.emitter.EmitScreen(screen)
		return nil
	}, "snapshot")

	// Send loop: waits for stableSignal and hands the queued message to
	// sendMessage.
	go func() {
		defer func() {
			// Drain outbound queue so Send() callers don't block forever.
			// Each drained caller receives ctx.Err().
			for {
				select {
				case msg := <-c.outboundQueue:
					if msg.errCh != nil {
						msg.errCh <- ctx.Err()
						close(msg.errCh)
					}
				default:
					return
				}
			}
		}()
		for {
			select {
			case <-ctx.Done():
				return
			case <-c.stableSignal:
				select {
				case <-ctx.Done():
					return
				case msg := <-c.outboundQueue:
					err := c.sendMessage(ctx, msg.parts...)
					c.lock.Lock()
					c.sendingMessage = false
					c.lock.Unlock()
					if msg.errCh != nil {
						msg.errCh <- err
						// Close so the Send() caller's <-errCh never blocks
						// if it has already consumed the error value.
						close(msg.errCh)
					}
				default:
					// Not expected: the snapshot loop only signals while the
					// queue is non-empty. NOTE(review): sendingMessage is not
					// cleared on this path.
					c.cfg.Logger.Error("received stable signal but outbound queue is empty")
				}
			}
		}
	}()
}

// lastMessage returns the most recent message with the given role, or a zero
// ConversationMessage when the history contains none. It reads c.messages
// without locking, so the caller is responsible for holding c.lock.
func (c *PTYConversation) lastMessage(role ConversationRole) ConversationMessage {
	// Walk backwards so the first match is the newest one.
	for remaining := len(c.messages); remaining > 0; remaining-- {
		if candidate := c.messages[remaining-1]; candidate.Role == role {
			return candidate
		}
	}
	return ConversationMessage{}
}

// updateLastAgentMessageLocked recomputes the agent's current message from
// screen and stores it in the history. Caller MUST hold c.lock.
//
// The message is the diff between the screen captured before the last user
// message and screen, passed through the optional FormatMessage and
// FormatToolCall hooks. It either replaces the trailing message or, when the
// history is empty or ends with a user message, is appended as a new one.
// Nothing happens while a message is being written, or when the text equals
// the latest agent message.
func (c *PTYConversation) updateLastAgentMessageLocked(screen string, timestamp time.Time) {
	if c.writingMessage {
		// Skip while input is being typed so terminal echo is not captured.
		return
	}

	text := screenDiff(c.screenBeforeLastUserMessage, screen, c.cfg.AgentType)
	if format := c.cfg.FormatMessage; format != nil {
		text = format(text, c.lastMessage(ConversationRoleUser).Message)
	}

	count := len(c.messages)
	// After a successful state load, and until the user sends something, the
	// restored trailing agent message wins over whatever the screen shows.
	restored := c.loadStateStatus == LoadStateSucceeded && !c.userSentMessageAfterLoadState
	if restored && count > 0 && c.messages[count-1].Role == ConversationRoleAgent {
		text = c.messages[count-1].Message
	}

	var detected []string
	if c.cfg.FormatToolCall != nil {
		text, detected = c.cfg.FormatToolCall(text)
	}
	// Log each tool call only once per agent message.
	for _, call := range detected {
		if c.toolCallMessageSet[call] {
			continue
		}
		c.toolCallMessageSet[call] = true
		c.cfg.Logger.Info("Tool call detected", "toolCall", call)
	}

	startsNewMessage := count == 0 || c.messages[count-1].Role == ConversationRoleUser
	if c.lastMessage(ConversationRoleAgent).Message == text {
		return
	}

	entry := ConversationMessage{
		Message: text,
		Role:    ConversationRoleAgent,
		Time:    timestamp,
	}
	if startsNewMessage {
		c.messages = append(c.messages, entry)
		// A fresh agent message starts with no tool calls seen yet.
		c.toolCallMessageSet = make(map[string]bool)
	} else {
		c.messages[count-1] = entry
	}
	// The message id is its index in the history.
	tail := len(c.messages) - 1
	c.messages[tail].Id = tail

	c.dirty = true
}

// snapshotLocked records screen in the snapshot ring buffer, stamped with the
// current clock time, and then refreshes the last agent message from it using
// the same timestamp. Caller MUST hold c.lock.
func (c *PTYConversation) snapshotLocked(screen string) {
	takenAt := c.cfg.Clock.Now()
	c.snapshotBuffer.Add(screenSnapshot{timestamp: takenAt, screen: screen})
	c.updateLastAgentMessageLocked(screen, takenAt)
}

// Send validates the message, enqueues it for the send loop and blocks until
// the send loop reports the outcome.
//
// It returns ErrMessageValidationWhitespace when the message has leading or
// trailing whitespace, ErrMessageValidationEmpty when it is empty, and
// ErrMessageValidationChanging when the conversation is not currently stable.
// Otherwise the returned error is whatever the send loop delivered.
//
// NOTE(review): Send takes no context, so it relies on the send loop (or its
// shutdown drain) to write to the reply channel — confirm callers do not
// invoke Send after the send loop has exited.
func (c *PTYConversation) Send(messageParts ...MessagePart) error {
	// Validate message content before enqueueing
	text := buildStringFromMessageParts(messageParts)
	switch {
	case text != msgfmt.TrimWhitespace(text):
		return ErrMessageValidationWhitespace
	case text == "":
		return ErrMessageValidationEmpty
	}

	c.lock.Lock()
	status := c.statusLocked()
	c.lock.Unlock()
	if status != ConversationStatusStable {
		return ErrMessageValidationChanging
	}

	// Buffered so the send loop can deliver the result without blocking.
	reply := make(chan error, 1)
	c.outboundQueue <- outboundMessage{parts: messageParts, errCh: reply}
	return <-reply
}

// sendMessage delivers a message to the agent and, on success, records it as
// a user message in the history.
//
// c.lock is held only around accesses to shared state; it is released for the
// duration of writeStabilize so the snapshot loop is not blocked. While the
// write is in flight writingMessage is true, which suppresses agent-message
// updates. On failure the history is left untouched and the error is wrapped.
func (c *PTYConversation) sendMessage(ctx context.Context, messageParts ...MessagePart) error {
	text := buildStringFromMessageParts(messageParts)

	c.lock.Lock()
	priorScreen := c.cfg.AgentIO.ReadScreen()
	sentAt := c.cfg.Clock.Now()
	// Bring the agent message up to date before input starts changing the screen.
	c.updateLastAgentMessageLocked(priorScreen, sentAt)
	c.writingMessage = true
	c.lock.Unlock()

	writeErr := c.writeStabilize(ctx, messageParts...)

	c.lock.Lock()
	defer c.lock.Unlock()
	c.writingMessage = false
	if writeErr != nil {
		return xerrors.Errorf("failed to send message: %w", writeErr)
	}

	// The pre-write screen becomes the baseline for diffing the agent's reply.
	c.screenBeforeLastUserMessage = priorScreen
	c.messages = append(c.messages, ConversationMessage{
		Id:      len(c.messages),
		Message: text,
		Role:    ConversationRoleUser,
		Time:    sentAt,
	})
	c.userSentMessageAfterLoadState = true
	return nil
}

// writeStabilize writes messageParts to the PTY and waits for
// the agent to process them. It operates in two phases:
//
// Phase 1 (echo detection): writes the message text and waits
// for the screen to change and stabilize. This detects agents
// that echo typed input. If the screen doesn't change within
// writeStabilizeEchoTimeout, this is non-fatal — many TUI
// agents buffer bracketed-paste input without rendering it.
//
// Phase 2 (processing detection): writes a carriage return
// and waits for the screen to change, indicating the agent
// started processing. This phase is fatal on timeout — if the
// agent doesn't react to Enter, it's unresponsive.
//
// It returns an error when a message part cannot be written, when ctx is
// cancelled, when the carriage return cannot be written, or when Phase 2
// times out. It does not take c.lock.
func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...MessagePart) error {
	screenBeforeMessage := c.cfg.AgentIO.ReadScreen()
	for _, part := range messageParts {
		if err := part.Do(c.cfg.AgentIO); err != nil {
			return xerrors.Errorf("failed to write message part: %w", err)
		}
	}
	// wait for the screen to stabilize after the message is written
	if err := util.WaitFor(ctx, util.WaitTimeout{
		Timeout:     writeStabilizeEchoTimeout,
		MinInterval: 50 * time.Millisecond,
		InitialWait: true,
		Clock:       c.cfg.Clock,
	}, func() (bool, error) {
		screen := c.cfg.AgentIO.ReadScreen()
		if screen != screenBeforeMessage {
			// The screen changed; require it to stay identical for one
			// second before treating the echo as settled.
			stabilityTimer := c.cfg.Clock.NewTimer(1 * time.Second)
			select {
			case <-ctx.Done():
				stabilityTimer.Stop()
				return false, ctx.Err()
			case <-stabilityTimer.C:
			}
			stabilityTimer.Stop()
			newScreen := c.cfg.AgentIO.ReadScreen()
			return newScreen == screen, nil
		}
		return false, nil
	}); err != nil {
		if !errors.Is(err, util.WaitTimedOut) {
			// Context cancellation or condition errors are fatal.
			return xerrors.Errorf("failed to wait for screen to stabilize: %w", err)
		}
		// Phase 1 timeout is non-fatal: the agent may not echo
		// input (e.g. TUI agents buffer bracketed-paste content
		// internally). Proceed to Phase 2 to send the carriage
		// return.
		c.cfg.Logger.Info(
			"echo detection timed out, sending carriage return",
			"timeout", writeStabilizeEchoTimeout,
		)
	}

	// wait for the screen to change after the carriage return is written
	screenBeforeCarriageReturn := c.cfg.AgentIO.ReadScreen()
	// Starts at the zero time so the first elapsed-time check below is
	// expected to pass and the first carriage return goes out immediately.
	lastCarriageReturnTime := time.Time{}
	if err := util.WaitFor(ctx, util.WaitTimeout{
		Timeout:     writeStabilizeProcessTimeout,
		MinInterval: 25 * time.Millisecond,
		Clock:       c.cfg.Clock,
	}, func() (bool, error) {
		// we don't want to spam additional carriage returns because the agent may process them
		// (aider does this), but we do want to retry sending one if nothing's
		// happening for a while
		if c.cfg.Clock.Since(lastCarriageReturnTime) >= 3*time.Second {
			lastCarriageReturnTime = c.cfg.Clock.Now()
			if _, err := c.cfg.AgentIO.Write([]byte("\r")); err != nil {
				return false, xerrors.Errorf("failed to write carriage return: %w", err)
			}
		}
		// Give the agent a short moment to react before re-reading the screen.
		crTimer := c.cfg.Clock.NewTimer(25 * time.Millisecond)
		select {
		case <-ctx.Done():
			crTimer.Stop()
			return false, ctx.Err()
		case <-crTimer.C:
		}
		crTimer.Stop()
		screen := c.cfg.AgentIO.ReadScreen()

		// Any difference from the pre-Enter screen counts as processing.
		return screen != screenBeforeCarriageReturn, nil
	}); err != nil {
		return xerrors.Errorf("failed to wait for processing to start: %w", err)
	}

	return nil
}

// Status reports the current conversation status. It acquires c.lock and
// delegates to statusLocked.
func (c *PTYConversation) Status() ConversationStatus {
	c.lock.Lock()
	defer c.lock.Unlock()

	return c.statusLocked()
}

// isScreenStableLocked reports whether the snapshot buffer holds at least
// stableSnapshotsThreshold snapshots and every one of them shows the same
// screen. Caller MUST hold c.lock.
func (c *PTYConversation) isScreenStableLocked() bool {
	history := c.snapshotBuffer.GetAll()
	if len(history) < c.stableSnapshotsThreshold {
		return false
	}
	// Compare every later snapshot against the oldest one.
	for i := len(history) - 1; i >= 1; i-- {
		if history[i].screen != history[0].screen {
			return false
		}
	}
	return true
}

// statusLocked derives the conversation status from the current state.
// Caller MUST hold c.lock.
//
// It panics if the snapshot buffer capacity does not match the stability
// threshold, or if the threshold is zero, since stability cannot be judged
// in either case.
func (c *PTYConversation) statusLocked() ConversationStatus {
	// sanity checks
	if capacity := c.snapshotBuffer.Capacity(); capacity != c.stableSnapshotsThreshold {
		panic(fmt.Sprintf("snapshot buffer capacity %d is not equal to snapshot threshold %d. can't check stability", capacity, c.stableSnapshotsThreshold))
	}
	if c.stableSnapshotsThreshold == 0 {
		panic("stable snapshots threshold is 0. can't check stability")
	}

	buffered := len(c.snapshotBuffer.GetAll())
	newest := len(c.messages) - 1

	switch {
	case newest >= 0 && c.messages[newest].Role == ConversationRoleUser:
		// A trailing user message means the snapshot loop has not run since
		// it was sent, so assume the screen is changing.
		return ConversationStatusChanging
	case buffered != c.stableSnapshotsThreshold:
		// Not enough snapshots collected yet.
		return ConversationStatusInitializing
	case !c.isScreenStableLocked():
		return ConversationStatusChanging
	case !c.initialPromptReady:
		// stableSignal is gated on initialPromptReady. Until readiness is
		// detected, report "changing" so Send() rejects with
		// ErrMessageValidationChanging rather than blocking indefinitely on
		// a signal that will never fire.
		return ConversationStatusChanging
	case len(c.outboundQueue) > 0 || c.sendingMessage:
		// Stay "changing" until the queue is drained, so the status does not
		// flip "changing" -> "stable" -> "changing".
		return ConversationStatusChanging
	default:
		return ConversationStatusStable
	}
}

// Messages returns a copy of the conversation history. It acquires c.lock
// and delegates to messagesLocked, so the caller may modify the returned
// slice without affecting the conversation.
func (c *PTYConversation) Messages() []ConversationMessage {
	c.lock.Lock()
	defer c.lock.Unlock()

	return c.messagesLocked()
}

// messagesLocked returns a freshly allocated copy of the message history, so
// the result can be used after the lock is released. The result is never nil,
// even when the history is empty. Caller MUST hold c.lock.
func (c *PTYConversation) messagesLocked() []ConversationMessage {
	clone := make([]ConversationMessage, 0, len(c.messages))
	return append(clone, c.messages...)
}

// Text returns the screen content of the most recent snapshot, or an empty
// string when no snapshot has been taken yet.
func (c *PTYConversation) Text() string {
	c.lock.Lock()
	defer c.lock.Unlock()

	history := c.snapshotBuffer.GetAll()
	if n := len(history); n > 0 {
		return history[n-1].screen
	}
	return ""
}

// SaveState persists the conversation to the configured state file as JSON.
//
// It is a no-op, returning nil, when persistence is disabled or nothing has
// changed since the last save. Otherwise the state is written to
// "<state file>.tmp", synced to disk, and renamed over the state file, so the
// state file itself is only ever replaced by a completely written document.
// The temp file is removed if any step before the rename fails. c.lock is
// held for the whole call.
func (c *PTYConversation) SaveState() error {
	c.lock.Lock()
	defer c.lock.Unlock()

	target := c.cfg.StatePersistenceConfig.StateFile
	if !c.cfg.StatePersistenceConfig.SaveState {
		c.cfg.Logger.Info("State persistence is disabled")
		return nil
	}
	if !c.dirty {
		c.cfg.Logger.Info("Skipping state save: no changes since last save")
		return nil
	}

	// Assemble the document up front; the initial prompt is flattened from
	// its message parts into a single string.
	state := AgentState{
		Version:           1,
		Messages:          c.messagesLocked(),
		InitialPromptSent: c.initialPromptSent,
	}
	if len(c.cfg.InitialPrompt) > 0 {
		state.InitialPrompt = buildStringFromMessageParts(c.cfg.InitialPrompt)
	}

	// Make sure the destination directory exists.
	if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
		return xerrors.Errorf("failed to create state directory: %w", err)
	}

	// Write to a sibling temp file first, then rename it into place.
	scratch := target + ".tmp"
	out, err := os.OpenFile(scratch, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
	if err != nil {
		return xerrors.Errorf("failed to create temp state file: %w", err)
	}

	// Until the rename succeeds, any return path removes the temp file.
	committed := false
	defer func() {
		if committed {
			return
		}
		if rmErr := os.Remove(scratch); rmErr != nil && !os.IsNotExist(rmErr) {
			c.cfg.Logger.Warn("Failed to clean up temp state file", "path", scratch, "err", rmErr)
		}
	}()

	// Stream the JSON straight into the file.
	if err := json.NewEncoder(out).Encode(state); err != nil {
		_ = out.Close()
		return xerrors.Errorf("failed to encode state: %w", err)
	}

	// Force the data to disk before the rename for crash safety.
	if err := out.Sync(); err != nil {
		_ = out.Close()
		return xerrors.Errorf("failed to sync state file: %w", err)
	}

	// The file must be closed before it is renamed.
	if err := out.Close(); err != nil {
		return xerrors.Errorf("failed to close temp state file: %w", err)
	}

	if err := os.Rename(scratch, target); err != nil {
		return xerrors.Errorf("failed to rename state file: %w", err)
	}
	committed = true

	// Only a fully successful save clears the dirty flag.
	c.dirty = false

	c.cfg.Logger.Info("State saved successfully", "path", target)

	return nil
}

// loadStateLocked restores the conversation from the configured state file.
// Caller MUST hold c.lock.
//
// Returns (error, shouldEmit) where shouldEmit indicates if the error should
// be emitted to the user:
//   - (nil, false) when loading is disabled, was already attempted, or
//     succeeded;
//   - (err, false) when the state file does not exist, which is logged at
//     info level and is not a user-facing failure;
//   - (err, true) when the file cannot be opened or decoded, or carries an
//     unsupported version.
//
// The file is opened directly instead of being checked with os.Stat first.
// This removes the window in which the file could vanish between the check
// and the open and be misreported to the user as a restore failure.
//
// On success it replaces c.messages, restores the initial prompt bookkeeping
// and clears the dirty flag. Nothing is modified on failure.
func (c *PTYConversation) loadStateLocked() (error, bool) {
	stateFile := c.cfg.StatePersistenceConfig.StateFile
	loadState := c.cfg.StatePersistenceConfig.LoadState

	if !loadState || c.loadStateStatus != LoadStatePending {
		return nil, false
	}

	// Open state file; a missing file simply means there is nothing to restore.
	f, err := os.Open(stateFile)
	if errors.Is(err, os.ErrNotExist) {
		c.cfg.Logger.Info("No previous state to load (file does not exist)", "path", stateFile)
		return xerrors.Errorf("No previous state to load (file does not exist)"), false
	}
	if err != nil {
		return xerrors.Errorf("failed to open state file: %w", err), true
	}
	defer func() {
		if closeErr := f.Close(); closeErr != nil {
			c.cfg.Logger.Warn("Failed to close state file", "path", stateFile, "err", closeErr)
		}
	}()

	var agentState AgentState
	decoder := json.NewDecoder(f)
	if err := decoder.Decode(&agentState); err != nil {
		return xerrors.Errorf("failed to unmarshal state (corrupted or invalid JSON): %w", err), true
	}

	// Validate version
	if agentState.Version != 1 {
		return xerrors.Errorf("unsupported state file version %d (expected 1)", agentState.Version), true
	}

	// Handle initial prompt restoration:
	// - If a new initial prompt was provided via flags, check if it differs from the saved one.
	//   If different, mark as not sent (will be sent). If same, preserve sent status.
	// - If no new prompt provided, restore the saved prompt and its sent status.
	c.initialPromptSent = agentState.InitialPromptSent
	if len(c.cfg.InitialPrompt) > 0 {
		isDifferent := buildStringFromMessageParts(c.cfg.InitialPrompt) != agentState.InitialPrompt
		if isDifferent {
			c.initialPromptSent = false
		}
		// If same prompt, keep agentState.InitialPromptSent
	} else if agentState.InitialPrompt != "" {
		c.cfg.InitialPrompt = []MessagePart{MessagePartText{
			Content: agentState.InitialPrompt,
			Alias:   "",
			Hidden:  false,
		}}
	}

	c.messages = agentState.Messages

	// The in-memory state now matches what is on disk.
	c.dirty = false

	c.cfg.Logger.Info("Successfully loaded state", "path", stateFile, "messages", len(c.messages))
	return nil, false
}