diff --git a/CLAUDE.md b/CLAUDE.md index 5dd243af..5265bff7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -25,6 +25,7 @@ ox # Start an interactive session . ├── agent.rs # Agent turn loop, stream accumulation, tool dispatch ├── agent/ +│ ├── compact_boundary.rs # Compact boundary persistence, file-tracker reset, live transcript replacement │ ├── compaction.rs # /compact driver: stripped-transcript summarization request + summary-prefix wrapping │ └── event.rs # AgentEvent, UserAction, AgentSink trait, StdioSink ├── client.rs # Client module root diff --git a/crates/oxide-code/src/agent.rs b/crates/oxide-code/src/agent.rs index f10e45f4..eb931cca 100644 --- a/crates/oxide-code/src/agent.rs +++ b/crates/oxide-code/src/agent.rs @@ -1,6 +1,7 @@ //! Agent turn loop. Streams the model response, dispatches tool calls, records to the session, //! and stops on text-only response or [`MAX_TOOL_ROUNDS`]. +pub(crate) mod compact_boundary; pub(crate) mod compaction; pub(crate) mod event; @@ -13,13 +14,16 @@ use tracing::{debug, warn}; use crate::agent::event::{AgentEvent, AgentSink, UserAction}; use crate::client::anthropic::Client; -use crate::client::anthropic::wire::{ContentBlockInfo, Delta, StreamEvent}; +use crate::client::anthropic::wire::{ContentBlockInfo, Delta, StreamEvent, Usage}; +use crate::config::AutoCompactionConfig; +use crate::file_tracker::FileTracker; use crate::message::{ContentBlock, Message, Role, strip_trailing_thinking}; use crate::prompt::PromptParts; use crate::session::handle::{RecordOutcome, SessionHandle}; use crate::tool::{ToolDefinition, ToolMetadata, ToolOutput, ToolRegistry}; const MAX_TOOL_ROUNDS: usize = 25; +const MAX_AUTO_COMPACT_FAILURES: u8 = 3; // ── Turn Abort ── @@ -67,6 +71,46 @@ impl AgentClient for Client { // ── Agent Turn ── +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub(crate) struct TokenUsage { + input_tokens: u32, + output_tokens: u32, +} + +impl TokenUsage { + #[cfg(test)] + pub(crate) const fn new(input_tokens: u32, 
output_tokens: u32) -> Self { + Self { + input_tokens, + output_tokens, + } + } + + pub(crate) const fn total_tokens(self) -> u32 { + self.input_tokens.saturating_add(self.output_tokens) + } + + fn observe(&mut self, usage: &Usage) { + if usage.input_tokens > 0 { + self.input_tokens = usage.input_tokens; + } + if usage.output_tokens > 0 { + self.output_tokens = usage.output_tokens; + } + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub(crate) struct TurnReport { + pub(crate) usage: Option, +} + +pub(crate) struct AutoCompact<'a> { + pub(crate) config: AutoCompactionConfig, + pub(crate) failures: &'a mut u8, + pub(crate) file_tracker: &'a FileTracker, +} + /// Drives one user prompt to a final assistant text reply. /// /// Each round streams a model response, dispatches any tool calls, and appends both the @@ -88,21 +132,24 @@ pub(crate) async fn agent_turn( sink: &dyn AgentSink, session: &SessionHandle, user_rx: &mut mpsc::Receiver, -) -> AbortResult<()> { +) -> AbortResult { let tool_defs = tools.definitions(); let mut pending_prompts: Vec = Vec::new(); + let mut latest_usage = None; for _ in 0..MAX_TOOL_ROUNDS { strip_trailing_thinking(messages); let StreamOutcome { blocks, parse_errors, + usage, } = await_unless_aborted( stream_response(client, messages, &tool_defs, prompt, sink), user_rx, &mut pending_prompts, ) .await??; + latest_usage = usage.or(latest_usage); let tool_uses = collect_tool_uses(&blocks); let assistant_msg = Message { @@ -114,7 +161,9 @@ pub(crate) async fn agent_turn( // Queued prompts drain on the TUI side at idle. 
record_message(session, assistant_msg.clone(), sink).await; messages.push(assistant_msg); - return Ok(()); + return Ok(TurnReport { + usage: latest_usage, + }); } let (results, sidecars) = run_tool_round( @@ -143,6 +192,65 @@ pub(crate) async fn agent_turn( ))) } +#[expect( + clippy::too_many_arguments, + reason = "auto-compaction needs the same live turn state as manual compaction plus the latest usage signal" +)] +pub(crate) async fn auto_compact_if_needed( + client: &dyn AgentClient, + session: &SessionHandle, + messages: &mut Vec, + sink: &dyn AgentSink, + user_rx: &mut mpsc::Receiver, + pending: &mut Vec, + auto: Option<&mut AutoCompact<'_>>, + usage: Option, +) -> AbortResult { + let Some(auto) = auto else { + return Ok(false); + }; + let Some(usage) = usage else { + return Ok(false); + }; + if *auto.failures >= MAX_AUTO_COMPACT_FAILURES + || !auto.config.should_trigger(usage.total_tokens()) + { + return Ok(false); + } + + _ = sink.send(AgentEvent::AutoCompactionStarted); + let summary = match await_unless_aborted( + compaction::compact_session(client, messages, None), + user_rx, + pending, + ) + .await? 
+ { + Ok(summary) => summary, + Err(e) => { + *auto.failures += 1; + warn!("auto-compaction failed: {e:#}"); + return Ok(false); + } + }; + let compacted = compact_boundary::replace_session_with_summary( + session, + auto.file_tracker, + messages, + sink, + summary, + None, + true, + ) + .await; + if compacted { + *auto.failures = 0; + } else { + *auto.failures += 1; + } + Ok(compacted) +} + fn collect_tool_uses(blocks: &[ContentBlock]) -> Vec<(String, String, serde_json::Value)> { blocks .iter() @@ -228,7 +336,7 @@ async fn dispatch_tool_call( await_unless_aborted(tools.run(name, input), user_rx, pending).await } -async fn record_drained_prompts( +pub(crate) async fn record_drained_prompts( texts: impl IntoIterator, messages: &mut Vec, session: &SessionHandle, @@ -363,6 +471,7 @@ fn parse_tool_json(json_buf: &str) -> (serde_json::Value, Option) { struct StreamOutcome { blocks: Vec, parse_errors: HashMap, + usage: Option, } async fn stream_response( @@ -381,11 +490,19 @@ async fn stream_response( )?; let mut blocks: Vec> = Vec::new(); + let mut usage = TokenUsage::default(); + let mut saw_usage = false; while let Some(event) = rx.recv().await { let event = event.context("stream error")?; match event { + StreamEvent::MessageStart { message } => { + if let Some(observed) = message.usage { + usage.observe(&observed); + saw_usage = true; + } + } StreamEvent::ContentBlockStart { index, content_block, @@ -409,11 +526,21 @@ async fn stream_response( StreamEvent::Error { error } => { bail!("API error ({}): {}", error.error_type, error.message); } + StreamEvent::MessageDelta { + usage: Some(observed), + .. 
+ } => { + usage.observe(&observed); + saw_usage = true; + } _ => {} } } - let mut outcome = StreamOutcome::default(); + let mut outcome = StreamOutcome { + usage: saw_usage.then_some(usage), + ..StreamOutcome::default() + }; for acc in blocks.into_iter().flatten() { let (block, parse_error) = acc.into_content_block(); outcome.parse_errors.extend(parse_error); @@ -493,6 +620,7 @@ mod tests { use std::pin::Pin; use std::sync::Arc; use std::sync::Mutex as StdMutex; + use std::sync::atomic::{AtomicUsize, Ordering}; use serde_json::json; use tokio::sync::Notify; @@ -503,9 +631,10 @@ mod tests { use crate::agent::event::CapturingSink; use crate::client::anthropic::testing::test_client; use crate::client::anthropic::wire::{ - ApiError, ContentBlockInfo, MessageResponse, StreamEvent, Usage, + ApiError, ContentBlockInfo, MessageDeltaBody, MessageResponse, StreamEvent, Usage, }; - use crate::config::{Auth, Effort}; + use crate::config::{Auth, AutoCompactionConfig, Effort}; + use crate::file_tracker::FileTracker; use crate::message::Role; use crate::model::ResolvedModelId; use crate::session::handle::{self, SessionHandle}; @@ -553,6 +682,76 @@ mod tests { } } + struct FailingCompactClient; + + impl AgentClient for FailingCompactClient { + fn stream_message( + &self, + _messages: &[Message], + _system_sections: &[&str], + _user_context: Option<&str>, + _tools: &[ToolDefinition], + ) -> Result>> { + Err(anyhow!("summarizer unavailable")) + } + } + + struct CountingFailingClient { + calls: AtomicUsize, + } + + impl CountingFailingClient { + fn new() -> Self { + Self { + calls: AtomicUsize::new(0), + } + } + + fn calls(&self) -> usize { + self.calls.load(Ordering::SeqCst) + } + } + + impl AgentClient for CountingFailingClient { + fn stream_message( + &self, + _messages: &[Message], + _system_sections: &[&str], + _user_context: Option<&str>, + _tools: &[ToolDefinition], + ) -> Result>> { + self.calls.fetch_add(1, Ordering::SeqCst); + Err(anyhow!("summarizer unavailable")) + } + } 
+ + struct DelayedSummaryClient { + started: Arc, + release: Arc, + } + + impl AgentClient for DelayedSummaryClient { + fn stream_message( + &self, + _messages: &[Message], + _system_sections: &[&str], + _user_context: Option<&str>, + _tools: &[ToolDefinition], + ) -> Result>> { + let (tx, rx) = mpsc::channel(8); + let started = self.started.clone(); + let release = self.release.clone(); + tokio::spawn(async move { + started.notify_one(); + release.notified().await; + for event in text_turn("auto summary") { + tx.send(Ok(event)).await.expect("test receiver alive"); + } + }); + Ok(rx) + } + } + impl AgentClient for FakeClient { fn stream_message( &self, @@ -618,6 +817,41 @@ mod tests { ] } + fn text_turn_with_usage(text: &str, input_tokens: u32, output_tokens: u32) -> Vec { + vec![ + StreamEvent::MessageStart { + message: MessageResponse { + id: "msg_1".into(), + model: "claude-sonnet-4-6".into(), + usage: Some(Usage { + input_tokens, + output_tokens: 0, + }), + }, + }, + StreamEvent::ContentBlockStart { + index: 0, + content_block: ContentBlockInfo::Text { + text: String::new(), + }, + }, + StreamEvent::ContentBlockDelta { + index: 0, + delta: Delta::TextDelta { text: text.into() }, + }, + StreamEvent::MessageDelta { + delta: MessageDeltaBody { + stop_reason: Some("end_turn".into()), + }, + usage: Some(Usage { + input_tokens: 0, + output_tokens, + }), + }, + StreamEvent::MessageStop, + ] + } + fn tool_use_turn(id: &str, name: &str, input_json: &str) -> Vec { vec![ StreamEvent::ContentBlockStart { @@ -638,6 +872,30 @@ mod tests { ] } + fn tool_use_turn_with_usage( + id: &str, + name: &str, + input_json: &str, + input_tokens: u32, + output_tokens: u32, + ) -> Vec { + let mut events = tool_use_turn(id, name, input_json); + events.insert( + 0, + StreamEvent::MessageStart { + message: MessageResponse { + id: "msg_1".into(), + model: "claude-sonnet-4-6".into(), + usage: Some(Usage { + input_tokens, + output_tokens, + }), + }, + }, + ); + events + } + /// Echoes its 
input; exercises the tool-dispatch path without subprocess machinery. struct EchoTool; @@ -727,6 +985,322 @@ mod tests { crate::session::handle::testing::dead("dead-test-session") } + // ── auto_compact_if_needed ── + + #[tokio::test] + async fn auto_compact_if_needed_skips_without_auto_state_usage_or_threshold() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = FakeClient::new(Vec::new()); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; + let mut pending = Vec::new(); + let mut failures = 0; + + let absent = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + None, + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + assert!(!absent); + + let missing_usage = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + None, + ) + .await + .unwrap(); + assert!(!missing_usage); + + let below_threshold = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(100), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + assert!(!below_threshold); + assert_eq!(messages.len(), 4); + assert_eq!(failures, 0); + } + + #[tokio::test] + async fn auto_compact_if_needed_counts_summarizer_failure_without_replacing_messages() { + let dir = tempfile::tempdir().unwrap(); + let session = 
test_session(dir.path()); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; + let mut pending = Vec::new(); + let mut failures = 0; + + let compacted = auto_compact_if_needed( + &FailingCompactClient, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + + assert!(!compacted); + assert_eq!(failures, 1); + assert!(matches!(&messages[0].content[0], ContentBlock::Text { text } if text == "hi")); + } + + #[tokio::test] + async fn auto_compact_if_needed_counts_persist_failure_without_replacing_messages() { + let session = dead_test_session(); + let client = FakeClient::new(vec![text_turn("auto summary")]); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; + let mut pending = Vec::new(); + let mut failures = 0; + + let compacted = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + + assert!(!compacted); + assert_eq!(failures, 1); + assert!(matches!(&messages[0].content[0], ContentBlock::Text { text } if text == "hi")); + assert!( + sink.events() + .iter() + .any(|event| matches!(event, AgentEvent::Error(message) if message.contains("Session 
write failed"))) + ); + } + + #[tokio::test] + async fn auto_compact_if_needed_stops_after_failure_limit() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = CountingFailingClient::new(); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; + let mut pending = Vec::new(); + let mut failures = MAX_AUTO_COMPACT_FAILURES - 1; + let usage = Some(TokenUsage { + input_tokens: 50_000, + output_tokens: 1, + }); + + let first = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(50_000), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + usage, + ) + .await + .unwrap(); + let second = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(50_000), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + usage, + ) + .await + .unwrap(); + + assert!(!first); + assert!(!second); + assert_eq!(failures, MAX_AUTO_COMPACT_FAILURES); + assert_eq!(client.calls(), 1); + } + + #[tokio::test] + async fn auto_compact_if_needed_queues_submit_while_summarizing() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let started = Arc::new(Notify::new()); + let release = Arc::new(Notify::new()); + let client = DelayedSummaryClient { + started: started.clone(), + release: release.clone(), + }; + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; + let 
mut pending = Vec::new(); + let mut failures = 0; + let (tx, mut rx) = mpsc::channel::(1); + let mut auto = AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }; + + let compact = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut rx, + &mut pending, + Some(&mut auto), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ); + let queue_prompt = async { + started.notified().await; + tx.send(UserAction::SubmitPrompt("queued after summary".into())) + .await + .unwrap(); + tokio::task::yield_now().await; + release.notify_one(); + }; + let (compacted, ()) = tokio::join!(compact, queue_prompt); + let compacted = compacted.unwrap(); + + assert!(compacted); + assert_eq!(pending, vec!["queued after summary"]); + assert_eq!(*auto.failures, 0); + assert_eq!( + sink.events() + .iter() + .filter(|event| matches!(event, AgentEvent::PromptDrained(_))) + .count(), + 0 + ); + assert!(sink.events().iter().any(|event| matches!( + event, + AgentEvent::SessionCompacted { + automatic: true, + .. + } + ))); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("auto summary")) + ); + } + #[tokio::test] async fn agent_turn_dead_session_surfaces_write_failure_on_first_call() { // Write errors must not abort the turn; one Error event surfaces and the turn returns Ok. 
@@ -871,6 +1445,30 @@ mod tests { assert_eq!(streamed, ["Hello immediately"]); } + #[tokio::test] + async fn agent_turn_reports_latest_stream_usage() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = FakeClient::new(vec![text_turn_with_usage("Hello!", 100, 7)]); + let tools = ToolRegistry::new(Vec::new()); + let sink = CapturingSink::new(); + let mut messages = vec![Message::user("hi")]; + + let report = agent_turn( + &client, + &tools, + &mut messages, + &empty_prompt(), + &sink, + &session, + &mut inert_user_rx(), + ) + .await + .unwrap(); + + assert_eq!(report.usage.map(TokenUsage::total_tokens), Some(107)); + } + #[tokio::test] async fn agent_turn_single_tool_call_dispatches_and_completes_on_follow_up() { let dir = tempfile::tempdir().unwrap(); @@ -929,6 +1527,45 @@ mod tests { ))); } + #[tokio::test] + async fn agent_turn_does_not_auto_compact_between_tool_result_and_follow_up() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = FakeClient::new(vec![ + tool_use_turn_with_usage("tool_1", "echo", r#"{"v":1}"#, 9, 2), + text_turn_with_usage("Done", 1, 2), + ]); + let tools = ToolRegistry::new(vec![Box::new(EchoTool)]); + let sink = CapturingSink::new(); + let mut messages = vec![ + Message::user("run echo"), + Message::assistant("earlier"), + Message::user("continue"), + ]; + + let report = agent_turn( + &client, + &tools, + &mut messages, + &empty_prompt(), + &sink, + &session, + &mut inert_user_rx(), + ) + .await + .unwrap(); + + assert_eq!(report.usage.map(TokenUsage::total_tokens), Some(3)); + assert_eq!( + sink.events() + .iter() + .filter(|event| matches!(event, AgentEvent::SessionCompacted { .. 
})) + .count(), + 0 + ); + assert!(matches!(&messages[5].content[0], ContentBlock::Text { text } if text == "Done")); + } + #[tokio::test] async fn agent_turn_drains_mid_round_submit_into_messages_at_round_boundary() { // Pre-loaded SubmitPrompt is consumed during the round; at the boundary the agent splices diff --git a/crates/oxide-code/src/agent/compact_boundary.rs b/crates/oxide-code/src/agent/compact_boundary.rs new file mode 100644 index 00000000..bbd6a452 --- /dev/null +++ b/crates/oxide-code/src/agent/compact_boundary.rs @@ -0,0 +1,176 @@ +//! Compact-boundary persistence and live transcript replacement. + +use crate::agent::compaction::synthesize_post_compact_message; +use crate::agent::event::{AgentEvent, AgentSink}; +use crate::file_tracker::FileTracker; +use crate::message::Message; +use crate::session::handle::SessionHandle; + +/// Persists a compact boundary and swaps the live transcript to the synthetic summary root. +pub(crate) async fn replace_session_with_summary( + session: &SessionHandle, + file_tracker: &FileTracker, + messages: &mut Vec, + sink: &dyn AgentSink, + summary: String, + instructions: Option, + automatic: bool, +) -> bool { + let synthetic = synthesize_post_compact_message(&summary); + let outcome = session + .compact(summary.clone(), instructions.clone(), synthetic.clone()) + .await; + sink.session_write_error(outcome.failure.as_deref()); + if outcome.failure.is_some() { + return false; + } + + file_tracker.clear(); + *messages = vec![synthetic]; + if let Err(e) = sink.send(AgentEvent::SessionCompacted { + summary, + pre_count: outcome.pre_count, + instructions, + automatic, + }) { + tracing::error!("session-compacted event dropped: {e}"); + } + true +} + +#[cfg(test)] +mod tests { + use std::time::SystemTime; + + use anyhow::anyhow; + + use super::*; + use crate::agent::event::{AgentSink, CapturingSink}; + use crate::file_tracker::LastView; + use crate::message::{ContentBlock, Message}; + use crate::session::handle; + use 
crate::session::store::test_store; + + struct FailingSink; + + impl AgentSink for FailingSink { + fn send(&self, _event: AgentEvent) -> anyhow::Result<()> { + Err(anyhow!("sink closed")) + } + } + + fn fake_transcript() -> Vec { + vec![ + Message::user("fix the bug"), + Message::assistant("looking now"), + Message::user("any progress?"), + Message::assistant("found it"), + ] + } + + #[tokio::test] + async fn replace_session_with_summary_clears_tracker_and_replaces_messages() { + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = handle::start(&store, "claude-sonnet-4-6"); + let tracker = FileTracker::default(); + tracker.record_read( + std::path::Path::new("/tmp/example.rs"), + b"old", + SystemTime::UNIX_EPOCH, + 3, + LastView::Full, + ); + let mut messages = fake_transcript(); + let sink = CapturingSink::new(); + + let compacted = replace_session_with_summary( + &session, + &tracker, + &mut messages, + &sink, + "fixed login bug".to_owned(), + None, + true, + ) + .await; + + assert!(compacted); + assert!(tracker.snapshot_all().is_empty()); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("fixed login bug")) + ); + } + + #[tokio::test] + async fn replace_session_with_summary_preserves_state_when_persist_fails() { + let session = handle::testing::dead("dead-compact-session"); + let tracker = FileTracker::default(); + let path = std::path::PathBuf::from("/tmp/example.rs"); + tracker.record_read(&path, b"old", SystemTime::UNIX_EPOCH, 3, LastView::Full); + let original = fake_transcript(); + let mut messages = original.clone(); + let sink = CapturingSink::new(); + + let compacted = replace_session_with_summary( + &session, + &tracker, + &mut messages, + &sink, + "fixed login bug".to_owned(), + None, + true, + ) + .await; + + assert!(!compacted); + assert_eq!(messages.len(), original.len()); + assert!( + matches!(&messages[0].content[0], 
ContentBlock::Text { text } if text == "fix the bug") + ); + assert!( + matches!(&messages[3].content[0], ContentBlock::Text { text } if text == "found it") + ); + assert_eq!(tracker.snapshot_all().len(), 1); + assert!( + sink.events() + .iter() + .any(|event| matches!(event, AgentEvent::Error(message) if message.contains("Session write failed"))) + ); + } + + #[tokio::test] + async fn replace_session_with_summary_still_replaces_messages_when_event_send_fails() { + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = handle::start(&store, "claude-sonnet-4-6"); + let tracker = FileTracker::default(); + tracker.record_read( + std::path::Path::new("/tmp/example.rs"), + b"old", + SystemTime::UNIX_EPOCH, + 3, + LastView::Full, + ); + let mut messages = fake_transcript(); + + let compacted = replace_session_with_summary( + &session, + &tracker, + &mut messages, + &FailingSink, + "fixed login bug".to_owned(), + None, + true, + ) + .await; + + assert!(compacted); + assert!(tracker.snapshot_all().is_empty()); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("fixed login bug")) + ); + } +} diff --git a/crates/oxide-code/src/agent/compaction.rs b/crates/oxide-code/src/agent/compaction.rs index 88495030..337e1eec 100644 --- a/crates/oxide-code/src/agent/compaction.rs +++ b/crates/oxide-code/src/agent/compaction.rs @@ -6,7 +6,7 @@ use anyhow::{Result, bail}; use indoc::{formatdoc, indoc}; -use crate::client::anthropic::Client; +use crate::agent::AgentClient; use crate::client::anthropic::wire::{ContentBlockInfo, Delta, StreamEvent}; use crate::message::{ContentBlock, Message, Role}; @@ -51,7 +51,7 @@ const SUMMARY_PREFIX: &str = indoc! {r" /// Errors when the transcript is too short to compact, when the API errors mid-stream, or when /// the model returns an empty / whitespace-only response. 
pub(crate) async fn compact_session( - client: &Client, + client: &dyn AgentClient, transcript: &[Message], instructions: Option<&str>, ) -> Result { diff --git a/crates/oxide-code/src/agent/event.rs b/crates/oxide-code/src/agent/event.rs index 1f0c32f4..67a90a0c 100644 --- a/crates/oxide-code/src/agent/event.rs +++ b/crates/oxide-code/src/agent/event.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use anyhow::Result; use tokio::sync::mpsc; -use crate::config::Effort; +use crate::config::{CompactionConfig, Effort}; use crate::model::ResolvedModelId; use crate::tool::ToolRegistry; @@ -47,6 +47,9 @@ pub(crate) enum AgentEvent { /// User cancelled mid-turn ([`UserAction::Cancel`]); the in-flight reply is truncated and the /// inline [`INTERRUPTED_MARKER`] is rendered. Cancelled, + /// Automatic compaction started before the submitted prompt runs. TUI switches to compacting + /// status while the summarizer request streams. + AutoCompactionStarted, /// Background title generator finished; UI updates the chrome label. SessionTitleUpdated { session_id: String, title: String }, /// `/clear` rolled the session — a new session UUID is now active. @@ -63,16 +66,20 @@ pub(crate) enum AgentEvent { /// `/compact` finished — summary captures the prior transcript, `pre_count` is for the /// post-compact UI line. The agent loop has already swapped the in-memory transcript to /// the synthetic continuation; the UI clears its chat and replays a single boundary block. + /// Automatic compaction can happen before a submitted prompt starts, so the TUI keeps the + /// busy state until the following turn completes. SessionCompacted { summary: String, pre_count: u32, instructions: Option, + automatic: bool, }, /// Live config after a [`UserAction::SwapConfig`]. `effort` is the resolved value (post-clamp); /// `requested_effort` is the user's explicit pick, used to surface `(clamped from X)`. 
ConfigChanged { model_id: String, effort: Option, + compaction: CompactionConfig, requested_effort: Option, }, /// User-visible error from the agent loop, session writer, or tool dispatch. Renders as a @@ -203,12 +210,24 @@ impl StdioSink { } writeln!(stderr)?; } + AgentEvent::SessionCompacted { + pre_count, + automatic, + .. + } => { + let label = if automatic { + "Auto-compacted" + } else { + "Compacted" + }; + writeln!(stderr, "{label} {pre_count} messages into summary")?; + } // TUI-only — no stdio surface to update. AgentEvent::PromptDrained(_) + | AgentEvent::AutoCompactionStarted | AgentEvent::SessionTitleUpdated { .. } | AgentEvent::SessionRolled { .. } | AgentEvent::SessionResumed { .. } - | AgentEvent::SessionCompacted { .. } | AgentEvent::ConfigChanged { .. } => {} AgentEvent::TurnComplete => { writeln!(stdout)?; @@ -363,6 +382,7 @@ mod tests { fn render_tui_only_events_emit_nothing_on_either_stream() { for event in [ AgentEvent::PromptDrained("queued".to_owned()), + AgentEvent::AutoCompactionStarted, AgentEvent::SessionTitleUpdated { session_id: "sid".to_owned(), title: "New title".to_owned(), @@ -373,6 +393,7 @@ mod tests { AgentEvent::ConfigChanged { model_id: "claude-opus-4-7".to_owned(), effort: Some(Effort::Xhigh), + compaction: CompactionConfig::disabled(), requested_effort: Some(Effort::Xhigh), }, ] { @@ -382,6 +403,31 @@ mod tests { } } + #[test] + fn render_session_compacted_writes_stderr_boundary() { + let (_, stderr) = render_one( + &test_sink(false), + AgentEvent::SessionCompacted { + summary: "summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: false, + }, + ); + assert_eq!(stderr, "Compacted 4 messages into summary\n"); + + let (_, stderr) = render_one( + &test_sink(false), + AgentEvent::SessionCompacted { + summary: "summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: true, + }, + ); + assert_eq!(stderr, "Auto-compacted 4 messages into summary\n"); + } + #[test] fn 
render_turn_complete_writes_trailing_newline_to_stdout() { let (stdout, stderr) = render_one(&test_sink(false), AgentEvent::TurnComplete); diff --git a/crates/oxide-code/src/client/anthropic.rs b/crates/oxide-code/src/client/anthropic.rs index 43566bd5..06d68da7 100644 --- a/crates/oxide-code/src/client/anthropic.rs +++ b/crates/oxide-code/src/client/anthropic.rs @@ -23,7 +23,7 @@ use tokio::sync::mpsc; use tracing::debug; use uuid::Uuid; -use crate::config::{Auth, Config, Effort}; +use crate::config::{Auth, CompactionConfig, Config, Effort}; use crate::message::{ContentBlock, Message, Role}; use crate::prompt::SYSTEM_PROMPT_DYNAMIC_BOUNDARY; use crate::tool::ToolDefinition; @@ -147,6 +147,10 @@ impl Client { self.config.effort } + pub(crate) fn compaction(&self) -> CompactionConfig { + self.config.compaction + } + #[cfg(test)] pub(crate) fn session_id(&self) -> &str { &self.session_id @@ -161,13 +165,18 @@ impl Client { self.session_id = id; } - /// Swaps the active model and re-clamps `config.effort` against the new caps. - pub(crate) fn set_model(&mut self, model: String) -> Option { + /// Swaps the active model and re-clamps model-derived config against the new caps. + pub(crate) fn set_model(&mut self, model: String) -> Result> { let caps = crate::model::capabilities_for(&model); let effort = caps.resolve_effort(self.config.effort); + let compaction = self + .config + .compaction + .for_model(&model, self.config.max_tokens)?; self.config.effort = effort; + self.config.compaction = compaction; self.config.model = model; - effort + Ok(effort) } /// Swaps the active effort, clamped against the current model's caps. 
@@ -392,7 +401,7 @@ mod tests { use super::testing::{Captured, api_key, captured, oauth, test_config}; use super::wire::{ContentBlockInfo, Delta}; use super::*; - use crate::config::{Effort, ThinkingConfig}; + use crate::config::{AutoCompactionConfig, CompactionConfig, Effort, ThinkingConfig}; // ── Fixtures ── @@ -474,6 +483,18 @@ mod tests { assert_eq!(client.model(), "claude-sonnet-4-6"); } + #[test] + fn new_exposes_compaction_config() { + let mut config = test_config(OFFLINE_URL, Auth::ApiKey("sk-test".to_owned()), TEST_MODEL); + config.compaction = CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(123_456), + }); + let client = Client::new(config, None).unwrap(); + + assert_eq!(client.compaction().auto.threshold_tokens, Some(123_456)); + } + #[test] fn new_none_session_id_generates_uuid_v4() { let client = Client::new( @@ -614,7 +635,7 @@ mod tests { ), ] { let mut client = client_with(from_model, from_effort); - let returned = client.set_model(swap_to.to_owned()); + let returned = client.set_model(swap_to.to_owned()).unwrap(); assert_eq!(returned, expect, "{from_model} → {swap_to}: returned"); assert_eq!( client.config.effort, expect, @@ -629,8 +650,38 @@ mod tests { // `[1m]` is a client-side opt-in; the swap must store it verbatim so `compute_betas` keeps // sending the 1M context beta. Regressing this drops 1M context silently. 
let mut client = client_with("claude-opus-4-6", Some(Effort::Max)); - client.set_model("claude-opus-4-7[1m]".to_owned()); + client.set_model("claude-opus-4-7[1m]".to_owned()).unwrap(); + assert_eq!(client.model(), "claude-opus-4-7[1m]"); + } + + #[test] + fn set_model_recomputes_compaction_threshold_from_new_context_window() { + let mut config = test_config(OFFLINE_URL, api_key(), "claude-opus-4-7[1m]"); + config.max_tokens = 64_000; + config.compaction = CompactionConfig::default_for_test(&config.model, config.max_tokens); + let mut client = Client::new(config, Some("sid".to_owned())).unwrap(); + + let effort = client.set_model("claude-sonnet-4-6".to_owned()).unwrap(); + + assert_eq!(effort, Some(Effort::High)); + assert_eq!(client.model(), "claude-sonnet-4-6"); + assert_eq!(client.compaction().auto.threshold_tokens, Some(167_000)); + } + + #[test] + fn set_model_rejects_threshold_above_new_context_window() { + let mut config = test_config(OFFLINE_URL, api_key(), "claude-opus-4-7[1m]"); + config.compaction = CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(967_000), + }); + let mut client = Client::new(config, Some("sid".to_owned())).unwrap(); + + client + .set_model("claude-sonnet-4-6".to_owned()) + .expect_err("threshold above the smaller window must reject the swap"); assert_eq!(client.model(), "claude-opus-4-7[1m]"); + assert_eq!(client.compaction().auto.threshold_tokens, Some(967_000)); } // ── Client::set_effort ── diff --git a/crates/oxide-code/src/client/anthropic/testing.rs b/crates/oxide-code/src/client/anthropic/testing.rs index d85733d8..4007a98e 100644 --- a/crates/oxide-code/src/client/anthropic/testing.rs +++ b/crates/oxide-code/src/client/anthropic/testing.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, Mutex}; use super::Client; -use crate::config::{Auth, Config, PromptCacheTtl}; +use crate::config::{Auth, CompactionConfig, Config, PromptCacheTtl}; use crate::tui::theme::Theme; /// Minimal [`Config`] for 
unit / wiremock tests. @@ -15,6 +15,7 @@ pub(crate) fn test_config(base_url: impl Into, auth: Auth, model: &str) effort: None, max_tokens: 128, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig::disabled(), thinking: None, show_thinking: false, show_welcome: true, diff --git a/crates/oxide-code/src/client/anthropic/wire.rs b/crates/oxide-code/src/client/anthropic/wire.rs index 9c79df6c..0a5d3e99 100644 --- a/crates/oxide-code/src/client/anthropic/wire.rs +++ b/crates/oxide-code/src/client/anthropic/wire.rs @@ -231,13 +231,6 @@ pub(crate) struct MessageDeltaBody { pub(crate) stop_reason: Option, } -#[cfg_attr( - not(test), - expect( - dead_code, - reason = "fields populated by serde, defined for full SSE protocol coverage" - ) -)] #[derive(Debug, Clone, Deserialize)] pub(crate) struct Usage { #[serde(default)] diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index cbf314ed..b4f9d9c8 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -17,6 +17,9 @@ use crate::util::env; const DEFAULT_MODEL: &str = "claude-opus-4-7[1m]"; const DEFAULT_BASE_URL: &str = "https://api.anthropic.com"; +const AUTO_COMPACTION_OUTPUT_RESERVE_CAP: u32 = 20_000; +const AUTO_COMPACTION_BUFFER_TOKENS: u32 = 13_000; +const MIN_AUTO_COMPACTION_THRESHOLD_TOKENS: u32 = 50_000; /// Mirrors the fallback `loader::resolve_theme` applies when no `[tui.theme] base` is set. pub(crate) const DEFAULT_THEME: &str = "mocha"; @@ -50,6 +53,7 @@ pub(crate) struct ConfigSnapshot { pub(crate) base_url: String, pub(crate) max_tokens: u32, pub(crate) prompt_cache_ttl: PromptCacheTtl, + pub(crate) compaction: CompactionConfig, pub(crate) show_thinking: bool, pub(crate) show_welcome: bool, /// Resolved theme base name — built-in catalogue key or filesystem path. 
`/theme` reads this @@ -185,6 +189,76 @@ impl FromStr for PromptCacheTtl { } } +// ── CompactionConfig ── + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct CompactionConfig { + pub(crate) auto: AutoCompactionConfig, + policy: AutoCompactionPolicy, +} + +impl CompactionConfig { + #[cfg(test)] + pub(crate) const fn disabled() -> Self { + Self { + auto: AutoCompactionConfig::disabled(), + policy: AutoCompactionPolicy::Disabled, + } + } + + pub(crate) fn for_model(self, model: &str, max_tokens: u32) -> Result { + resolve_compaction_policy(self.policy, model, max_tokens) + } + + #[cfg(test)] + pub(crate) const fn resolved_for_test(auto: AutoCompactionConfig) -> Self { + let policy = if auto.enabled { + match auto.threshold_tokens { + Some(tokens) => AutoCompactionPolicy::Tokens(tokens), + None => AutoCompactionPolicy::Default, + } + } else { + AutoCompactionPolicy::Disabled + }; + Self { auto, policy } + } + + #[cfg(test)] + pub(crate) fn default_for_test(model: &str, max_tokens: u32) -> Self { + resolve_compaction_policy(AutoCompactionPolicy::Default, model, max_tokens).unwrap() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct AutoCompactionConfig { + pub(crate) enabled: bool, + pub(crate) threshold_tokens: Option, +} + +impl AutoCompactionConfig { + pub(crate) const fn disabled() -> Self { + Self { + enabled: false, + threshold_tokens: None, + } + } + + pub(crate) const fn should_trigger(self, total_tokens: u32) -> bool { + match (self.enabled, self.threshold_tokens) { + (true, Some(threshold)) => total_tokens >= threshold, + _ => false, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AutoCompactionPolicy { + Disabled, + Default, + Tokens(u32), + Percent(u8), +} + // ── Config ── /// Resolved configuration. 
@@ -197,6 +271,7 @@ pub(crate) struct Config { pub(crate) base_url: String, pub(crate) max_tokens: u32, pub(crate) prompt_cache_ttl: PromptCacheTtl, + pub(crate) compaction: CompactionConfig, pub(crate) thinking: Option, pub(crate) show_thinking: bool, pub(crate) show_welcome: bool, @@ -275,6 +350,8 @@ impl Config { None => client.prompt_cache_ttl.unwrap_or(PromptCacheTtl::OneHour), }; + let compaction = resolve_compaction(client.compaction, &model, max_tokens)?; + let theme_name = theme_config .base .clone() @@ -291,6 +368,7 @@ impl Config { base_url, max_tokens, prompt_cache_ttl, + compaction, thinking, show_thinking, show_welcome, @@ -308,6 +386,7 @@ impl Config { base_url: self.base_url.clone(), max_tokens: self.max_tokens, prompt_cache_ttl: self.prompt_cache_ttl, + compaction: self.compaction, show_thinking: self.show_thinking, show_welcome: self.show_welcome, theme_name: self.theme_name.clone(), @@ -325,6 +404,14 @@ pub(crate) fn display_bool(flag: bool) -> &'static str { if flag { "on" } else { "off" } } +pub(crate) fn display_auto_compaction(auto: AutoCompactionConfig) -> String { + match (auto.enabled, auto.threshold_tokens) { + (true, Some(threshold)) => format!("on at {threshold} tokens"), + (true, None) => "off (no threshold)".to_owned(), + _ => "off".to_owned(), + } +} + fn default_max_tokens(effort: Option) -> u32 { match effort { Some(Effort::Xhigh | Effort::Max) => 64_000, @@ -333,6 +420,136 @@ fn default_max_tokens(effort: Option) -> u32 { } } +fn resolve_compaction( + file: Option, + model: &str, + max_tokens: u32, +) -> Result { + let auto_requested = env::bool("OX_COMPACTION_AUTO_ENABLED") + .or_else(|| file.as_ref().and_then(|c| c.enabled)) + .unwrap_or(true); + + let policy = if auto_requested { + resolve_auto_policy(file.as_ref())? 
+ } else { + AutoCompactionPolicy::Disabled + }; + resolve_compaction_policy(policy, model, max_tokens) +} + +fn resolve_compaction_policy( + policy: AutoCompactionPolicy, + model: &str, + max_tokens: u32, +) -> Result { + let auto = resolve_auto_compaction(policy, model, max_tokens)?; + Ok(CompactionConfig { auto, policy }) +} + +fn resolve_auto_compaction( + policy: AutoCompactionPolicy, + model: &str, + max_tokens: u32, +) -> Result { + let threshold = match policy { + AutoCompactionPolicy::Disabled => return Ok(AutoCompactionConfig::disabled()), + AutoCompactionPolicy::Default => default_auto_threshold(model, max_tokens), + AutoCompactionPolicy::Tokens(tokens) => threshold_from_tokens(tokens, model, max_tokens)?, + AutoCompactionPolicy::Percent(percent) => { + threshold_from_percent(percent, model, max_tokens)? + } + }; + Ok(AutoCompactionConfig { + enabled: threshold.is_some(), + threshold_tokens: threshold, + }) +} + +fn resolve_auto_policy(file: Option<&file::CompactionConfig>) -> Result { + let env_tokens = env_u32("OX_COMPACTION_AUTO_THRESHOLD_TOKENS")?; + let env_percent = env_u8("OX_COMPACTION_AUTO_THRESHOLD_PERCENT")?; + let env_threshold_set = env_tokens.is_some() || env_percent.is_some(); + let file_tokens = file.and_then(|c| c.threshold_tokens); + let file_percent = file.and_then(|c| c.threshold_percent); + let (tokens, percent) = if env_threshold_set { + (env_tokens, env_percent) + } else { + (file_tokens, file_percent) + }; + + match (tokens, percent) { + (Some(_), Some(_)) => { + bail!("set only one of auto_threshold_tokens or auto_threshold_percent for compaction") + } + (Some(tokens), None) => Ok(AutoCompactionPolicy::Tokens(tokens)), + (None, Some(percent)) => Ok(AutoCompactionPolicy::Percent(percent)), + (None, None) => Ok(AutoCompactionPolicy::Default), + } +} + +fn threshold_from_tokens(tokens: u32, model: &str, max_tokens: u32) -> Result> { + validate_auto_threshold_floor(tokens)?; + if let Some(max) = default_auto_threshold(model, 
max_tokens) + && tokens > max + { + bail!("auto compaction threshold must be at most {max} tokens for model {model:?}"); + } + Ok(Some(tokens)) +} + +fn validate_auto_threshold_floor(tokens: u32) -> Result { + if tokens < MIN_AUTO_COMPACTION_THRESHOLD_TOKENS { + bail!( + "auto compaction threshold must be at least \ + {MIN_AUTO_COMPACTION_THRESHOLD_TOKENS} tokens" + ); + } + Ok(tokens) +} + +fn threshold_from_percent(percent: u8, model: &str, max_tokens: u32) -> Result> { + if !(1..=100).contains(&percent) { + bail!("auto compaction threshold percent must be between 1 and 100"); + } + let Some(context_window) = crate::model::context_window_for(model) else { + return Ok(None); + }; + let threshold = context_window.saturating_mul(u32::from(percent)) / 100; + let resolved = + default_auto_threshold_for_window(context_window, max_tokens).map(|max| threshold.min(max)); + resolved + .map(|tokens| { + validate_auto_threshold_floor(tokens).with_context(|| { + format!("auto_threshold_percent={percent} resolves to {tokens} tokens") + }) + }) + .transpose() +} + +fn default_auto_threshold(model: &str, max_tokens: u32) -> Option { + crate::model::context_window_for(model) + .and_then(|window| default_auto_threshold_for_window(window, max_tokens)) +} + +fn default_auto_threshold_for_window(context_window: u32, max_tokens: u32) -> Option { + let reserve = max_tokens.min(AUTO_COMPACTION_OUTPUT_RESERVE_CAP); + context_window + .checked_sub(reserve)? 
+ .checked_sub(AUTO_COMPACTION_BUFFER_TOKENS) +} + +fn env_u32(key: &'static str) -> Result> { + env::string(key) + .map(|raw| raw.parse::().with_context(|| format!("{key}={raw:?}"))) + .transpose() +} + +fn env_u8(key: &'static str) -> Result> { + env::string(key) + .map(|raw| raw.parse::().with_context(|| format!("{key}={raw:?}"))) + .transpose() +} + fn validate_base_url(raw: &str) -> Result<()> { let url = reqwest::Url::parse(raw).with_context(|| format!("invalid base URL {raw:?}"))?; match url.scheme() { @@ -460,6 +677,9 @@ mod tests { "ANTHROPIC_BASE_URL", "ANTHROPIC_MAX_TOKENS", "ANTHROPIC_EFFORT", + "OX_COMPACTION_AUTO_ENABLED", + "OX_COMPACTION_AUTO_THRESHOLD_PERCENT", + "OX_COMPACTION_AUTO_THRESHOLD_TOKENS", "OX_SHOW_THINKING", "OX_SHOW_WELCOME", "OX_PROMPT_CACHE_TTL", @@ -519,6 +739,8 @@ mod tests { assert_eq!(config.max_tokens, 64_000); assert_eq!(config.effort, Some(Effort::Xhigh)); assert_eq!(config.prompt_cache_ttl, PromptCacheTtl::OneHour); + assert!(config.compaction.auto.enabled); + assert_eq!(config.compaction.auto.threshold_tokens, Some(967_000)); assert!(!config.show_thinking); assert!( config.show_welcome, @@ -561,6 +783,7 @@ mod tests { assert_eq!(config.model, "claude-opus-4-7"); assert_eq!(config.base_url, "https://example.invalid"); assert_eq!(config.max_tokens, 64); + assert!(config.compaction.auto.enabled); assert!(config.show_thinking); assert!( !config.show_welcome, @@ -590,6 +813,7 @@ mod tests { assert_eq!(config.model, "claude-sonnet-4-6"); assert_eq!(config.base_url, "https://config-file.invalid"); assert_eq!(config.max_tokens, 128); + assert!(config.compaction.auto.enabled); assert!(config.show_thinking); assert!( !config.show_welcome, @@ -671,6 +895,185 @@ mod tests { )); } + #[tokio::test] + async fn load_compaction_file_can_disable_default_on_auto_behavior() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! 
{r" + [client.compaction] + auto_enabled = false + "}, + ); + let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .unwrap(); + assert!(!config.compaction.auto.enabled); + } + + #[tokio::test] + async fn load_compaction_auto_env_beats_file() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! {r" + [client.compaction] + auto_enabled = false + "}, + ); + let vars = env_vars(vec![xdg(&dir), env("OX_COMPACTION_AUTO_ENABLED", "1")]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert!(config.compaction.auto.enabled); + } + + #[tokio::test] + async fn load_compaction_auto_threshold_tokens_sets_absolute_trigger() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! {r" + [client.compaction] + auto_threshold_tokens = 400000 + "}, + ); + let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .unwrap(); + assert_eq!(config.compaction.auto.threshold_tokens, Some(400_000)); + } + + #[tokio::test] + async fn load_compaction_auto_threshold_percent_uses_context_window() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "claude-opus-4-7[1m]"), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "40"), + ]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.compaction.auto.threshold_tokens, Some(400_000)); + } + + #[tokio::test] + async fn load_compaction_rejects_ambiguous_auto_thresholds() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! 
{r" + [client.compaction] + auto_threshold_tokens = 400000 + auto_threshold_percent = 40 + "}, + ); + let err = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .expect_err("ambiguous thresholds must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("only one"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_rejects_zero_auto_threshold_tokens() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("OX_COMPACTION_AUTO_THRESHOLD_TOKENS", "0"), + ]); + let err = temp_env::async_with_vars(vars, Config::load()) + .await + .expect_err("zero threshold must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("at least 50000 tokens"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_rejects_too_low_auto_threshold_tokens() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! {r" + [client.compaction] + auto_threshold_tokens = 49999 + "}, + ); + let err = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .expect_err("low threshold must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("at least 50000 tokens"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_rejects_too_low_auto_threshold_percent() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "claude-opus-4-7[1m]"), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "4"), + ]); + let err = temp_env::async_with_vars(vars, Config::load()) + .await + .expect_err("resolved low threshold must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("auto_threshold_percent=4"), "{msg}"); + assert!(msg.contains("40000 tokens"), "{msg}"); + assert!(msg.contains("at least 50000 tokens"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_rejects_threshold_above_model_safe_window() { + let dir = 
tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! {r#" + [client] + model = "claude-sonnet-4-6" + + [client.compaction] + auto_threshold_tokens = 400000 + "#}, + ); + let err = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .expect_err("threshold beyond context safety window must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("at most"), "{msg}"); + assert!(msg.contains("claude-sonnet-4-6"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_rejects_out_of_range_auto_threshold_percent() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "101"), + ]); + let err = temp_env::async_with_vars(vars, Config::load()) + .await + .expect_err("out-of-range threshold percent must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("between 1 and 100"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_percent_for_unknown_model_disables_auto_trigger() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "custom-model"), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "40"), + ]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert!(!config.compaction.auto.enabled); + assert_eq!(config.compaction.auto.threshold_tokens, None); + } + #[tokio::test] async fn load_invalid_max_tokens_env_errors() { let dir = tempfile::tempdir().unwrap(); @@ -955,6 +1358,10 @@ mod tests { effort: Some(Effort::Xhigh), max_tokens: 64_000, prompt_cache_ttl: PromptCacheTtl::FiveMin, + compaction: CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(42), + }), thinking: None, show_thinking: true, show_welcome: false, @@ -968,6 +1375,7 @@ mod tests { assert_eq!(snap.effort, Some(Effort::Xhigh)); assert_eq!(snap.max_tokens, 64_000); 
assert_eq!(snap.prompt_cache_ttl, PromptCacheTtl::FiveMin); + assert_eq!(snap.compaction.auto.threshold_tokens, Some(42)); assert!(snap.show_thinking); assert!(!snap.show_welcome); assert_eq!(snap.theme_name, "macchiato"); @@ -989,6 +1397,53 @@ mod tests { assert_eq!(display_bool(false), "off"); } + // ── display_auto_compaction ── + + #[test] + fn display_auto_compaction_names_enabled_threshold_or_off() { + assert_eq!( + display_auto_compaction(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(400_000), + }), + "on at 400000 tokens", + ); + assert_eq!( + display_auto_compaction(AutoCompactionConfig { + enabled: false, + threshold_tokens: Some(400_000), + }), + "off", + ); + assert_eq!( + display_auto_compaction(AutoCompactionConfig { + enabled: true, + threshold_tokens: None, + }), + "off (no threshold)", + ); + } + + // ── AutoCompactionConfig::should_trigger ── + + #[test] + fn should_trigger_requires_enabled_threshold_and_enough_tokens() { + let enabled = AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }; + assert!(enabled.should_trigger(10)); + assert!(!enabled.should_trigger(9)); + assert!(!AutoCompactionConfig::disabled().should_trigger(100)); + assert!( + !AutoCompactionConfig { + enabled: true, + threshold_tokens: None, + } + .should_trigger(100) + ); + } + // ── default_max_tokens ── #[test] diff --git a/crates/oxide-code/src/config/file.rs b/crates/oxide-code/src/config/file.rs index 11a8ac30..df71256a 100644 --- a/crates/oxide-code/src/config/file.rs +++ b/crates/oxide-code/src/config/file.rs @@ -33,6 +33,18 @@ pub(super) struct ClientConfig { pub(super) effort: Option, pub(super) max_tokens: Option, pub(super) prompt_cache_ttl: Option, + pub(super) compaction: Option, +} + +#[derive(Debug, Default, Clone, Copy, Deserialize)] +#[serde(deny_unknown_fields)] +pub(super) struct CompactionConfig { + #[serde(rename = "auto_enabled")] + pub(super) enabled: Option, + #[serde(rename = "auto_threshold_tokens")] + pub(super) 
threshold_tokens: Option, + #[serde(rename = "auto_threshold_percent")] + pub(super) threshold_percent: Option, } #[derive(Debug, Default, Deserialize)] @@ -71,6 +83,25 @@ impl ClientConfig { effort: other.effort.or(self.effort), max_tokens: other.max_tokens.or(self.max_tokens), prompt_cache_ttl: other.prompt_cache_ttl.or(self.prompt_cache_ttl), + compaction: merge_section(self.compaction, other.compaction, CompactionConfig::merge), + } + } +} + +impl CompactionConfig { + fn merge(self, other: Self) -> Self { + let other_sets_threshold = + other.threshold_tokens.is_some() || other.threshold_percent.is_some(); + let (threshold_tokens, threshold_percent) = if other_sets_threshold { + (other.threshold_tokens, other.threshold_percent) + } else { + (self.threshold_tokens, self.threshold_percent) + }; + + Self { + enabled: other.enabled.or(self.enabled), + threshold_tokens, + threshold_percent, } } } @@ -222,6 +253,11 @@ mod tests { effort: Some(super::super::Effort::Low), max_tokens: Some(1000), prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), + compaction: Some(CompactionConfig { + enabled: Some(false), + threshold_tokens: Some(400_000), + threshold_percent: None, + }), }), tui: Some(TuiConfig { show_thinking: Some(false), @@ -237,6 +273,11 @@ mod tests { effort: Some(super::super::Effort::Max), max_tokens: Some(2000), prompt_cache_ttl: Some(super::super::PromptCacheTtl::OneHour), + compaction: Some(CompactionConfig { + enabled: Some(true), + threshold_tokens: None, + threshold_percent: Some(40), + }), }), tui: Some(TuiConfig { show_thinking: Some(true), @@ -259,11 +300,34 @@ mod tests { client.prompt_cache_ttl, Some(super::super::PromptCacheTtl::OneHour) ); + let compaction = client.compaction.expect("compaction section should merge"); + assert_eq!(compaction.enabled, Some(true)); + assert_eq!(compaction.threshold_tokens, None); + assert_eq!(compaction.threshold_percent, Some(40)); let tui = merged.tui.expect("tui section should be present"); 
assert_eq!(tui.show_thinking, Some(true)); } + #[test] + fn merge_compaction_enabled_does_not_clear_base_threshold() { + let base = CompactionConfig { + enabled: Some(false), + threshold_tokens: Some(400_000), + threshold_percent: None, + }; + let other = CompactionConfig { + enabled: Some(true), + threshold_tokens: None, + threshold_percent: None, + }; + let merged = base.merge(other); + + assert_eq!(merged.enabled, Some(true)); + assert_eq!(merged.threshold_tokens, Some(400_000)); + assert_eq!(merged.threshold_percent, None); + } + #[test] fn merge_falls_back_to_base_when_other_is_none() { let base = FileConfig { @@ -274,6 +338,11 @@ mod tests { effort: Some(super::super::Effort::High), max_tokens: Some(4096), prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), + compaction: Some(CompactionConfig { + enabled: Some(false), + threshold_tokens: Some(400_000), + threshold_percent: None, + }), }), tui: Some(TuiConfig { show_thinking: Some(true), @@ -293,6 +362,11 @@ mod tests { client.prompt_cache_ttl, Some(super::super::PromptCacheTtl::FiveMin) ); + let compaction = client + .compaction + .expect("compaction section should survive"); + assert_eq!(compaction.enabled, Some(false)); + assert_eq!(compaction.threshold_tokens, Some(400_000)); let tui = merged.tui.expect("tui section should survive"); assert_eq!(tui.show_thinking, Some(true)); diff --git a/crates/oxide-code/src/main.rs b/crates/oxide-code/src/main.rs index 5116e7ed..ecc5bd87 100644 --- a/crates/oxide-code/src/main.rs +++ b/crates/oxide-code/src/main.rs @@ -23,7 +23,7 @@ use tokio::sync::mpsc; use tracing::{debug, warn}; use agent::event::{AgentEvent, AgentSink, StdioSink, UserAction, inert_user_action_channel}; -use agent::{TurnAbort, agent_turn}; +use agent::{AutoCompact, TokenUsage, TurnAbort, agent_turn}; use client::anthropic::Client; use config::{Config, Effort}; use file_tracker::FileTracker; @@ -329,124 +329,233 @@ async fn run_tui( /// Each `TurnAbort` arm emits exactly one terminal event 
(`Error` xor `TurnComplete`). #[expect( clippy::too_many_arguments, - reason = "session lifecycle (store, handle, file tracker) lives here for /clear; bundling into a struct would just rename the dependencies" + reason = "the task entry point receives the spawned loop dependencies before AgentLoopTask owns them" )] async fn agent_loop_task( - mut client: Client, + client: Client, tools: Arc, sink: tui::event::ChannelSink, - mut user_rx: mpsc::Receiver, - mut session: SessionHandle, + user_rx: mpsc::Receiver, + session: SessionHandle, resumed_messages: Vec, store: SessionStore, file_tracker: Arc, ) -> Result<()> { - let mut messages: Vec = resumed_messages; + AgentLoopTask { + client, + tools, + sink, + user_rx, + session, + messages: resumed_messages, + store, + file_tracker, + auto_compaction_failures: 0, + last_usage: None, + } + .run() + .await +} - while let Some(action) = user_rx.recv().await { - match action { - UserAction::SubmitPrompt(text) => { - let user_msg = Message::user(&text); - let outcome = session.record_message(user_msg.clone()).await; - sink.session_write_error(outcome.failure.as_deref()); - messages.push(user_msg); - - if let Some(seed) = outcome.ai_title_seed { - session::title_generator::spawn( - client.clone(), - session.clone(), - sink.clone(), - seed, - ); - } +struct AgentLoopTask { + client: Client, + tools: Arc, + sink: tui::event::ChannelSink, + user_rx: mpsc::Receiver, + session: SessionHandle, + messages: Vec, + store: SessionStore, + file_tracker: Arc, + auto_compaction_failures: u8, + last_usage: Option, +} - let prompt = prompt::build_prompt(client.model()).await; - let outcome = agent_turn( - &client, - &tools, - &mut messages, - &prompt, - &sink, - &session, - &mut user_rx, - ) - .await; - match outcome { - Ok(()) => { - _ = sink.send(AgentEvent::TurnComplete); - } - Err(TurnAbort::Cancelled) => { - _ = sink.send(AgentEvent::Cancelled); - } - Err(TurnAbort::Quit) => break, - Err(TurnAbort::Failed(e)) => { - _ = 
sink.send(AgentEvent::Error(format!("{e:#}"))); - } - } +enum LoopControl { + Continue, + Stop, +} + +impl AgentLoopTask { + async fn run(&mut self) -> Result<()> { + while let Some(action) = self.user_rx.recv().await { + if matches!(self.handle_action(action).await, LoopControl::Stop) { + break; } + } + + Ok(()) + } + + async fn handle_action(&mut self, action: UserAction) -> LoopControl { + match action { + UserAction::SubmitPrompt(text) => self.handle_submit_prompt(text).await, // Cancel / ConfirmExit are no-ops here; PreviewTheme / SwapTheme are TUI-only and // applied client-side in `App::apply_action_locally`. UserAction::Cancel | UserAction::ConfirmExit | UserAction::PreviewTheme { .. } - | UserAction::SwapTheme { .. } => {} + | UserAction::SwapTheme { .. } => LoopControl::Continue, UserAction::Clear => { - let outcome = - roll_session(&mut session, &store, &file_tracker, client.model()).await; - sink.session_write_error(outcome.finalize_failure.as_deref()); - client.set_session_id(outcome.new_id.clone()); - messages.clear(); - if let Err(e) = sink.send(AgentEvent::SessionRolled { id: outcome.new_id }) { + let outcome = roll_session( + &mut self.session, + &self.store, + &self.file_tracker, + self.client.model(), + ) + .await; + self.sink + .session_write_error(outcome.finalize_failure.as_deref()); + self.client.set_session_id(outcome.new_id.clone()); + self.messages.clear(); + self.reset_auto_compaction(); + if let Err(e) = self + .sink + .send(AgentEvent::SessionRolled { id: outcome.new_id }) + { // /clear succeeded server-side but the TUI never sees the new id — surfaces as // a stuck "old session" header. Error-level so the log makes it findable. 
tracing::error!("session-rolled event dropped: {e}"); } + LoopControl::Continue } UserAction::Resume { session_id } => { apply_resume( - &mut session, - &mut client, - &mut messages, - &store, - &file_tracker, - &sink, + &mut self.session, + &mut self.client, + &mut self.messages, + &self.store, + &self.file_tracker, + &self.sink, &session_id, ) .await; + self.reset_auto_compaction(); + LoopControl::Continue } UserAction::Compact { instructions } => { let outcome = apply_compact( - &client, - &session, - &file_tracker, - &mut messages, - &sink, - &mut user_rx, + &self.client, + &self.session, + &self.file_tracker, + &mut self.messages, + &self.sink, + &mut self.user_rx, instructions, ) .await; match outcome { - Ok(()) => {} + Ok(true) => { + self.reset_auto_compaction(); + LoopControl::Continue + } + Ok(false) => LoopControl::Continue, Err(TurnAbort::Cancelled) => { - _ = sink.send(AgentEvent::Cancelled); + _ = self.sink.send(AgentEvent::Cancelled); + LoopControl::Continue } - Err(TurnAbort::Quit) => break, + Err(TurnAbort::Quit) => LoopControl::Stop, Err(TurnAbort::Failed(e)) => { - _ = sink.send(AgentEvent::Error(format!("{e:#}"))); + _ = self.sink.send(AgentEvent::Error(format!("{e:#}"))); + LoopControl::Continue } } } UserAction::Rename { title } => { - apply_rename(&session, &sink, title).await; + apply_rename(&self.session, &self.sink, title).await; + LoopControl::Continue } UserAction::SwapConfig { model, effort } => { - apply_swap_config(&mut client, &sink, model, effort); + if apply_swap_config(&mut self.client, &self.sink, model, effort) { + self.auto_compaction_failures = 0; + } + LoopControl::Continue } - UserAction::Quit => break, + UserAction::Quit => LoopControl::Stop, } } - Ok(()) + async fn handle_submit_prompt(&mut self, text: String) -> LoopControl { + let mut pre_prompt_pending = Vec::new(); + let pre_prompt_compact = auto_compact_before_prompt( + &self.client, + &self.session, + &self.file_tracker, + &mut self.messages, + &self.sink, + &mut 
self.user_rx, + &mut pre_prompt_pending, + &mut self.auto_compaction_failures, + self.last_usage, + ) + .await; + match pre_prompt_compact { + Ok(true) => self.last_usage = None, + Ok(false) => {} + Err(TurnAbort::Cancelled) => { + _ = self.sink.send(AgentEvent::Cancelled); + return LoopControl::Continue; + } + Err(TurnAbort::Quit) => return LoopControl::Stop, + Err(TurnAbort::Failed(e)) => { + _ = self.sink.send(AgentEvent::Error(format!("{e:#}"))); + return LoopControl::Continue; + } + } + + let user_msg = Message::user(&text); + let outcome = self.session.record_message(user_msg.clone()).await; + self.sink.session_write_error(outcome.failure.as_deref()); + self.messages.push(user_msg); + agent::record_drained_prompts( + pre_prompt_pending.drain(..), + &mut self.messages, + &self.session, + &self.sink, + ) + .await; + + if let Some(seed) = outcome.ai_title_seed { + session::title_generator::spawn( + self.client.clone(), + self.session.clone(), + self.sink.clone(), + seed, + ); + } + + let prompt = prompt::build_prompt(self.client.model()).await; + let outcome = agent_turn( + &self.client, + &self.tools, + &mut self.messages, + &prompt, + &self.sink, + &self.session, + &mut self.user_rx, + ) + .await; + match outcome { + Ok(report) => { + self.last_usage = report.usage; + _ = self.sink.send(AgentEvent::TurnComplete); + LoopControl::Continue + } + Err(TurnAbort::Cancelled) => { + _ = self.sink.send(AgentEvent::Cancelled); + LoopControl::Continue + } + Err(TurnAbort::Quit) => LoopControl::Stop, + Err(TurnAbort::Failed(e)) => { + _ = self.sink.send(AgentEvent::Error(format!("{e:#}"))); + LoopControl::Continue + } + } + } + + fn reset_auto_compaction(&mut self) { + self.auto_compaction_failures = 0; + self.last_usage = None; + } } /// Drives the mid-session resume: swap the handle, repaint the chat, surface previous-session @@ -517,6 +626,38 @@ fn format_drift_warning(drifted: &[std::path::PathBuf]) -> String { ) } +#[expect( + clippy::too_many_arguments, + reason = 
"pre-prompt auto-compaction needs the live session state and the shared failure counter" +)] +async fn auto_compact_before_prompt( + client: &Client, + session: &SessionHandle, + file_tracker: &FileTracker, + messages: &mut Vec, + sink: &dyn AgentSink, + user_rx: &mut mpsc::Receiver, + pending: &mut Vec, + failures: &mut u8, + usage: Option, +) -> std::result::Result { + agent::auto_compact_if_needed( + client, + session, + messages, + sink, + user_rx, + pending, + Some(&mut AutoCompact { + config: client.compaction().auto, + failures, + file_tracker, + }), + usage, + ) + .await +} + /// Drives `/compact`: stream the summarization, replace the in-memory transcript with the /// synthetic continuation, persist the boundary + synthetic message, surface the post-compact /// system event so the TUI can repaint. Errors leave the session untouched. @@ -528,7 +669,7 @@ async fn apply_compact( sink: &dyn AgentSink, user_rx: &mut mpsc::Receiver, instructions: Option, -) -> std::result::Result<(), TurnAbort> { +) -> std::result::Result { let mut pending_prompts = Vec::new(); let summary = agent::await_unless_aborted( agent::compaction::compact_session(client, messages, instructions.as_deref()), @@ -537,26 +678,16 @@ async fn apply_compact( ) .await? .map_err(|e| TurnAbort::Failed(anyhow!("Compaction failed: {e:#}")))?; - let synthetic = agent::compaction::synthesize_post_compact_message(&summary); - let outcome = session - .compact(summary.clone(), instructions.clone(), synthetic.clone()) - .await; - sink.session_write_error(outcome.failure.as_deref()); - if outcome.failure.is_some() { - return Ok(()); - } - // Reset the file tracker so post-compact Edits require a fresh Read — pre-compact Reads - // are no longer in the visible transcript and the safety contract has to follow. 
- file_tracker.clear(); - *messages = vec![synthetic]; - if let Err(e) = sink.send(AgentEvent::SessionCompacted { + Ok(agent::compact_boundary::replace_session_with_summary( + session, + file_tracker, + messages, + sink, summary, - pre_count: outcome.pre_count, instructions, - }) { - tracing::error!("session-compacted event dropped: {e}"); - } - Ok(()) + false, + ) + .await) } async fn apply_rename(session: &SessionHandle, sink: &dyn AgentSink, title: String) { @@ -579,9 +710,12 @@ fn apply_swap_config( sink: &dyn AgentSink, model: Option, effort: Option, -) { - if let Some(id) = model { - client.set_model(id.into_inner()); +) -> bool { + if let Some(id) = model + && let Err(e) = client.set_model(id.into_inner()) + { + _ = sink.send(AgentEvent::Error(format!("Config change failed: {e:#}"))); + return false; } let resolved = match effort { Some(pick) => client.set_effort(pick), @@ -590,6 +724,7 @@ fn apply_swap_config( if let Err(e) = sink.send(AgentEvent::ConfigChanged { model_id: client.model().to_owned(), effort: resolved, + compaction: client.compaction(), requested_effort: effort, }) { // Dropping this leaves the status bar showing the previous model / effort even though the @@ -597,6 +732,7 @@ fn apply_swap_config( // wrong after a /model or /effort swap. 
tracing::error!("config-changed event dropped: {e}"); } + true } // ── Bare REPL Mode ── @@ -616,6 +752,8 @@ async fn bare_repl( let mut messages: Vec = resumed_messages; let mut shutdown_fired = false; let (_user_tx, mut user_rx) = inert_user_action_channel(); + let mut auto_compaction_failures = 0_u8; + let mut last_usage = None; let result: Result<()> = async { loop { @@ -639,10 +777,37 @@ async fn bare_repl( continue; } + let mut pre_prompt_pending = Vec::new(); + match auto_compact_before_prompt( + client, + &session, + &file_tracker, + &mut messages, + &sink, + &mut user_rx, + &mut pre_prompt_pending, + &mut auto_compaction_failures, + last_usage, + ) + .await + { + Ok(true) => last_usage = None, + Ok(false) => {} + Err(TurnAbort::Cancelled | TurnAbort::Quit) => continue, + Err(TurnAbort::Failed(e)) => return Err(e), + } + let user_msg = Message::user(&input); let outcome = session.record_message(user_msg.clone()).await; sink.session_write_error(outcome.failure.as_deref()); messages.push(user_msg); + agent::record_drained_prompts( + pre_prompt_pending.drain(..), + &mut messages, + &session, + &sink, + ) + .await; let prompt = prompt::build_prompt(model).await; let turn = agent_turn( client, @@ -662,7 +827,8 @@ async fn bare_repl( } }; match turn_result { - Ok(()) | Err(TurnAbort::Cancelled | TurnAbort::Quit) => {} + Ok(report) => last_usage = report.usage, + Err(TurnAbort::Cancelled | TurnAbort::Quit) => {} Err(TurnAbort::Failed(e)) => return Err(e), } _ = sink.send(AgentEvent::TurnComplete); @@ -711,7 +877,7 @@ async fn headless( ); let result: Result<()> = tokio::select! 
{ r = turn => match r { - Ok(()) | Err(TurnAbort::Cancelled | TurnAbort::Quit) => Ok(()), + Ok(_) | Err(TurnAbort::Cancelled | TurnAbort::Quit) => Ok(()), Err(TurnAbort::Failed(e)) => Err(e), }, () = shutdown_signal() => { @@ -730,3 +896,141 @@ async fn headless( println!(); Ok(()) } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + use super::*; + use crate::agent::event::CapturingSink; + use crate::client::anthropic::testing::{api_key, test_config}; + use crate::config::{AutoCompactionConfig, CompactionConfig}; + use crate::message::ContentBlock; + use crate::session::store::test_store; + + fn streamed_summary_body(text: &str) -> String { + let start = serde_json::json!({ + "type": "message_start", + "message": {"id": "m", "model": "claude-haiku-4-5"}, + }); + let block = serde_json::json!({ + "type": "content_block_start", + "index": 0, + "content_block": {"type": "text", "text": text}, + }); + format!( + "event: message_start\ndata: {start}\n\n\ + event: content_block_start\ndata: {block}\n\n\ + event: message_stop\ndata: {{\"type\":\"message_stop\"}}\n\n", + ) + } + + #[tokio::test] + async fn auto_compact_before_prompt_compacts_previous_turn_before_recording_new_prompt() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/v1/messages")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string(streamed_summary_body("auto summary")) + .insert_header("content-type", "text/event-stream"), + ) + .mount(&server) + .await; + + let mut config = test_config(server.uri(), api_key(), "claude-opus-4-7[1m]"); + config.compaction = CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(50_000), + }); + let client = Client::new(config, Some("sid".to_owned())).unwrap(); + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = 
session::handle::start(&store, "claude-opus-4-7[1m]"); + let file_tracker = Arc::new(FileTracker::default()); + let sink = CapturingSink::new(); + let (_tx, mut user_rx) = agent::event::inert_user_action_channel(); + let mut pending = Vec::new(); + let mut failures = 0; + let mut messages = vec![ + Message::user("one"), + Message::assistant("two"), + Message::user("three"), + Message::assistant("four"), + ]; + + let compacted = auto_compact_before_prompt( + &client, + &session, + &file_tracker, + &mut messages, + &sink, + &mut user_rx, + &mut pending, + &mut failures, + Some(TokenUsage::new(50_000, 1)), + ) + .await + .unwrap(); + + assert!(compacted); + assert!(pending.is_empty()); + assert_eq!(failures, 0); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("auto summary")) + ); + assert!(sink.events().iter().any(|event| { + matches!( + event, + AgentEvent::SessionCompacted { + automatic: true, + .. + } + ) + })); + } + + #[tokio::test] + async fn handle_action_swap_config_resets_auto_compaction_breaker() { + let server = MockServer::start().await; + let config = test_config(server.uri(), api_key(), "claude-opus-4-7[1m]"); + let client = Client::new(config, Some("sid".to_owned())).unwrap(); + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = session::handle::start(&store, "claude-opus-4-7[1m]"); + let file_tracker = Arc::new(FileTracker::default()); + let (sink, mut event_rx) = tui::event::channel(); + let (_user_tx, user_rx) = agent::event::inert_user_action_channel(); + let mut task = AgentLoopTask { + client, + tools: Arc::new(ToolRegistry::new(Vec::new())), + sink, + user_rx, + session, + messages: Vec::new(), + store, + file_tracker, + auto_compaction_failures: 3, + last_usage: Some(TokenUsage::new(100_000, 1)), + }; + + let control = task + .handle_action(UserAction::SwapConfig { + model: None, + effort: Some(Effort::Xhigh), + }) + .await; + 
+ assert!(matches!(control, LoopControl::Continue)); + assert_eq!(task.auto_compaction_failures, 0); + assert_eq!(task.last_usage, Some(TokenUsage::new(100_000, 1))); + assert!(matches!( + event_rx.recv().await, + Some(AgentEvent::ConfigChanged { .. }) + )); + } +} diff --git a/crates/oxide-code/src/model.rs b/crates/oxide-code/src/model.rs index c9f5e274..51ee3d1b 100644 --- a/crates/oxide-code/src/model.rs +++ b/crates/oxide-code/src/model.rs @@ -16,6 +16,9 @@ pub(crate) struct ModelInfo { pub(crate) capabilities: Capabilities, } +const STANDARD_CONTEXT_WINDOW: u32 = 200_000; +const CONTEXT_1M_WINDOW: u32 = 1_000_000; + // ── Capabilities ── /// Per-model gate set consumed by the wire-builder (header + body fields), the slash commands @@ -208,6 +211,17 @@ pub(crate) fn capabilities_for(model: &str) -> Capabilities { .unwrap_or_default() } +/// Effective context window for known Claude models. `[1m]` opts into the 1M beta only on +/// models that advertise that capability; unknown raw ids stay disabled for auto-compaction. +pub(crate) fn context_window_for(model: &str) -> Option { + let info = lookup(model)?; + if model.ends_with("[1m]") && info.capabilities.context_1m { + Some(CONTEXT_1M_WINDOW) + } else { + Some(STANDARD_CONTEXT_WINDOW) + } +} + /// Human-facing label: the row's [`ModelInfo::display_name`] plus a ` (1M context)` suffix on /// `[1m]` ids; the raw id when the model is unknown. 
pub(crate) fn display_name(model: &str) -> Cow<'_, str> { @@ -498,6 +512,25 @@ mod tests { } } + // ── context_window_for ── + + #[test] + fn context_window_for_known_models_defaults_to_standard_window() { + assert_eq!(context_window_for("claude-opus-4-7"), Some(200_000)); + assert_eq!(context_window_for("claude-haiku-4-5"), Some(200_000)); + } + + #[test] + fn context_window_for_1m_suffix_requires_model_capability() { + assert_eq!(context_window_for("claude-opus-4-7[1m]"), Some(1_000_000)); + assert_eq!(context_window_for("claude-haiku-4-5[1m]"), Some(200_000)); + } + + #[test] + fn context_window_for_unknown_model_is_none() { + assert_eq!(context_window_for("claude-future-9"), None); + } + // ── display_name ── #[test] diff --git a/crates/oxide-code/src/slash.rs b/crates/oxide-code/src/slash.rs index 6913bc26..ed44821c 100644 --- a/crates/oxide-code/src/slash.rs +++ b/crates/oxide-code/src/slash.rs @@ -120,7 +120,9 @@ fn classify_in(commands: &[&dyn registry::SlashCommand], parsed: &Parsed) -> Sla /// Fully-populated `LiveSessionInfo` for per-command tests. #[cfg(test)] pub(crate) fn test_session_info() -> LiveSessionInfo { - use crate::config::{ConfigSnapshot, Effort, PromptCacheTtl}; + use crate::config::{ + AutoCompactionConfig, CompactionConfig, ConfigSnapshot, Effort, PromptCacheTtl, + }; // Real MODELS row so `display_name()` resolves to a known label. 
LiveSessionInfo { @@ -134,6 +136,10 @@ pub(crate) fn test_session_info() -> LiveSessionInfo { effort: Some(Effort::High), max_tokens: 32_000, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }), show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), diff --git a/crates/oxide-code/src/slash/config.rs b/crates/oxide-code/src/slash/config.rs index 0f6aa3d7..48782f00 100644 --- a/crates/oxide-code/src/slash/config.rs +++ b/crates/oxide-code/src/slash/config.rs @@ -6,7 +6,7 @@ use std::path::Path; use super::context::{LiveSessionInfo, SlashContext}; use super::registry::{SlashCommand, SlashOutcome}; -use crate::config::{display_bool, display_effort, file}; +use crate::config::{display_auto_compaction, display_bool, display_effort, file}; use crate::tui::modal::kv_overview::{KvOverview, KvSection}; use crate::util::path::tildify; @@ -54,6 +54,10 @@ fn build_modal( "Prompt Cache TTL".to_owned(), cfg.prompt_cache_ttl.to_string(), ), + ( + "Auto Compaction".to_owned(), + display_auto_compaction(cfg.compaction.auto), + ), ( "Show Thinking".to_owned(), display_bool(cfg.show_thinking).to_owned(), @@ -95,6 +99,28 @@ mod tests { use crate::tui::modal::Modal; use crate::tui::theme::Theme; + fn render_modal(modal: &KvOverview, width: u16) -> String { + use ratatui::Terminal; + use ratatui::backend::TestBackend; + use ratatui::layout::Rect; + + let height = modal.height(width); + let theme = Theme::default(); + let mut terminal = Terminal::new(TestBackend::new(width, height)).unwrap(); + terminal + .draw(|frame| modal.render(frame, Rect::new(0, 0, width, height), &theme)) + .unwrap(); + let buffer = terminal.backend().buffer(); + let mut out = String::new(); + for y in 0..height { + for x in 0..width { + out.push_str(buffer[(x, y)].symbol()); + } + out.push('\n'); + } + out + } + // ── ConfigCmd metadata ── #[test] @@ -123,11 +149,21 @@ mod 
tests { #[test] fn build_modal_height_accounts_for_both_sections() { - // title + blank + (heading + blank + 8 rows) + blank + (heading + blank + 2 rows) - // + blank + footer = 2 + 10 + 1 + 4 + 2 = 19. + // title + blank + (heading + blank + 9 rows) + blank + (heading + blank + 2 rows) + // + blank + footer = 2 + 11 + 1 + 4 + 2 = 20. + let info = test_session_info(); + let m = build_modal(&info, None, None); + assert_eq!(m.height(80), 20); + } + + #[test] + fn build_modal_renders_resolved_auto_compaction() { let info = test_session_info(); let m = build_modal(&info, None, None); - assert_eq!(m.height(80), 19); + let rendered = render_modal(&m, 80); + + assert!(rendered.contains("Auto Compaction"), "{rendered}"); + assert!(rendered.contains("on at 155000 tokens"), "{rendered}"); } // ── display_path ── diff --git a/crates/oxide-code/src/slash/status.rs b/crates/oxide-code/src/slash/status.rs index 67d67ced..f76f25aa 100644 --- a/crates/oxide-code/src/slash/status.rs +++ b/crates/oxide-code/src/slash/status.rs @@ -3,7 +3,7 @@ use super::context::{LiveSessionInfo, SlashContext}; use super::registry::{SlashCommand, SlashOutcome}; -use crate::config::{display_bool, display_effort}; +use crate::config::{display_auto_compaction, display_bool, display_effort}; use crate::tui::modal::kv_overview::{KvOverview, KvSection}; pub(super) struct StatusCmd; @@ -37,9 +37,13 @@ fn build_modal(info: &LiveSessionInfo) -> KvOverview { ("Auth".to_owned(), info.config.auth_label.to_owned()), ("Version".to_owned(), info.version.to_owned()), ( - "Context Cache".to_owned(), + "Prompt Cache TTL".to_owned(), info.config.prompt_cache_ttl.to_string(), ), + ( + "Auto Compaction".to_owned(), + display_auto_compaction(info.config.compaction.auto), + ), ( "Show Thinking".to_owned(), display_bool(info.config.show_thinking).to_owned(), @@ -60,6 +64,28 @@ mod tests { use crate::tui::modal::Modal; use crate::tui::theme::Theme; + fn render_modal(modal: &KvOverview, width: u16) -> String { + use 
ratatui::Terminal; + use ratatui::backend::TestBackend; + use ratatui::layout::Rect; + + let height = modal.height(width); + let theme = Theme::default(); + let mut terminal = Terminal::new(TestBackend::new(width, height)).unwrap(); + terminal + .draw(|frame| modal.render(frame, Rect::new(0, 0, width, height), &theme)) + .unwrap(); + let buffer = terminal.backend().buffer(); + let mut out = String::new(); + for y in 0..height { + for x in 0..width { + out.push_str(buffer[(x, y)].symbol()); + } + out.push('\n'); + } + out + } + // ── StatusCmd metadata ── #[test] @@ -91,7 +117,17 @@ mod tests { fn build_modal_renders_one_row_per_session_descriptor() { let info = test_session_info(); let m = build_modal(&info); - // Title + blank + 9 rows + blank + footer = 13. - assert_eq!(m.height(80), 13); + // Title + blank + 10 rows + blank + footer = 14. + assert_eq!(m.height(80), 14); + } + + #[test] + fn build_modal_renders_resolved_auto_compaction() { + let info = test_session_info(); + let m = build_modal(&info); + let rendered = render_modal(&m, 80); + + assert!(rendered.contains("Auto Compaction"), "{rendered}"); + assert!(rendered.contains("on at 155000 tokens"), "{rendered}"); } } diff --git a/crates/oxide-code/src/tui/app.rs b/crates/oxide-code/src/tui/app.rs index dc336cc6..0da516e7 100644 --- a/crates/oxide-code/src/tui/app.rs +++ b/crates/oxide-code/src/tui/app.rs @@ -11,8 +11,9 @@ use std::time::{Duration, Instant}; use anyhow::Result; use crossterm::event::{Event, EventStream, KeyCode, KeyEvent}; use futures::{Stream, StreamExt}; -use ratatui::layout::{Constraint, Layout}; +use ratatui::layout::{Alignment, Constraint, Layout, Rect}; use ratatui::text::{Line, Span}; +use ratatui::widgets::Paragraph; use tokio::sync::mpsc; use super::components::chat::ChatView; @@ -25,11 +26,12 @@ use super::pending_calls::{PendingCall, PendingCalls, result_header}; use super::terminal::{Tui, draw_sync}; use super::theme::Theme; use crate::agent::event::{AgentEvent, UserAction}; 
+use crate::config::{CompactionConfig, Effort, display_auto_compaction}; use crate::message::Message; use crate::session::entry::CompactInfo; use crate::slash::{self, LiveSessionInfo, SlashContext, SlashKind}; use crate::tool::{ToolMetadata, ToolRegistry, ToolResultView}; -use crate::util::text::truncate_to_width; +use crate::util::text::{center_truncate_to_width, truncate_to_width}; /// Tick interval for animation frames and render coalescing (~60 FPS). const TICK_INTERVAL: Duration = Duration::from_millis(16); @@ -51,6 +53,8 @@ pub(crate) struct App { tools: Arc, /// Correlates `ToolCallStart` with its matching `ToolCallEnd`. pending_calls: PendingCalls, + /// Prompt already painted for the in-flight turn. Replayed if auto-compaction clears chat. + active_prompt: Option, /// FIFO of prompts submitted mid-turn. Drained at turn boundaries. pending_prompts: VecDeque, modals: ModalStack, @@ -100,6 +104,7 @@ impl App { user_tx, tools, pending_calls: PendingCalls::new(), + active_prompt: None, pending_prompts: VecDeque::new(), modals: ModalStack::new(), preview_theme_snapshot: None, @@ -362,6 +367,9 @@ impl App { } if let Some(action) = synthesized { if matches!(action, UserAction::SubmitPrompt(_)) { + if slash::echoes_input(&parsed) { + self.active_prompt = Some(text.to_owned()); + } self.input.set_enabled(false); self.status_bar.set_status(Status::Streaming); self.forward_to_agent(action); @@ -372,6 +380,7 @@ impl App { return false; } self.chat.push_user_message(text.to_owned()); + self.active_prompt = Some(text.to_owned()); self.input.set_enabled(false); self.status_bar.set_status(Status::Streaming); return true; @@ -454,8 +463,13 @@ impl App { } AgentEvent::Cancelled => { self.chat.push_interrupted_marker(); + self.active_prompt = None; self.finalize_idle(); } + AgentEvent::AutoCompactionStarted => { + self.set_active_status(Status::Compacting); + self.input.set_enabled(false); + } AgentEvent::SessionTitleUpdated { session_id, title } => { if session_id == 
self.session_info.session_id { self.status_bar.set_title(Some(title)); @@ -465,6 +479,7 @@ impl App { self.session_info.session_id = id; self.status_bar.set_title(None); self.chat.clear_history(); + self.active_prompt = None; } AgentEvent::SessionResumed { id, @@ -477,29 +492,19 @@ impl App { summary, pre_count, instructions, - } => self.apply_session_compacted(&summary, pre_count, instructions.as_deref()), + automatic, + } => self.apply_session_compacted( + &summary, + pre_count, + instructions.as_deref(), + automatic, + ), AgentEvent::ConfigChanged { model_id, effort, + compaction, requested_effort, - } => { - let model_changed = model_id != self.session_info.config.model_id; - let prev_effort = self.session_info.config.effort; - let confirmation = format_config_change( - &model_id, - model_changed, - prev_effort, - effort, - requested_effort, - ); - if model_changed { - self.status_bar - .set_model(crate::model::display_name(&model_id).into_owned()); - } - self.session_info.config.model_id = model_id; - self.session_info.config.effort = effort; - self.chat.push_system_message(confirmation); - } + } => self.apply_config_changed(model_id, effort, compaction, requested_effort), AgentEvent::Error(msg) => { self.chat.push_error(&msg); self.finish_turn(); @@ -510,6 +515,7 @@ impl App { fn finish_turn(&mut self) { self.chat.commit_streaming(); + self.active_prompt = None; self.finalize_idle(); } @@ -528,6 +534,7 @@ impl App { self.chat .load_history(messages, compact, tool_metadata, self.tools.as_ref()); self.pending_calls.clear(); + self.active_prompt = None; // Queued prompts belonged to the previous thread, so resume drops them. 
let dropped = self.pending_prompts.len(); self.pending_prompts.clear(); @@ -548,13 +555,49 @@ impl App { summary: &str, pre_count: u32, instructions: Option<&str>, + automatic: bool, ) { self.chat.clear_history(); self.pending_calls.clear(); self.chat .push_compacted_block(pre_count, instructions, summary.to_owned()); + if automatic && let Some(prompt) = &self.active_prompt { + self.chat.push_user_message(prompt.clone()); + } self.clear_modals(); - self.finalize_idle(); + if !automatic { + self.active_prompt = None; + self.finalize_idle(); + } + } + + fn apply_config_changed( + &mut self, + model_id: String, + effort: Option, + compaction: CompactionConfig, + requested_effort: Option, + ) { + let model_changed = model_id != self.session_info.config.model_id; + let prev_effort = self.session_info.config.effort; + let prev_compaction = self.session_info.config.compaction; + let confirmation = format_config_change( + &model_id, + model_changed, + prev_effort, + effort, + requested_effort, + prev_compaction, + compaction, + ); + if model_changed { + self.status_bar + .set_model(crate::model::display_name(&model_id).into_owned()); + } + self.session_info.config.model_id = model_id; + self.session_info.config.effort = effort; + self.session_info.config.compaction = compaction; + self.chat.push_system_message(confirmation); } /// Resets to idle, clears orphan calls, re-enables input, and drains queued prompts. 
@@ -643,6 +686,7 @@ impl App { welcome::paint(frame, chunks[1], &self.theme, &snap); } else { self.chat.render(frame, chunks[1]); + self.render_jump_overlay(frame, chunks[1]); } if preview_height > 0 { self.render_preview(frame, chunks[2]); @@ -689,6 +733,43 @@ impl App { area, ); } + + fn render_jump_overlay(&self, frame: &mut ratatui::Frame<'_>, area: Rect) { + if !self.chat.is_scrolled_up() || area.width < 25 || area.height == 0 { + return; + } + + let new_count = self.chat.new_content_since_pause(); + let label = jump_overlay_label(new_count, usize::from(area.width)); + let style = if new_count == 0 { + self.theme.dim() + } else { + self.theme.accent() + }; + let band = Rect { + y: area.y + area.height.saturating_sub(1), + height: 1, + ..area + }; + frame.render_widget( + Paragraph::new(Line::from(Span::styled(label, style))) + .style(self.theme.surface()) + .alignment(Alignment::Right), + band, + ); + } +} + +fn jump_overlay_label(new_count: u32, width: usize) -> String { + if width < 40 { + return "↓ (ctrl+End)".to_owned(); + } + let label = match new_count { + 0 => "Jump to bottom (ctrl+End) ↓".to_owned(), + 1 => "1 new message (ctrl+End) ↓".to_owned(), + n => format!("{n} new messages (ctrl+End) ↓"), + }; + center_truncate_to_width(&label, width.saturating_sub(2)) } /// Renders a queued prompt as a dim ghost, capped at `body_width` columns. 
@@ -711,9 +792,30 @@ fn format_config_change( prev_effort: Option, new_effort: Option, requested_effort: Option, + prev_compaction: CompactionConfig, + new_compaction: CompactionConfig, ) -> String { - if !model_changed { - return match (requested_effort, new_effort) { + let message = if model_changed { + let head = format!( + "Switched to {} ({model_id})", + crate::model::display_name(model_id) + ); + match (requested_effort, prev_effort, new_effort) { + (Some(req), _, Some(eff)) if req == eff => format!("{head} · effort {eff}."), + (Some(req), _, Some(eff)) => format!("{head} · effort {eff} (clamped from {req})."), + (Some(req), _, None) => { + format!("{head}. Effort unchanged — model has no effort tier (asked for {req}).") + } + (None, None, None) => format!("{head}."), + (None, Some(_), None) => format!("{head}. Effort cleared (model has no effort tier)."), + (None, None, Some(eff)) => format!("{head} · effort {eff} (model default)."), + (None, Some(prev), Some(new)) if new < prev => { + format!("{head} · effort {new} (clamped from {prev}).") + } + (None, Some(_), Some(eff)) => format!("{head} · effort {eff}."), + } + } else { + match (requested_effort, new_effort) { (Some(req), Some(eff)) if req == eff => format!("Effort set to {eff}."), (Some(req), Some(eff)) => format!("Effort set to {eff} (clamped from {req})."), (Some(req), None) => { @@ -721,26 +823,25 @@ fn format_config_change( } // Slash dispatch keeps this unreachable, but a clear fallback beats a panic. (None, _) => "Config unchanged.".to_owned(), - }; - } - let head = format!( - "Switched to {} ({model_id})", - crate::model::display_name(model_id) - ); - match (requested_effort, prev_effort, new_effort) { - (Some(req), _, Some(eff)) if req == eff => format!("{head} · effort {eff}."), - (Some(req), _, Some(eff)) => format!("{head} · effort {eff} (clamped from {req})."), - (Some(req), _, None) => { - format!("{head}. 
Effort unchanged — model has no effort tier (asked for {req}).") } - (None, None, None) => format!("{head}."), - (None, Some(_), None) => format!("{head}. Effort cleared (model has no effort tier)."), - (None, None, Some(eff)) => format!("{head} · effort {eff} (model default)."), - (None, Some(prev), Some(new)) if new < prev => { - format!("{head} · effort {new} (clamped from {prev}).") - } - (None, Some(_), Some(eff)) => format!("{head} · effort {eff}."), + }; + if model_changed && prev_compaction.auto != new_compaction.auto { + return append_sentence( + message, + &format!( + "Auto compaction {}", + display_auto_compaction(new_compaction.auto) + ), + ); } + message +} + +fn append_sentence(mut message: String, sentence: &str) -> String { + if message.ends_with('.') { + message.pop(); + } + format!("{message}. {sentence}.") } #[cfg(test)] @@ -807,7 +908,9 @@ mod tests { fn test_session_info() -> LiveSessionInfo { // `test-model` is intentionally unknown so `display_name` falls back to the literal // id, keeping insta snapshots stable. - use crate::config::{ConfigSnapshot, Effort, PromptCacheTtl}; + use crate::config::{ + AutoCompactionConfig, CompactionConfig, ConfigSnapshot, Effort, PromptCacheTtl, + }; LiveSessionInfo { cwd: "~/test".to_owned(), @@ -820,6 +923,10 @@ mod tests { effort: Some(Effort::High), max_tokens: 32_000, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }), show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), @@ -827,6 +934,13 @@ mod tests { } } + fn base_compaction() -> CompactionConfig { + CompactionConfig::resolved_for_test(crate::config::AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }) + } + /// Minimal modal for layout tests: paints `title` on its only row, ignores keys. 
struct FakeModal { title: String, @@ -1928,6 +2042,12 @@ mod tests { app.handle_agent_event(AgentEvent::ConfigChanged { model_id: "claude-sonnet-4-6".to_owned(), effort: Some(crate::config::Effort::High), + compaction: crate::config::CompactionConfig::resolved_for_test( + crate::config::AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(167_000), + }, + ), requested_effort: None, }); @@ -1937,10 +2057,14 @@ mod tests { Some(crate::config::Effort::High), ); assert_eq!(app.status_bar.model(), "Claude Sonnet 4.6"); + assert_eq!( + app.session_info.config.compaction.auto.threshold_tokens, + Some(167_000), + ); let body = app.chat.last_system_text().expect("confirmation block"); assert_eq!( body, - "Switched to Claude Sonnet 4.6 (claude-sonnet-4-6) · effort high.", + "Switched to Claude Sonnet 4.6 (claude-sonnet-4-6) · effort high. Auto compaction on at 167000 tokens.", ); assert!(app.dirty); } @@ -1954,6 +2078,7 @@ mod tests { app.handle_agent_event(AgentEvent::ConfigChanged { model_id: app.session_info.config.model_id.clone(), effort: Some(crate::config::Effort::Xhigh), + compaction: app.session_info.config.compaction, requested_effort: Some(crate::config::Effort::Xhigh), }); assert_eq!( @@ -1972,7 +2097,15 @@ mod tests { fn format_config_change_swap_both_none_omits_effort_clause() { // Pin: no `effort` substring at all, never a stray "none" // word. Mutation that prints `effort none.` would surface here. 
- let s = format_config_change("claude-haiku-4-5", true, None, None, None); + let s = format_config_change( + "claude-haiku-4-5", + true, + None, + None, + None, + base_compaction(), + base_compaction(), + ); assert_eq!(s, "Switched to Claude Haiku 4.5 (claude-haiku-4-5)."); } @@ -1986,6 +2119,8 @@ mod tests { Some(crate::config::Effort::Xhigh), None, None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2004,6 +2139,8 @@ mod tests { None, Some(crate::config::Effort::Xhigh), None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2020,6 +2157,8 @@ mod tests { Some(crate::config::Effort::Xhigh), Some(crate::config::Effort::High), None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2037,6 +2176,8 @@ mod tests { Some(crate::config::Effort::High), Some(crate::config::Effort::High), None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2054,6 +2195,8 @@ mod tests { Some(crate::config::Effort::Medium), Some(crate::config::Effort::High), Some(crate::config::Effort::Xhigh), + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2069,6 +2212,8 @@ mod tests { Some(crate::config::Effort::High), Some(crate::config::Effort::Xhigh), Some(crate::config::Effort::Xhigh), + base_compaction(), + base_compaction(), ); assert_eq!(s, "Effort set to xhigh."); } @@ -2081,6 +2226,8 @@ mod tests { Some(crate::config::Effort::Medium), Some(crate::config::Effort::High), Some(crate::config::Effort::Xhigh), + base_compaction(), + base_compaction(), ); assert_eq!(s, "Effort set to high (clamped from xhigh)."); } @@ -2095,6 +2242,8 @@ mod tests { None, None, Some(crate::config::Effort::High), + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2102,6 +2251,30 @@ mod tests { ); } + #[test] + fn format_config_change_model_swap_mentions_compaction_threshold_change() { + let new_compaction = + CompactionConfig::resolved_for_test(crate::config::AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(167_000), 
+ }); + + let s = format_config_change( + "claude-sonnet-4-6", + true, + Some(crate::config::Effort::High), + Some(crate::config::Effort::High), + None, + base_compaction(), + new_compaction, + ); + + assert_eq!( + s, + "Switched to Claude Sonnet 4.6 (claude-sonnet-4-6) · effort high. Auto compaction on at 167000 tokens." + ); + } + #[test] fn handle_session_rolled_clears_chat_rebinds_id_and_drops_stale_title() { let (mut app, _rx, _agent_tx) = test_app(Some("Old session title")); @@ -2254,6 +2427,7 @@ mod tests { summary: "## Recap\n\nDid the thing.".to_owned(), pre_count: 4, instructions: Some("focus on auth".to_owned()), + automatic: false, }); assert_eq!( @@ -2283,6 +2457,7 @@ mod tests { summary: "s".to_owned(), pre_count: 2, instructions: None, + automatic: false, }); let forwarded = rx.recv().await.expect("drained prompt reaches the agent"); @@ -2301,10 +2476,74 @@ mod tests { summary: "summary only".to_owned(), pre_count: 2, instructions: None, + automatic: false, }); assert_eq!(app.chat.entry_count(), 1, "exactly one boundary block"); } + #[tokio::test] + async fn handle_session_compacted_automatic_keeps_busy_state_and_queued_prompts() { + let (mut app, mut rx, _agent_tx) = test_app(None); + app.input.set_enabled(false); + app.status_bar.set_status(Status::Compacting); + app.pending_prompts + .push_back("queued while busy".to_owned()); + + app.handle_agent_event(AgentEvent::SessionCompacted { + summary: "auto summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: true, + }); + + assert_eq!(app.chat.entry_count(), 1); + assert_eq!(app.status_bar.status(), &Status::Compacting); + assert!(!app.input.is_enabled()); + assert_eq!(app.pending_prompts.len(), 1); + assert!( + rx.try_recv().is_err(), + "automatic compact must not drain early" + ); + } + + #[test] + fn handle_auto_compaction_started_sets_compacting_status() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.dispatch_user_action(UserAction::SubmitPrompt("active 
question".to_owned())); + + app.handle_agent_event(AgentEvent::AutoCompactionStarted); + + assert_eq!(app.status_bar.status(), &Status::Compacting); + assert!(!app.input.is_enabled()); + } + + #[tokio::test] + async fn handle_session_compacted_automatic_replays_active_prompt_after_summary() { + let (mut app, mut rx, _agent_tx) = test_app(None); + + app.dispatch_user_action(UserAction::SubmitPrompt("active question".to_owned())); + let forwarded = rx.recv().await.expect("prompt reaches the agent"); + assert_eq!( + forwarded, + UserAction::SubmitPrompt("active question".to_owned()) + ); + app.handle_agent_event(AgentEvent::AutoCompactionStarted); + + app.handle_agent_event(AgentEvent::SessionCompacted { + summary: "auto summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: true, + }); + + assert_eq!(app.chat.entry_count(), 2); + assert_eq!(app.status_bar.status(), &Status::Compacting); + assert!(!app.input.is_enabled()); + let text = rendered_text(&mut app, 80, 10); + assert!(text.contains("auto summary")); + assert!(text.contains("active question")); + } + #[test] fn handle_stream_token_switches_to_streaming_and_disables_input() { let (mut app, _rx, _agent_tx) = test_app(None); @@ -2795,6 +3034,30 @@ mod tests { .join("\n") } + fn long_chat_block() -> String { + use std::fmt::Write as _; + + let mut body = String::new(); + for i in 0..30 { + _ = writeln!(body, "line {i:02} of a long chat block"); + } + body + } + + // ── jump_overlay_label ── + + #[test] + fn jump_overlay_label_renders_idle_and_new_content_variants() { + assert_eq!(jump_overlay_label(0, 60), "Jump to bottom (ctrl+End) ↓"); + assert_eq!(jump_overlay_label(1, 60), "1 new message (ctrl+End) ↓"); + assert_eq!(jump_overlay_label(3, 60), "3 new messages (ctrl+End) ↓"); + } + + #[test] + fn jump_overlay_label_uses_short_form_below_full_width() { + assert_eq!(jump_overlay_label(3, 30), "↓ (ctrl+End)"); + } + #[test] fn draw_frame_lays_out_status_chat_and_input_in_order() { let (mut app, 
_rx, _agent_tx) = test_app(Some("Session title")); @@ -2824,6 +3087,42 @@ mod tests { insta::assert_snapshot!(render_app(&mut app, 60, 8)); } + #[test] + fn draw_frame_auto_scroll_on_hides_jump_overlay() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.chat.push_system_message(long_chat_block()); + + let text = rendered_text(&mut app, 60, 10); + assert!(!text.contains("Jump to bottom"), "{text}"); + } + + #[test] + fn draw_frame_scrolled_up_shows_jump_overlay() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.chat.push_system_message(long_chat_block()); + _ = render_app(&mut app, 60, 10); + + app.chat + .handle_event(&key_event(KeyCode::PageUp, KeyModifiers::NONE)); + let text = rendered_text(&mut app, 60, 10); + + assert!(text.contains("Jump to bottom"), "{text}"); + } + + #[test] + fn draw_frame_scrolled_up_counts_new_content() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.chat.push_system_message(long_chat_block()); + _ = render_app(&mut app, 60, 10); + app.chat + .handle_event(&key_event(KeyCode::PageUp, KeyModifiers::NONE)); + + app.chat.push_error("background update"); + let text = rendered_text(&mut app, 60, 10); + + assert!(text.contains("1 new message"), "{text}"); + } + #[test] fn draw_frame_narrow_width_still_renders_all_three_panels() { let (mut app, _rx, _agent_tx) = test_app(Some("narrow")); diff --git a/crates/oxide-code/src/tui/components/chat.rs b/crates/oxide-code/src/tui/components/chat.rs index e3a421f4..0addc3b6 100644 --- a/crates/oxide-code/src/tui/components/chat.rs +++ b/crates/oxide-code/src/tui/components/chat.rs @@ -40,6 +40,7 @@ pub(crate) struct ChatView { viewport_height: u16, viewport_width: u16, auto_scroll: bool, + new_content_since_pause: u32, } impl ChatView { @@ -55,6 +56,7 @@ impl ChatView { viewport_height: 0, viewport_width: 0, auto_scroll: true, + new_content_since_pause: 0, } } @@ -150,6 +152,7 @@ impl ChatView { self.commit_streaming(); self.blocks.push(Box::new(UserMessage::new(text))); 
self.auto_scroll = true; + self.new_content_since_pause = 0; } /// Appends a streamed token to the current assistant response. @@ -158,6 +161,9 @@ impl ChatView { /// block split. Auto-scroll only follows the tail when the user hasn't manually scrolled up. pub(crate) fn append_stream_token(&mut self, token: &str) { self.commit_thinking_buffer(); + if self.streaming.is_none() { + self.bump_paused_counter(); + } self.streaming .get_or_insert_with(StreamingAssistant::new) .append(token); @@ -169,6 +175,9 @@ impl ChatView { /// Appends a thinking token to the live thinking display buffer. pub(crate) fn append_thinking_token(&mut self, token: &str) { + if self.thinking_buffer.is_empty() { + self.bump_paused_counter(); + } self.thinking_buffer.push_str(token); if self.auto_scroll { self.scroll_to_bottom(); @@ -200,6 +209,7 @@ impl ChatView { /// Appends a tool call, flushing any in-flight streaming buffer. pub(crate) fn push_tool_call(&mut self, icon: &'static str, label: &str) { self.commit_streaming(); + self.bump_paused_counter(); self.blocks.push(Box::new(ToolCallBlock::new(icon, label))); } @@ -210,6 +220,7 @@ impl ChatView { view: ToolResultView, is_error: bool, ) { + self.bump_paused_counter(); self.blocks .push(Box::new(ToolResultBlock::new(label, view, is_error))); } @@ -225,11 +236,13 @@ impl ChatView { /// Appends an error message. pub(crate) fn push_error(&mut self, msg: &str) { + self.bump_paused_counter(); self.blocks.push(Box::new(ErrorBlock::new(msg))); } /// Appends informational output from a slash command. pub(crate) fn push_system_message(&mut self, body: impl Into) { + self.bump_paused_counter(); self.blocks.push(Box::new(SystemMessageBlock::new(body))); } @@ -240,6 +253,7 @@ impl ChatView { instructions: Option<&str>, summary: impl Into, ) { + self.bump_paused_counter(); self.blocks.push(Box::new(CompactedBlock::new( pre_count, instructions, @@ -249,12 +263,14 @@ impl ChatView { /// Appends a unified diff body for display. 
pub(crate) fn push_git_diff(&mut self, text: impl Into) { + self.bump_paused_counter(); self.blocks.push(Box::new(GitDiffBlock::new(text))); } /// Appends an interrupted marker. Flushes any in-flight streaming buffer first. pub(crate) fn push_interrupted_marker(&mut self) { self.commit_streaming(); + self.bump_paused_counter(); self.blocks.push(Box::new(InterruptedMarker)); } @@ -266,6 +282,15 @@ impl ChatView { self.scroll_offset = 0; self.content_height.set(0); self.auto_scroll = true; + self.new_content_since_pause = 0; + } + + pub(crate) const fn is_scrolled_up(&self) -> bool { + !self.auto_scroll + } + + pub(crate) const fn new_content_since_pause(&self) -> u32 { + self.new_content_since_pause } /// Number of committed chat blocks. @@ -374,6 +399,7 @@ impl ChatView { .content_height .get() .saturating_sub(self.viewport_height); + self.new_content_since_pause = 0; } fn scroll_up(&mut self, lines: u16) { @@ -389,6 +415,13 @@ impl ChatView { self.scroll_offset = self.scroll_offset.saturating_add(lines).min(max); if self.scroll_offset >= max { self.auto_scroll = true; + self.new_content_since_pause = 0; + } + } + + fn bump_paused_counter(&mut self) { + if !self.auto_scroll { + self.new_content_since_pause = self.new_content_since_pause.saturating_add(1); } } @@ -2142,6 +2175,42 @@ mod tests { assert_eq!(chat.viewport_height, 20); } + #[test] + fn paused_counter_bumps_only_while_scrolled_up() { + let mut chat = test_chat(); + chat.push_system_message("one"); + assert_eq!(chat.new_content_since_pause(), 0); + + chat.auto_scroll = false; + chat.push_system_message("two"); + chat.push_error("three"); + assert_eq!(chat.new_content_since_pause(), 2); + } + + #[test] + fn paused_counter_streaming_bumps_once_per_block() { + let mut chat = test_chat(); + chat.auto_scroll = false; + + chat.append_stream_token("a"); + chat.append_stream_token("b"); + assert_eq!(chat.new_content_since_pause(), 1); + + chat.commit_streaming(); + chat.append_stream_token("c"); + 
assert_eq!(chat.new_content_since_pause(), 2); + } + + #[test] + fn paused_counter_saturates() { + let mut chat = test_chat(); + chat.auto_scroll = false; + chat.new_content_since_pause = u32::MAX; + + chat.push_error("overflow"); + assert_eq!(chat.new_content_since_pause(), u32::MAX); + } + #[test] fn update_layout_invalidates_streaming_cache_on_width_change() { let mut chat = test_chat(); @@ -2249,10 +2318,12 @@ mod tests { chat.viewport_height = 20; chat.scroll_offset = 10; chat.auto_scroll = false; + chat.new_content_since_pause = 3; chat.handle_event(&ctrl_key_event(KeyCode::End)); assert_eq!(chat.scroll_offset, 80); assert!(chat.auto_scroll); + assert_eq!(chat.new_content_since_pause(), 0); } #[test] @@ -2507,10 +2578,12 @@ mod tests { chat.content_height.set(100); chat.viewport_height = 20; chat.scroll_offset = 75; + chat.new_content_since_pause = 3; chat.scroll_down(10); assert_eq!(chat.scroll_offset, 80); assert!(chat.auto_scroll); + assert_eq!(chat.new_content_since_pause(), 0); } // ── build_text ── diff --git a/crates/oxide-code/src/tui/components/welcome.rs b/crates/oxide-code/src/tui/components/welcome.rs index a3fa0567..08226488 100644 --- a/crates/oxide-code/src/tui/components/welcome.rs +++ b/crates/oxide-code/src/tui/components/welcome.rs @@ -326,7 +326,9 @@ mod tests { use ratatui::backend::TestBackend; use super::*; - use crate::config::{ConfigSnapshot, Effort, PromptCacheTtl}; + use crate::config::{ + AutoCompactionConfig, CompactionConfig, ConfigSnapshot, Effort, PromptCacheTtl, + }; use crate::slash::LiveSessionInfo; const TEST_SEED: u64 = 0x00C0_FFEE; @@ -343,6 +345,10 @@ mod tests { effort: Some(Effort::Xhigh), max_tokens: 64_000, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(967_000), + }), show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), diff --git a/docs/design/README.md b/docs/design/README.md index 
1bf8c36a..90e9db0e 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -4,6 +4,12 @@ Architecture decisions and implementation specs for oxide-code. Organized by topic. Each subdirectory mirrors the corresponding directory in [`docs/research/`](../research/), where the underlying research lives. +## Agent Loop + +| Document | Description | +| ------------------------------------------- | ----------------------------------------------------- | +| [Auto-Compaction](agent/auto-compaction.md) | Automatic compaction thresholds, triggers, fail-safes | + ## Session | Document | Description | diff --git a/docs/design/agent/auto-compaction.md b/docs/design/agent/auto-compaction.md new file mode 100644 index 00000000..1fdba10d --- /dev/null +++ b/docs/design/agent/auto-compaction.md @@ -0,0 +1,106 @@ +# Auto-Compaction + +Automatic context compression builds on manual `/compact`: when the latest observed token usage approaches the active model's context window, oxide-code summarizes the current transcript, persists the normal compact boundary, resets the file tracker, and continues from the synthetic summary. + +Companion docs: [research/agent/auto-compaction.md](../../research/agent/auto-compaction.md), [slash/compact.md](../slash/compact.md), [session/persistence.md](../session/persistence.md). + +## Scope + +Auto-compaction is **default on** and can be disabled independently from manual `/compact`. The trigger runs before recording a new user prompt when the previous completed turn left usage over threshold. Tool results are compacted only after the assistant has consumed them and returned a final response. + +It does not interrupt an in-flight stream or tool call. If another prompt arrives while summarization is running, the prompt remains queued during compaction and drains afterward through the existing prompt-queue path. 
+ +## Token Signal + +The agent loop records the maximum observed token usage from each stream: + +- `message_start.message.usage.input_tokens + output_tokens`; +- `message_delta.usage.input_tokens + output_tokens`. + +Anthropic's delta usage often carries only output tokens, so stream processing keeps the latest non-zero input and output values separately and computes `total = input + output`. Treat this value only as the auto-compaction trigger signal; it is unsuitable for billing telemetry. Missing usage means "do not auto-compact". + +## Threshold + +Each model has a known context window in `model.rs`: + +- normal Claude context: `200_000`; +- `[1m]` models with the 1M beta: `1_000_000`; +- unknown models: no window, so auto-compaction is disabled. + +The threshold is: + +```text +effective_window = context_window - min(max_tokens, 20_000) +threshold = effective_window - 13_000 +``` + +The 20k summary reserve mirrors Claude Code's p99 summary-output headroom and keeps a compact request from firing at the hard limit. The 13k buffer leaves room for the next prompt, dynamic instructions, and small tool-schema drift. If the subtraction would underflow, auto-compaction stays disabled. + +## Configuration + +Config surface: + +```toml +[client.compaction] +auto_enabled = true +auto_threshold_tokens = 400000 +# or: +auto_threshold_percent = 40 +``` + +Environment: + +| Variable | Effect | +| ---------------------------------------- | ------------------------------------------- | +| `OX_COMPACTION_AUTO_ENABLED` | Overrides `client.compaction.auto_enabled` | +| `OX_COMPACTION_AUTO_THRESHOLD_TOKENS` | Absolute automatic trigger threshold | +| `OX_COMPACTION_AUTO_THRESHOLD_PERCENT` | Percent of context, capped by safe trigger | + +Manual `/compact` remains available. The config controls only whether automatic compaction triggers and where that trigger fires. Token and percent thresholds are mutually exclusive so the resolved trigger stays obvious. 
+ +Explicit token thresholds must be at least `50_000` tokens and, for models with known context windows, no higher than the model-derived safe trigger. Percent thresholds must be 1-100, are capped by the same safe trigger after they resolve to tokens, and must still resolve to at least `50_000` tokens. Lower values create frequent summarization loops, extra latency, and avoidable summary loss long before context pressure exists. + +## Trigger Flow + +The main loop owns the automatic trigger because it can compact before a new prompt is recorded. The agent turn reports the latest usage signal after each completed turn. + +1. `agent_turn` streams a complete turn, persists the transcript tail, and returns the latest token usage from `StreamOutcome`. +2. The main loop stores that usage as the pending automatic trigger signal. +3. When the next `SubmitPrompt` arrives, `auto_compact_before_prompt` checks the stored usage before recording the prompt. +4. If the total crosses the threshold, it calls the same compact driver used by `/compact`. +5. The agent loop emits `AutoCompactionStarted` so the TUI can show compaction status while the summarizer runs. +6. On success, `compact_boundary` persists the compact boundary, clears the file tracker, replaces `messages` with the synthetic post-compact message, and emits `SessionCompacted`. +7. On failure, the loop increments the auto-compaction failure counter and records the new prompt against the unchanged transcript. + +The failure counter is per agent-loop task. Three consecutive automatic failures disable further automatic attempts for the current session. Manual `/compact` does not consult this counter and resets it on success. + +## User Experience + +Manual and automatic compaction use the same visible `CompactedBlock`. Automatic compaction does not need a separate chat error on failure; repeated automatic failure is a background recovery problem, and the user's next regular request should proceed. 
The error still lands in logs. + +During TUI auto-compaction, the status bar uses the existing `Compacting` state. Automatic `SessionCompacted` events keep the TUI busy until the queued prompt drains or the prompt submission finishes. In bare REPL / headless mode, `StdioSink` already renders `SessionCompacted` as a stderr boundary line. + +## Design Decisions + +1. **Default-on.** Running out of context is worse than a well-marked summary boundary. A separate opt-out preserves user control. + +2. **Response usage over preflight counting.** The stream already carries usage. A count-tokens request would add latency and still be approximate once dynamic system context and tool definitions are included. + +3. **Boundary-only compaction.** The first version compacts after a coherent transcript unit is persisted and before the next prompt starts. This avoids partial tool loops and makes session replay identical to manual `/compact`. + +4. **Same summarizer as `/compact`.** No separate compaction model knob yet. The current `Client::stream_message` path already handles auth, model, effort, betas, prompt caching, and first-party gateway constraints. + +5. **Same persistence boundary as `/compact`.** Auto-compaction should not create a second session format. `Entry::Compact` can later gain a trigger field if the UI needs to distinguish manual from automatic in history. + +6. **Failure circuit breaker.** A too-large or malformed compact request can be unrecoverable. After 3 consecutive automatic failures, the loop stops trying until the session changes through manual compaction, `/clear`, or `/resume`. + +7. **No automatic continue prompt.** If the user queued input, it drains after compaction. Otherwise the assistant waits. Synthetic "continue" prompts make the agent act without fresh user intent. + +## Deferred + +- Mid-turn compaction while a model response still needs tool follow-up. +- Microcompact / prune for old tool-result bodies. 
+- Anchored re-compaction that updates a previous summary in place. +- Separate compaction model. +- Token / cost status-bar redesign. +- Hook integration. diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index 9a32e890..8229135b 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -21,6 +21,9 @@ effort = "high" max_tokens = 32000 prompt_cache_ttl = "1h" +[client.compaction] +auto_threshold_tokens = 400000 + [tui] show_thinking = true ``` @@ -69,6 +72,20 @@ Use `base_url` only in `~/.config/ox/config.toml` or `ANTHROPIC_BASE_URL`. Proje Accepted values: `"5m"` (matches the server default as of 2026-03-06) and `"1h"` (higher write premium, bigger hit-rate win on long sessions). oxide-code defaults to `"1h"` because Anthropic's silent 2026-03 TTL drop cut typical prompt-caching savings from 80 %+ to 40-55 %. See [Agentic Request Body Fields](../research/api/anthropic.md#agentic-request-body-fields) for the wire shape and cost analysis. +### `[client.compaction]`: context compression + +Auto-compaction is enabled by default for known model context windows. The default trigger leaves room for the next response and a safety buffer. Set one threshold override when you want compaction to happen earlier: + +| Key | Type | Default | Description | +| ------------------------ | ------- | ------------------- | ------------------------------------------ | +| `auto_enabled` | boolean | `true` | Enable automatic context compaction | +| `auto_threshold_tokens` | integer | model-derived | Absolute trigger, `50000` token minimum | +| `auto_threshold_percent` | integer | model-derived | Percent of context, capped by safe trigger | + +`auto_threshold_tokens` and `auto_threshold_percent` are mutually exclusive. Absolute thresholds must be at least `50000` tokens. For models with known context windows, absolute thresholds must also stay within the model-derived safe trigger. 
Percent thresholds must be 1-100, are capped by that safe trigger after they resolve to tokens, and must still resolve to at least `50000` tokens. + +For models without known context windows, the default and percent-based automatic triggers stay off. An explicit token threshold still works after floor validation. + #### 1M Context Window: `[1m]` Tag Append `[1m]` to `model` to opt into the 1M-token context window on models that support it (any Sonnet 4.x, plus Opus 4.6 and newer): @@ -127,16 +144,19 @@ Prefer the environment variable (or OAuth) over `api_key` in a config file. `ox. Environment variables override all config file values. -| Variable | Config key | Default | Description | -| ---------------------- | ------------------------- | --------------------------- | ---------------------------- | -| `ANTHROPIC_API_KEY` | `client.api_key` | - | Anthropic API key | -| `ANTHROPIC_BASE_URL` | `client.base_url` | `https://api.anthropic.com` | API base URL | -| `ANTHROPIC_MODEL` | `client.model` | `claude-opus-4-7[1m]` | Model to use | -| `ANTHROPIC_EFFORT` | `client.effort` | per-model | Intelligence-vs-latency tier | -| `ANTHROPIC_MAX_TOKENS` | `client.max_tokens` | effort-derived | Max tokens per response | -| `OX_PROMPT_CACHE_TTL` | `client.prompt_cache_ttl` | `1h` | Prompt-cache TTL | -| `OX_SHOW_THINKING` | `tui.show_thinking` | `false` | Show extended thinking | -| `OX_SHOW_WELCOME` | `tui.show_welcome` | `true` | Paint the welcome splash | +| Variable | Config key | Default | Description | +| --------------------------------------- | ------------------------------------------ | --------------------------- | ---------------------------- | +| `ANTHROPIC_API_KEY` | `client.api_key` | - | Anthropic API key | +| `ANTHROPIC_BASE_URL` | `client.base_url` | `https://api.anthropic.com` | API base URL | +| `ANTHROPIC_MODEL` | `client.model` | `claude-opus-4-7[1m]` | Model to use | +| `ANTHROPIC_EFFORT` | `client.effort` | per-model | Intelligence-vs-latency tier | +| 
`ANTHROPIC_MAX_TOKENS` | `client.max_tokens` | effort-derived | Max tokens per response | +| `OX_PROMPT_CACHE_TTL` | `client.prompt_cache_ttl` | `1h` | Prompt-cache TTL | +| `OX_COMPACTION_AUTO_ENABLED` | `client.compaction.auto_enabled` | `true` | Enable auto-compaction | +| `OX_COMPACTION_AUTO_THRESHOLD_TOKENS` | `client.compaction.auto_threshold_tokens` | model-derived | Absolute compaction trigger | +| `OX_COMPACTION_AUTO_THRESHOLD_PERCENT` | `client.compaction.auto_threshold_percent` | model-derived | Percent compaction trigger | +| `OX_SHOW_THINKING` | `tui.show_thinking` | `false` | Show extended thinking | +| `OX_SHOW_WELCOME` | `tui.show_welcome` | `true` | Paint the welcome splash | Set `OX_SHOW_THINKING=1` to display the model's thinking process (dimmed text) when extended thinking is enabled for the model. diff --git a/docs/research/README.md b/docs/research/README.md index 7f859f70..2187786e 100644 --- a/docs/research/README.md +++ b/docs/research/README.md @@ -12,6 +12,12 @@ Organized by topic. Each subdirectory mirrors the corresponding directory in [`d | [Extended Thinking](api/extended-thinking.md) | Content block types, signatures, round-tripping | | [System Prompt](api/system-prompt.md) | Section assembly, CLAUDE.md, caching, block layout | +## Agent Loop + +| Document | Description | +| ------------------------------------------- | ----------------------------------------------------- | +| [Auto-Compaction](agent/auto-compaction.md) | Automatic compaction thresholds, triggers, fail-safes | + ## Session | Document | Description | diff --git a/docs/research/agent/auto-compaction.md b/docs/research/agent/auto-compaction.md new file mode 100644 index 00000000..47d4b6c4 --- /dev/null +++ b/docs/research/agent/auto-compaction.md @@ -0,0 +1,106 @@ +# Auto-Compaction (Reference) + +Research on automatic context compaction across Claude Code, OpenAI Codex, and opencode. 
Companion to [slash/compact.md](../slash/compact.md), which covers manual compaction and replacement strategy. + +## Claude Code + +Claude Code runs automatic compaction proactively before the model call. The query loop applies snip / microcompact / context-collapse transforms first, then calls `autoCompactIfNeeded` with the transformed messages. A successful compact replaces the message set for the rest of the same turn. + +Threshold math is token-buffer based: + +- `effectiveWindow = contextWindow - min(modelMaxOutputTokens, 20_000)`. +- `autoCompactThreshold = effectiveWindow - 13_000`. +- Warning and error indicators use `threshold - 20_000`. +- Manual blocking limit uses `effectiveWindow - 3_000`. + +The token signal is `tokenCountWithEstimation(messages)`, which uses the last API usage plus estimates for unsampled tail content. Auto-compaction defaults on, can be disabled by global `autoCompactEnabled`, and is also gated by `DISABLE_COMPACT` / `DISABLE_AUTO_COMPACT`. `DISABLE_COMPACT` disables manual and automatic compaction; `DISABLE_AUTO_COMPACT` leaves `/compact` available. + +Failures are deliberately quiet. Auto-compaction first tries session-memory compaction, falls back to the full summarizer, and stops retrying after 3 consecutive failures. The circuit breaker is important because an over-limit session can otherwise retry a doomed compact request every turn. + +Claude Code also has pre-stages that oxide-code should not copy yet: + +- **Microcompact** clears old tool-result bodies before a full summary pass. +- **Session-memory compaction** prunes memory-specific slices. +- **Context-collapse** can own the headroom problem in feature-gated builds, so proactive auto-compact is suppressed when it is active. + +User-facing behavior is minimal: token warnings mention "until auto-compact" when enabled, and a compact boundary renders after success. Automatic failures are logged rather than surfaced in chat. 
+ +Key files: + +- `claude-code/src/services/compact/autoCompact.ts`: threshold math, opt-out flags, circuit breaker. +- `claude-code/src/query.ts`: pre-query placement. +- `claude-code/src/components/Settings/Config.tsx`: `autoCompactEnabled` setting. +- `claude-code/src/utils/context.ts`: context-window detection. + +## OpenAI Codex + +Codex drives auto-compaction from model metadata. `ModelInfo::auto_compact_token_limit()` defaults to 90% of the resolved context window, or to a configured limit clamped to that 90% ceiling. If no context window or explicit limit is known, the runtime uses `i64::MAX`, effectively disabling auto-compact. + +Triggers: + +- **Pre-turn**: before recording the new user input, if current total usage is already over the limit. +- **Mid-turn**: after a sampling response, only when usage is over the limit and the model needs a follow-up or pending input exists. +- **Model downshift**: when switching to a smaller context-window model and the current token use exceeds the new model's limit. + +The token signal is `Session::get_total_token_usage()`, which combines cached last API token usage with estimates after the last model-generated item. Local compaction streams a normal model request. OpenAI / Azure providers use a remote compaction path, and a newer feature-gated path expects a `context_compaction` response item. + +Codex exposes configuration for `model_context_window`, `model_auto_compact_token_limit`, and `compact_prompt`. The auto limit is absolute; percentage values are not part of that surface. Hooks can run before and after manual or automatic compaction. + +Key files: + +- `codex-rs/protocol/src/openai_models.rs`: 90% default and configured-limit clamp. +- `codex-rs/core/src/session/turn.rs`: pre-turn, mid-turn, and model-downshift triggers. +- `codex-rs/core/src/compact.rs`: inline summarization and history replacement. +- `codex-rs/config/src/config_toml.rs`: config surface. 
+ +## opencode + +opencode performs local app-level compaction through a hidden `compaction` agent. The compaction agent is tool-denied and receives prior context plus a strict Markdown summary template. It does not rely on provider-side automatic summarization. + +Threshold math is based on usable input context: + +- Default reserved buffer is `20_000`. +- If the provider exposes `model.limit.input`, usable tokens are `input - reserved`. +- Otherwise usable tokens are `context - maxOutputTokens(model)`. +- Auto-overflow is disabled when `compaction.auto === false` or model context is `0`. + +The overflow count prefers provider `tokens.total`; when absent, it falls back to `input + output + cache.read + cache.write`. opencode also reacts to provider context overflow errors by scheduling compaction. + +Compaction preserves a recent tail. Defaults are 2 user turns and a recent-token budget of 25% of usable context, clamped to 2,000-8,000 tokens unless configured. Old tool-output pruning is a separate pass: it can wipe older completed tool outputs once enough tokens are reclaimable after protecting recent results. + +Config supports `compaction.auto`, `compaction.prune`, `compaction.tail_turns`, `compaction.preserve_recent_tokens`, and `compaction.reserved`. Env flags `OPENCODE_DISABLE_AUTOCOMPACT` and `OPENCODE_DISABLE_PRUNE` override config. + +Key files: + +- `packages/opencode/src/session/overflow.ts`: usable-context threshold. +- `packages/opencode/src/session/prompt.ts`: post-assistant and overflow-triggered compaction scheduling. +- `packages/opencode/src/session/compaction.ts`: prompt, tail preservation, pruning. +- `packages/opencode/src/agent/agent.ts`: hidden tool-denied compaction agent. + +## Patterns Worth Borrowing + +1. **Default-on with explicit opt-out.** All three systems treat auto-compaction as normal context hygiene, while still giving users an escape hatch. + +2. 
**Use observed response usage.** Response usage is already available on the hot path. Pre-flight token-count calls add latency and still need estimates for dynamic system / tool content. + +3. **Reserve output headroom.** Claude Code and opencode both avoid compacting exactly at the model's advertised context limit. The compact request itself needs room to produce the summary. + +4. **Run at turn boundaries first.** Pre-turn or post-round compaction is much simpler than interrupting an in-flight response. Mid-turn compaction is useful only once the loop can resume safely after history replacement. + +5. **Circuit-break automatic failures.** Automatic failures should not spam chat or repeatedly hit the API when the session is too large to summarize. + +6. **Keep manual `/compact` independent.** Auto opt-out should not disable manual compaction. + +## Patterns to Defer + +1. **Mid-turn compaction.** Requires pausing a tool loop or assistant continuation, replacing history, and resuming the same logical turn. The first oxide-code version should compact before recording the next user prompt after a completed turn crosses the trigger. + +2. **Microcompact / prune.** Clearing old tool outputs can save tokens, but it is a separate retention policy with its own UI and persistence implications. + +3. **Anchored summary rewrites.** opencode's anchored-summary pattern helps repeated compactions, but repeated lossy rewrite quality needs real usage data before adding complexity. + +4. **Provider-specific remote compaction.** oxide-code talks to Anthropic Messages today, and the current manual compaction path already works through the normal stream. + +5. **Automatic continue prompts.** opencode can synthesize a "Continue..." prompt after auto-compaction. oxide-code should wait for the user unless a queued prompt already exists. + +6. **Hooks.** PreCompact / PostCompact hooks belong with a broader hook or workflow-skill system. 
diff --git a/docs/roadmap.md b/docs/roadmap.md index a70635d7..65e78cf2 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,29 +1,26 @@ # Roadmap -oxide-code is still early. This roadmap is the high-level product view: what works, what's being built next, and what is intentionally out of scope for now. +oxide-code is still early. This roadmap is the high-level product view: what works, what's being built next, and what is out of scope for now. The direction is simple: -- Build a useful terminal-based AI coding assistant in Rust. -- Follow the agent-harness architecture: the model is the agent, everything else is harness (tools, context, permissions, coordination). -- Keep the architecture understandable. New features should fit the current model instead of forcing large abstractions too early. +- Keep the terminal as the primary interface: streaming chat, tool output, and session controls stay keyboard-first. +- Keep context and state visible: model, instructions, compaction, queued prompts, and session identity should be inspectable from the UI. +- Add workflow depth only when it fits the current agent-harness model. ## Working Today ### Terminal UI -- Streaming output with markdown rendering and syntax-highlighted code blocks. -- Multi-line input with a prompt marker and a status bar showing model, working directory, and run state. -- Welcome surface on empty chat with identity, environment, and a few starter commands. -- Rich per-tool views: edit diffs with line gutters, line-numbered read excerpts, grouped grep matches, and structured glob lists. Bash output rides the default truncated-text view. -- Themable via runtime-loaded TOML, with 5 built-in palettes (Catppuccin Mocha, Macchiato, Frappe, Latte, Material) and per-slot overrides. -- Three modes: full TUI, bare REPL (`--no-tui`), and headless (`-p`). +- Streaming chat with markdown, syntax-highlighted code, and clear tool output. 
+- Multi-line input, a live status bar, and a focused welcome screen for new sessions. +- Theme support with built-in palettes and user-defined TOML themes. +- Full TUI, bare REPL (`--no-tui`), and headless (`-p`) modes. ### Agent Loop -- Async streaming from the Anthropic Messages API. -- Tool-use round-trip: the model calls tools, results feed back, and the loop continues until a text-only response. -- Extended thinking with optional dimmed display. +- Anthropic-powered streaming turns with tool use and multi-step continuation. +- Optional extended-thinking display for models that support it. ### Tools @@ -38,102 +35,89 @@ The direction is simple: ### Turn Interruption & Queueing -- Esc / Ctrl+C while busy interrupts the in-flight turn. Partial output is preserved with a clear `(interrupted)` marker. -- Type during a busy turn to queue prompts. Queued prompts splice into the same multi-step turn at the next round boundary (between tool calls), so follow-ups land without aborting in-flight work. Tool-less turns drain queued prompts at the turn boundary instead. -- Esc on idle pops the most recent queued prompt back into the input for editing. -- Idle Ctrl+C arms a 1-second exit confirmation. A second press confirms. +- Interrupt busy turns without losing partial output. +- Queue follow-up prompts while the assistant is working, then edit or cancel them from idle. +- Exit intentionally with a guarded Ctrl+C confirmation. ### System Prompt -- Runtime environment (cwd, platform, shell, git status, date, model) injected every turn. -- `CLAUDE.md` / `AGENTS.md` discovered from user-global and project scopes (root-to-CWD walk, root-level and `.claude/` at each level). +- Project environment and model context are injected every turn. +- `CLAUDE.md` / `AGENTS.md` instructions are loaded from user and project scopes. ### Session Persistence -- Every conversation saved as JSONL under `$XDG_DATA_HOME/ox/sessions/{project}/`. 
-- `ox --list` browses recent sessions, capped at 30 by default with `--limit N` / `--limit 0` overrides. `ox -c` resumes by recency, prefix, or path. -- Mid-session `/resume` (alias `/continue`) opens an in-place picker (substring search, Tab toggles current-project ↔ all projects). `/resume ` jumps directly. -- AI-generated 3-7-word titles land shortly after the first prompt. +- Conversations are saved per project and can be listed or resumed later. +- Mid-session `/resume` switches chats without restarting the app. +- Short AI-generated titles make session history easier to scan. ### File-Change Tracking -- Per-session tracker remembers each Read. Unchanged re-reads return a cache-hit stub instead of the full body. -- Edit and Write require a prior full Read and refuse if the on-disk bytes have drifted (xxh64 fallback for cloud-sync mtime touches). -- Tracker state persists into the session JSONL on clean exit and restores on resume. +- Tracks reads so edits are made against files the assistant has actually seen. +- Refuses stale writes when files changed on disk. +- Restores edit-safety state when a session resumes. ### Slash Commands -- Built-in: `/clear` (aliases `/new`, `/reset`), `/compact`, `/config`, `/delete`, `/diff`, `/effort`, `/help`, `/init`, `/model`, `/rename`, `/resume` (alias `/continue`), `/status`, `/theme`. See the [user guide](guide/slash-commands.md). -- Autocomplete popup on typing `/`, with ranked filter, Tab completion, and arg-mode completion for commands with curated rosters (`/model`, `/effort`, `/theme`). -- Mid-session swaps (`/model`, `/effort`, `/rename`, `/resume`, `/theme`) are session-only, and no slash command writes user config files. -- Destructive ops (`/delete `, or Ctrl+D / Delete inside the `/resume` picker) gate behind a Y/N confirm modal. The live session is excluded; any saved non-live session can be deleted. 
-- Modal UI primitive: focus-grabbing overlays above the input for picker, slider, editor, and read-only kv-overview forms. Nested modals layer cleanly, and Esc / Ctrl+C cancels any modal. +- Built-in commands cover session control, config/status, model and theme changes, diffs, compaction, and help. See the [user guide](guide/slash-commands.md). +- Autocomplete, typed shortcuts, and modal pickers keep common actions quick. +- Destructive session actions require confirmation. ### Context Compression -- Manual `/compact [instructions]` streams a one-shot summarization through the live model and replaces the in-memory transcript with a synthetic continuation. Optional trailing instructions steer the focus. -- Persisted as a dedicated `compact` JSONL boundary plus the synthetic post-compact message. Resume sees only the post-compact tail. -- File tracker resets on compact. Edits after `/compact` require a fresh Read. +- Manual `/compact [instructions]` and default auto-compaction keep long sessions usable. +- Compaction keeps a visible history boundary and makes future edits require fresh reads. ### Authentication & Configuration -- Anthropic API key via `ANTHROPIC_API_KEY` or config file. -- Claude Code OAuth credentials picked up automatically (macOS Keychain, Linux file). -- TOML config with layered precedence: defaults → user (`~/.config/ox/config.toml`) → project (`ox.toml`) → environment. +- Supports Anthropic API keys and Claude Code OAuth pickup. +- Layered TOML configuration supports user, project, and environment overrides. ## Current Focus ### Permission & Approval -- Per-tool approval prompts before destructive actions (bash, write, edit). -- Project-level allowlists to auto-approve trusted commands. -- Plan mode: read-only review of the agent's proposed changes before any tool runs. - -### Auto-Compaction - -- Fire `/compact` automatically when the running token usage approaches the model's context window. 
Threshold math (effective context window minus reserved-output buffer), per-turn check at sampling boundaries, single-turn circuit breaker, and a config knob for opt-out. +- Approval prompts for destructive tool actions. +- Project allowlists for trusted commands. +- Plan mode for reviewing the assistant's proposed work before tools run. ### Slash Commands (continuation) Remaining surface beyond Working Today: -- Deferred: `/cost`, `/login` / `/logout`, custom user commands, `/init` multi-phase flow. +- Cost visibility, login/logout, custom commands, and a guided `/init` flow. -Persistence stance: `/model`, `/effort`, and `/theme` mutate session state only, and restart returns to user-declared config. Cross-session persistence will land as an **explicit subcommand** writing to a user-opted-in path, never as a silent merge into a `~/.claude.json`-style mega-file. +Persistence stance: session commands should feel reversible. Cross-session writes will require an explicit user action. ### Viewport Virtualization -- Render only the visible chat region for sessions with thousands of blocks. +- Keep very long sessions responsive by rendering only the visible chat region. ## Later ### MCP Integration -- MCP client to call external tool servers (Atlassian, GitHub, custom). -- MCP server mode to expose oxide-code as a tool to other agents. +- MCP client support for external tool servers. +- MCP server mode so other agents can call oxide-code. ### Agent Infrastructure -- Task management for multi-step work (TodoWrite-style tracking). -- Subagent spawning to delegate self-contained sub-tasks. -- Background tasks for long-running shell processes. -- Agent-team coordination across multiple subagents. -- Git-worktree isolation for parallel implementation attempts. +- Task tracking for multi-step work. +- Subagents for self-contained delegation. +- Background shell processes and stronger parallel-work support. 
### Sandboxing -- Sandboxed execution for `bash` / `write` / `edit` so the agent runs without trusting the host shell. +- Sandboxed `bash`, `write`, and `edit` execution. ### Workflow Skills -- User-extensible templates that can override built-ins or add new ones (e.g. project-local `~/.claude/commands/review.md`). Built-ins like `/init` ship under Working Today. -- Auth slash commands: `/login`, `/logout`. -- Configurable instruction directories beyond `.claude/`. +- User-extensible workflow templates. +- Auth slash commands. +- Configurable instruction directories. ### Status Bar Redesign -- Current bar packs model + status + (optional) title + cwd into a single line. Layout collapses to model + status under width pressure but reads as cluttered above ~80 cols. -- Direction: a richer, possibly multi-segment surface with token / cost meter, queued-prompt indicator, session id glance, and theme indicator. Likely needs a layout rethink rather than incremental slot additions. +- A clearer status surface for model, cost, queue state, session identity, and theme. ## Not the Goal Right Now