Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/engine/capacity_pause.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
// capacity / session-limit exhaustion at a given pipeline stage. The "capacity
// limit" phrasing routes pauseResumeHint to the correct operator guidance.
func capacityPauseReason(stage string, err error) string {
	// The reason names the pipeline stage that hit the failure and appends the
	// underlying error rendered with %v (a nil err prints as "<nil>").
	//
	// Exactly one return is kept: a second return after the first can never
	// execute and is reported by go vet's unreachable check.
	//
	// NOTE(review): the doc comment above says the "capacity limit" phrasing
	// routes pauseResumeHint. This message contains "capacity" but not
	// "limit" — confirm pauseResumeHint still matches it.
	return fmt.Sprintf("transient LLM capacity/network error during %s — resume after it clears: %v", stage, err)
}

// pauseIfCapacity inspects an LLM-call error. If it is a transient capacity
Expand Down
6 changes: 6 additions & 0 deletions internal/llm/capacity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ func TestIsCapacityError(t *testing.T) {
{name: "rate limit string", err: fmt.Errorf("claude CLI error: rate limit exceeded"), expect: true},
{name: "too many requests string", err: fmt.Errorf("too many requests"), expect: true},
{name: "overloaded string", err: fmt.Errorf("the service is currently overloaded"), expect: true},
// Transient network/transport failures (api_error_status null) — must
// also classify as transient so they take the clean-pause path.
{name: "socket closed string", err: fmt.Errorf(`claude CLI error: exit status 1 (output: {"is_error":true,"api_error_status":null,"result":"API Error: The socket connection was closed unexpectedly"})`), expect: true},
{name: "connection reset", err: fmt.Errorf("read tcp: connection reset by peer"), expect: true},
{name: "i/o timeout", err: fmt.Errorf("dial tcp: i/o timeout"), expect: true},
{name: "503 service unavailable", err: fmt.Errorf("503 service unavailable"), expect: true},
{name: "api_error_status 429 embedded", err: fmt.Errorf(`output: {"api_error_status":429}`), expect: true},
{name: "api_error_status 529 embedded", err: fmt.Errorf(`output: {"api_error_status":529}`), expect: true},

Expand Down
15 changes: 15 additions & 0 deletions internal/llm/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,21 @@ var capacitySignatures = []string{
`"api_error_status":529`,
`"api_error_status": 429`,
`"api_error_status": 529`,

// Transient network/transport failures. Like a session limit, these are not
// a story-quality problem and succeed on retry/resume — so they must take
// the clean-pause path, not burn the escalation chain. Surfaced by the CLI
// as e.g. "API Error: The socket connection was closed unexpectedly".
"socket connection was closed",
"connection closed unexpectedly",
"connection reset",
"connection refused",
"i/o timeout",
"tls handshake timeout",
"unexpected eof",
"service unavailable",
"bad gateway",
"gateway timeout",
}

// ContainsCapacitySignature reports whether a raw string carries a capacity /
Expand Down
Loading