From 8cfbdc67da83ec106c83d62daaf2b64d2e81cda6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 11:32:03 +0000
Subject: [PATCH] =?UTF-8?q?fix(gateway):=20=E6=B5=81=E5=BC=8F=E8=AF=B7?=
 =?UTF-8?q?=E6=B1=82=E5=BC=80=E6=B5=81=E5=89=8D=E7=9A=84=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?=E7=8A=B6=E6=80=81=E7=A0=81=E4=B9=9F=E5=8F=82=E4=B8=8E=20failov?=
 =?UTF-8?q?er?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

此前状态码级别的故障转移被 `!retryableStreamRequest` 一刀切挡掉，
导致流式请求（stream: true）即使在开流前收到 429 / 5xx，也不会按
priority 降级到下一个渠道，而是直接把错误透传给客户端（issue #6）。

错误状态码是在响应体转发给客户端之前返回的（随后会 cancel 掉上游
body），此时重试 / 回退是安全的；真正不可重试的是“已经开始流式响应体
后才出错”，而那种情况状态码是 2xx，本就不会命中 failover。因此去掉该
前置条件即可，无需新增开关。

新增两条流式 failover 端到端测试：开流前 429 重试成功、5xx 按 priority
回退到下一渠道。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01B1f8CF7YQmp9EU3m8yJYFr
---
 src/index.ts                              |  4 +-
 test/gateway-routing-failover-e2e.test.ts | 56 ++++++++++++++++++++---
 2 files changed, 52 insertions(+), 8 deletions(-)
diff --git a/src/index.ts b/src/index.ts
index 48be4fb..9458686 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -925,8 +925,10 @@ async function handleProxyRequest(c: any): Promise<Response> {
       return response;
     }
 
+    // 即使是流式请求，错误状态码也是在开流前返回的（响应体尚未转发给客户端，下面会 cancel 掉），
+    // 此时重试/回退是安全的；只有已经开始流式响应体后才不应重试，而那种情况状态码是 2xx，不会命中 failover。
     const statusTrigger: FailoverTrigger = { kind: 'status', status: upstreamResponse.status };
-    if (!retryableStreamRequest && shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) {
+    if (shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) {
       lastFailureTrigger = statusTrigger;
       const reason = describeFailoverTrigger(statusTrigger);
       failoverReason = reason;
diff --git a/test/gateway-routing-failover-e2e.test.ts b/test/gateway-routing-failover-e2e.test.ts
index 2d41ddb..43d4a80 100644
--- a/test/gateway-routing-failover-e2e.test.ts
+++ b/test/gateway-routing-failover-e2e.test.ts
@@ -4,7 +4,7 @@
  * 覆盖的场景：
  *   - 路由：基于模型路由、显式 provider 路由、未知模型 400、优先级选择、禁用 provider 跳过
  *   - 认证：缺少 key → 401、错误 key → 401、正确 key → 通过
- *   - 故障转移—重试：5xx 重试、429 重试、400 不重试、流式请求不重试状态码错误
+ *   - 故障转移—重试：5xx 重试、429 重试、400 不重试、流式请求开流前的状态码错误也会重试/回退
  *   - 故障转移—网络/超时：网络错误重试、超时重试、全部超时 → 504
  *   - 故障转移—模型回退：same_model 模式、any_model 模式、自定义回退、全部失败 → 5xx 透传
  *   - 策略控制：failover 禁用、maxFallbackAttempts=0
@@ -430,24 +430,66 @@ describe('failover – retry on error status', () => {
     expect(requestLog).toHaveLength(1);
   });
 
-  it('does NOT retry a streaming request on 5xx — stream errors pass through', async () => {
+  it('retries a streaming request when the upstream returns an error status before the first byte', async () => {
     loadProviderConfigsForTest(singleProviderConfig());
     loadFailoverPolicyForTest({
       enabled: true,
-      retryAttempts: 2,
+      retryAttempts: 1,
       maxFallbackAttempts: 0,
       retryOnStatusRanges: ['5xx'],
+      retryOnStatusCodes: [429],
+    });
+    responseQueue.push(() => errorResponse(429, 'Rate limited'));
+    responseQueue.push(() => defaultOkResponse());
+
+    // stream: true must NOT disable status-based failover: the error status arrives
+    // before any body is streamed to the client, so retrying the request is safe.
+    const res = await app.fetch(
+      gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })),
+    );
+
+    expect(res.status).toBe(200);
+    expect(requestLog).toHaveLength(2);
+  });
+
+  it('falls over a streaming request to the next provider on 5xx', async () => {
+    const configs = validateConfigEntries({
+      primary: {
+        type: 'openai',
+        targetBaseUrl: `${mockBaseUrl}/primary/v1`,
+        auth: { header: 'authorization', value: 'key' },
+        models: ['gpt-4o'],
+        priority: 10,
+      },
+      secondary: {
+        type: 'openai',
+        targetBaseUrl: `${mockBaseUrl}/secondary/v1`,
+        auth: { header: 'authorization', value: 'key' },
+        models: ['gpt-4o'],
+        priority: 5,
+      },
+    } as any);
+    loadProviderConfigsForTest(configs);
+    loadFailoverPolicyForTest({
+      enabled: true,
+      retryAttempts: 0,
+      modelFallbackMode: 'same_model',
+      maxFallbackAttempts: 1,
+      retryOnStatusRanges: ['5xx'],
       retryOnStatusCodes: [],
     });
-    responseQueue.push(() => errorResponse(500, 'Internal error'));
+    responseQueue.push(() => errorResponse(503, 'Service unavailable'));
+    responseQueue.push(() => defaultOkResponse());
 
-    // stream: true marks this as a streaming request; status-based retries are skipped
     const res = await app.fetch(
       gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })),
     );
 
-    expect(res.status).toBe(500);
-    expect(requestLog).toHaveLength(1); // no retry
+    expect(res.status).toBe(200);
+    expect(requestLog).toHaveLength(2);
+    // Primary (high priority) is tried first, then the request fails over to secondary.
+    expect(requestLog[0]!.path).toBe('/primary/v1/chat/completions');
+    expect(requestLog[1]!.path).toBe('/secondary/v1/chat/completions');
   });
 });