From 8cfbdc67da83ec106c83d62daaf2b64d2e81cda6 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 30 Jun 2026 11:32:03 +0000 Subject: [PATCH] =?UTF-8?q?fix(gateway):=20=E6=B5=81=E5=BC=8F=E8=AF=B7?= =?UTF-8?q?=E6=B1=82=E5=BC=80=E6=B5=81=E5=89=8D=E7=9A=84=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E7=8A=B6=E6=80=81=E7=A0=81=E4=B9=9F=E5=8F=82=E4=B8=8E=20failov?= =?UTF-8?q?er?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 此前状态码级别的故障转移被 `!retryableStreamRequest` 一刀切挡掉, 导致流式请求(stream: true)即使在开流前收到 429 / 5xx,也不会按 priority 降级到下一个渠道,而是直接把错误透传给客户端(issue #6)。 错误状态码是在响应体转发给客户端之前返回的(随后会 cancel 掉上游 body),此时重试 / 回退是安全的;真正不可重试的是“已经开始流式响应体 后才出错”,而那种情况状态码是 2xx,本就不会命中 failover。因此去掉该 前置条件即可,无需新增开关。 新增两条流式 failover 端到端测试:开流前 429 重试成功、5xx 按 priority 回退到下一渠道。 Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01B1f8CF7YQmp9EU3m8yJYFr --- src/index.ts | 4 +- test/gateway-routing-failover-e2e.test.ts | 56 ++++++++++++++++++++--- 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/src/index.ts b/src/index.ts index 48be4fb..9458686 100644 --- a/src/index.ts +++ b/src/index.ts @@ -925,8 +925,10 @@ async function handleProxyRequest(c: any): Promise { return response; } + // 即使是流式请求,错误状态码也是在开流前返回的(响应体尚未转发给客户端,下面会 cancel 掉), + // 此时重试/回退是安全的;只有已经开始流式响应体后才不应重试,而那种情况状态码是 2xx,不会命中 failover。 const statusTrigger: FailoverTrigger = { kind: 'status', status: upstreamResponse.status }; - if (!retryableStreamRequest && shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) { + if (shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) { lastFailureTrigger = statusTrigger; const reason = describeFailoverTrigger(statusTrigger); failoverReason = reason; diff --git a/test/gateway-routing-failover-e2e.test.ts b/test/gateway-routing-failover-e2e.test.ts index 2d41ddb..43d4a80 100644 --- a/test/gateway-routing-failover-e2e.test.ts +++ b/test/gateway-routing-failover-e2e.test.ts @@ -4,7 +4,7 @@ * 覆盖的场景: * - 路由:基于模型路由、显式 provider 路由、未知模型 400、优先级选择、禁用 provider 跳过 * - 认证:缺少 key → 401、错误 key → 401、正确 key → 通过 - * - 故障转移—重试:5xx 重试、429 重试、400 不重试、流式请求不重试状态码错误 + * - 故障转移—重试:5xx 重试、429 重试、400 不重试、流式请求开流前的状态码错误也会重试/回退 * - 故障转移—网络/超时:网络错误重试、超时重试、全部超时 → 504 * - 故障转移—模型回退:same_model 模式、any_model 模式、自定义回退、全部失败 → 5xx 透传 * - 策略控制:failover 禁用、maxFallbackAttempts=0 @@ -430,24 +430,66 @@ describe('failover – retry on error status', () => { expect(requestLog).toHaveLength(1); }); - it('does NOT retry a streaming request on 5xx — stream errors pass through', async () => { + it('retries a streaming request when the upstream returns an error status before the first byte', async () => { loadProviderConfigsForTest(singleProviderConfig()); loadFailoverPolicyForTest({ enabled: true, - retryAttempts: 2, + retryAttempts: 1, maxFallbackAttempts: 0, retryOnStatusRanges: ['5xx'], + retryOnStatusCodes: [429], + }); + responseQueue.push(() => errorResponse(429, 'Rate limited')); + responseQueue.push(() => defaultOkResponse()); + + // stream: true must NOT disable status-based failover: the error status arrives + // before any body is streamed to the client, so retrying to the next attempt is safe. + const res = await app.fetch( + gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })), + ); + + expect(res.status).toBe(200); + expect(requestLog).toHaveLength(2); + }); + + it('falls over a streaming request to the next provider on 5xx', async () => { + const configs = validateConfigEntries({ + primary: { + type: 'openai', + targetBaseUrl: `${mockBaseUrl}/primary/v1`, + auth: { header: 'authorization', value: 'key' }, + models: ['gpt-4o'], + priority: 10, + }, + secondary: { + type: 'openai', + targetBaseUrl: `${mockBaseUrl}/secondary/v1`, + auth: { header: 'authorization', value: 'key' }, + models: ['gpt-4o'], + priority: 5, + }, + } as any); + loadProviderConfigsForTest(configs); + loadFailoverPolicyForTest({ + enabled: true, + retryAttempts: 0, + modelFallbackMode: 'same_model', + maxFallbackAttempts: 1, + retryOnStatusRanges: ['5xx'], retryOnStatusCodes: [], }); - responseQueue.push(() => errorResponse(500, 'Internal error')); + responseQueue.push(() => errorResponse(503, 'Service unavailable')); + responseQueue.push(() => defaultOkResponse()); - // stream: true marks this as a streaming request; status-based retries are skipped const res = await app.fetch( gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })), ); - expect(res.status).toBe(500); - expect(requestLog).toHaveLength(1); // no retry + expect(res.status).toBe(200); + expect(requestLog).toHaveLength(2); + // Primary (high priority) is tried first, then the request falls over to secondary. + expect(requestLog[0]!.path).toBe('/primary/v1/chat/completions'); + expect(requestLog[1]!.path).toBe('/secondary/v1/chat/completions'); }); });