diff --git a/src/index.ts b/src/index.ts index 48be4fb..9458686 100644 --- a/src/index.ts +++ b/src/index.ts @@ -925,8 +925,10 @@ async function handleProxyRequest(c: any): Promise { return response; } + // 即使是流式请求,错误状态码也是在开流前返回的(响应体尚未转发给客户端,下面会 cancel 掉), + // 此时重试/回退是安全的;只有已经开始流式响应体后才不应重试,而那种情况状态码是 2xx,不会命中 failover。 const statusTrigger: FailoverTrigger = { kind: 'status', status: upstreamResponse.status }; - if (!retryableStreamRequest && shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) { + if (shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) { lastFailureTrigger = statusTrigger; const reason = describeFailoverTrigger(statusTrigger); failoverReason = reason; diff --git a/test/gateway-routing-failover-e2e.test.ts b/test/gateway-routing-failover-e2e.test.ts index 2d41ddb..43d4a80 100644 --- a/test/gateway-routing-failover-e2e.test.ts +++ b/test/gateway-routing-failover-e2e.test.ts @@ -4,7 +4,7 @@ * 覆盖的场景: * - 路由:基于模型路由、显式 provider 路由、未知模型 400、优先级选择、禁用 provider 跳过 * - 认证:缺少 key → 401、错误 key → 401、正确 key → 通过 - * - 故障转移—重试:5xx 重试、429 重试、400 不重试、流式请求不重试状态码错误 + * - 故障转移—重试:5xx 重试、429 重试、400 不重试、流式请求开流前的状态码错误也会重试/回退 * - 故障转移—网络/超时:网络错误重试、超时重试、全部超时 → 504 * - 故障转移—模型回退:same_model 模式、any_model 模式、自定义回退、全部失败 → 5xx 透传 * - 策略控制:failover 禁用、maxFallbackAttempts=0 @@ -430,24 +430,66 @@ describe('failover – retry on error status', () => { expect(requestLog).toHaveLength(1); }); - it('does NOT retry a streaming request on 5xx — stream errors pass through', async () => { + it('retries a streaming request when the upstream returns an error status before the first byte', async () => { loadProviderConfigsForTest(singleProviderConfig()); loadFailoverPolicyForTest({ enabled: true, - retryAttempts: 2, + retryAttempts: 1, maxFallbackAttempts: 0, retryOnStatusRanges: ['5xx'], + retryOnStatusCodes: [429], + }); + responseQueue.push(() => errorResponse(429, 'Rate limited')); + responseQueue.push(() => defaultOkResponse()); + + // stream: true must NOT disable status-based failover: the error status arrives + // before any body is streamed to the client, so retrying to the next attempt is safe. + const res = await app.fetch( + gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })), + ); + + expect(res.status).toBe(200); + expect(requestLog).toHaveLength(2); + }); + + it('falls over a streaming request to the next provider on 5xx', async () => { + const configs = validateConfigEntries({ + primary: { + type: 'openai', + targetBaseUrl: `${mockBaseUrl}/primary/v1`, + auth: { header: 'authorization', value: 'key' }, + models: ['gpt-4o'], + priority: 10, + }, + secondary: { + type: 'openai', + targetBaseUrl: `${mockBaseUrl}/secondary/v1`, + auth: { header: 'authorization', value: 'key' }, + models: ['gpt-4o'], + priority: 5, + }, + } as any); + loadProviderConfigsForTest(configs); + loadFailoverPolicyForTest({ + enabled: true, + retryAttempts: 0, + modelFallbackMode: 'same_model', + maxFallbackAttempts: 1, + retryOnStatusRanges: ['5xx'], retryOnStatusCodes: [], }); - responseQueue.push(() => errorResponse(500, 'Internal error')); + responseQueue.push(() => errorResponse(503, 'Service unavailable')); + responseQueue.push(() => defaultOkResponse()); - // stream: true marks this as a streaming request; status-based retries are skipped const res = await app.fetch( gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })), ); - expect(res.status).toBe(500); - expect(requestLog).toHaveLength(1); // no retry + expect(res.status).toBe(200); + expect(requestLog).toHaveLength(2); + // Primary (high priority) is tried first, then the request falls over to secondary. + expect(requestLog[0]!.path).toBe('/primary/v1/chat/completions'); + expect(requestLog[1]!.path).toBe('/secondary/v1/chat/completions'); }); });