Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -925,8 +925,10 @@ async function handleProxyRequest(c: any): Promise<Response> {
return response;
}

// 即使是流式请求,错误状态码也是在开流前返回的(响应体尚未转发给客户端,下面会 cancel 掉),
// 此时重试/回退是安全的;只有已经开始流式响应体后才不应重试,而那种情况状态码是 2xx,不会命中 failover。
const statusTrigger: FailoverTrigger = { kind: 'status', status: upstreamResponse.status };
if (!retryableStreamRequest && shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) {
if (shouldContinueAfterFailure(failoverPolicy, statusTrigger, retryIndexForRoute)) {
lastFailureTrigger = statusTrigger;
const reason = describeFailoverTrigger(statusTrigger);
failoverReason = reason;
Expand Down
56 changes: 49 additions & 7 deletions test/gateway-routing-failover-e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* 覆盖的场景:
* - 路由:基于模型路由、显式 provider 路由、未知模型 400、优先级选择、禁用 provider 跳过
* - 认证:缺少 key → 401、错误 key → 401、正确 key → 通过
* - 故障转移—重试:5xx 重试、429 重试、400 不重试、流式请求不重试状态码错误
* - 故障转移—重试:5xx 重试、429 重试、400 不重试、流式请求开流前的状态码错误也会重试/回退
* - 故障转移—网络/超时:网络错误重试、超时重试、全部超时 → 504
* - 故障转移—模型回退:same_model 模式、any_model 模式、自定义回退、全部失败 → 5xx 透传
* - 策略控制:failover 禁用、maxFallbackAttempts=0
Expand Down Expand Up @@ -430,24 +430,66 @@ describe('failover – retry on error status', () => {
expect(requestLog).toHaveLength(1);
});

it('does NOT retry a streaming request on 5xx — stream errors pass through', async () => {
it('retries a streaming request when the upstream returns an error status before the first byte', async () => {
loadProviderConfigsForTest(singleProviderConfig());
loadFailoverPolicyForTest({
enabled: true,
retryAttempts: 2,
retryAttempts: 1,
maxFallbackAttempts: 0,
retryOnStatusRanges: ['5xx'],
retryOnStatusCodes: [429],
});
responseQueue.push(() => errorResponse(429, 'Rate limited'));
responseQueue.push(() => defaultOkResponse());

// stream: true must NOT disable status-based failover: the error status arrives
// before any body is streamed to the client, so retrying to the next attempt is safe.
const res = await app.fetch(
gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })),
);

expect(res.status).toBe(200);
expect(requestLog).toHaveLength(2);
});

it('falls over a streaming request to the next provider on 5xx', async () => {
const configs = validateConfigEntries({
primary: {
type: 'openai',
targetBaseUrl: `${mockBaseUrl}/primary/v1`,
auth: { header: 'authorization', value: 'key' },
models: ['gpt-4o'],
priority: 10,
},
secondary: {
type: 'openai',
targetBaseUrl: `${mockBaseUrl}/secondary/v1`,
auth: { header: 'authorization', value: 'key' },
models: ['gpt-4o'],
priority: 5,
},
} as any);
loadProviderConfigsForTest(configs);
loadFailoverPolicyForTest({
enabled: true,
retryAttempts: 0,
modelFallbackMode: 'same_model',
maxFallbackAttempts: 1,
retryOnStatusRanges: ['5xx'],
retryOnStatusCodes: [],
});
responseQueue.push(() => errorResponse(500, 'Internal error'));
responseQueue.push(() => errorResponse(503, 'Service unavailable'));
responseQueue.push(() => defaultOkResponse());

// stream: true marks this as a streaming request; status-based retries are skipped
const res = await app.fetch(
gatewayReq('/v1/chat/completions', chatBody('gpt-4o', { stream: true })),
);

expect(res.status).toBe(500);
expect(requestLog).toHaveLength(1); // no retry
expect(res.status).toBe(200);
expect(requestLog).toHaveLength(2);
// Primary (high priority) is tried first, then the request falls over to secondary.
expect(requestLog[0]!.path).toBe('/primary/v1/chat/completions');
expect(requestLog[1]!.path).toBe('/secondary/v1/chat/completions');
});
});

Expand Down
Loading