Commit e09c32e (1 parent: f73eb25)

fix: Fix gemini stream route

5 files changed: 28 additions & 23 deletions

src/controllers/geminiQuery.ts (4 additions, 2 deletions)

@@ -212,8 +212,10 @@ export const handleGeminiStream = async (req: Request, res: Response) => {
   try {
     await generateText({
       model: model,
-      onChunk: (chunk: LLMStreamChunk) => {
-        res.write(`data: ${JSON.stringify(chunk)}\n\n`);
+      // The onChunk callback now receives a raw SSE line string from llmWrapper.ts.
+      onChunk: (rawSseLine: string) => {
+        // Pass the raw SSE line through, followed by a single newline, as per the SSE spec.
+        res.write(`${rawSseLine}\n`);
       },
       query: augmentedPrompt,
       stream: true,
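
Note that blank lines from upstream now pass through as bare `\n` writes, which preserves the blank-line event terminators that SSE clients rely on. The diff does not show where the response headers are set; a minimal sketch of the setup such a handler needs, assuming it happens earlier in handleGeminiStream:

    // Assumed setup earlier in handleGeminiStream (not shown in this commit):
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    res.flushHeaders(); // Push headers immediately so the client starts reading.

    // ...and once generateText() resolves, terminate the stream:
    res.end();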

src/routers/geminiQuery.ts (15 additions, 4 deletions)

@@ -5,10 +5,21 @@ import { queryApiKeyAuth } from '../middleware/auth';
 
 const geminiRouter = Router();
 
-// Route for batch processing
-geminiRouter.post('/gemini/models/:model:generateContent', queryApiKeyAuth, handleGeminiBatch);
+// Gemini proxy endpoints: dispatch on the ':operation' suffix of the model segment.
+geminiRouter.post('/gemini/models/:model', queryApiKeyAuth, (req, res) => {
+  const model = req.params.model;
 
-// Route for streaming
-geminiRouter.post('/gemini/models/:model:streamGenerateContent', queryApiKeyAuth, handleGeminiStream);
+  if (model.endsWith(':generateContent')) {
+    req.params.model = model.replace(':generateContent', '');
+    return handleGeminiBatch(req, res);
+  }
+
+  if (model.endsWith(':streamGenerateContent')) {
+    req.params.model = model.replace(':streamGenerateContent', '');
+    return handleGeminiStream(req, res);
+  }
+
+  return res.status(404).json({ error: 'Unsupported Gemini operation' });
+});
 
 export { geminiRouter };
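
Why the rewrite: Express's path-to-regexp syntax reads `:model:generateContent` as two adjacent route parameters (`model` and `generateContent`) rather than as `:model` followed by a literal `:generateContent` suffix, so the original routes never captured the model id as intended. Capturing the whole segment as `:model` and splitting the operation suffix by hand avoids the ambiguity. Assuming the router is mounted at the app root (a mount prefix would simply prepend to these paths) and using gemini-1.5-pro as an illustrative model id, requests now dispatch as:

    POST /gemini/models/gemini-1.5-pro:generateContent        -> handleGeminiBatch,  req.params.model === 'gemini-1.5-pro'
    POST /gemini/models/gemini-1.5-pro:streamGenerateContent  -> handleGeminiStream, req.params.model === 'gemini-1.5-pro'
    POST /gemini/models/gemini-1.5-pro:embedContent           -> 404 { "error": "Unsupported Gemini operation" }

The two endsWith checks cannot collide: ':streamGenerateContent' ends in 'mGenerateContent', not ':generateContent', so a streaming request never falls into the batch branch.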

src/services/gemini.ts (3 additions, 12 deletions)

@@ -31,7 +31,7 @@ const ensureModelPrefixed = (modelId: string): string => {
 const streamGemini = async (
   modelId: GeminiChatModel | string,
   contents: GeminiContent[],
-  onChunk: (chunk: GeminiStreamChunk) => void
+  onChunk: (rawSseLine: string) => void
 ): Promise<void> => {
   if (!config.geminiApiKey) {
     throw new Error('Gemini API key is not configured.');
@@ -69,17 +69,8 @@ const streamGemini = async (
       buffer = lines.pop()!; // Keep the last partial line in buffer
 
       for (const line of lines) {
-        const trimmedLine = line.trim();
-        if (trimmedLine.startsWith('data: ')) {
-          const jsonData = trimmedLine.substring('data: '.length);
-          try {
-            const chunk = JSON.parse(jsonData) as GeminiStreamChunk;
-            onChunk(chunk);
-          } catch (error) {
-            console.error('Failed to parse Gemini stream chunk:', error, jsonData);
-            // Decide on error handling: re-throw, or pass error to onChunk, or ignore.
-          }
-        }
+        // Pass every line from the upstream SSE stream through to the callback.
+        onChunk(line);
       }
     }
 };
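
For context, the pass-through sits inside a buffered line-splitting read loop whose surrounding code is not part of this diff. A minimal sketch of its assumed shape, reading the upstream response as a fetch ReadableStream:

    // Assumed shape of the enclosing loop in streamGemini (not shown in this commit):
    const reader = response.body!.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop()!; // Keep the last partial line in buffer
      for (const line of lines) {
        onChunk(line); // Every line, including blank event separators, reaches the callback.
      }
    }

Forwarding blank lines is deliberate: they are the event delimiters the controller re-emits, so downstream SSE parsers still see well-formed events.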

src/services/llmWrapper.ts (5 additions, 4 deletions)

@@ -42,10 +42,11 @@ export const generateText = async (
   if (!onChunk) {
     throw new Error('onChunk callback is required for streaming responses.');
   }
-  await streamGeminiGenerate(geminiModelId as GeminiModelName, geminiContents, (chunk: GeminiStreamChunk) => {
-    // LLMStreamChunk is now GeminiStreamChunk, so direct pass is fine.
-    onChunk(chunk as LLMStreamChunk);
-  });
+  // The streaming helper in gemini.ts now provides raw SSE lines to its onChunk callback,
+  // and options.onChunk from LLMChatRequestOptions expects raw SSE lines as well
+  // (its type is updated in src/types/llmWrapper.ts in this same commit).
+  // The callback can therefore be passed through directly, with no chunk parsing in between.
+  await streamGeminiGenerate(geminiModelId as GeminiModelName, geminiContents, onChunk);
   return;
 } else {
   const response = await batchGeminiGenerate(geminiModelId as GeminiModelName, geminiContents);
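
With the parsing shim removed, the caller's callback travels untouched from generateText down to the stream reader. A hypothetical call, assuming the option names from src/types/llmWrapper.ts and an illustrative model id:

    // Inside an async function; 'gemini-1.5-pro' is illustrative, not from this commit.
    await generateText({
      model: 'gemini-1.5-pro',
      query: 'Explain SSE in one sentence.',
      stream: true,
      // Receives raw SSE lines such as `data: {...}` plus the blank separator lines.
      onChunk: (rawSseLine: string) => process.stdout.write(`${rawSseLine}\n`),
    });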

src/types/llmWrapper.ts (1 addition, 1 deletion)

@@ -7,7 +7,7 @@ export type LLMChatRequestOptions = {
   query: string;
   stream?: boolean;
   model?: string; // Optional model override
-  onChunk?: (chunk: Gemini.GeminiStreamChunk) => void; // For streaming, now only Gemini
+  onChunk?: (rawSseLine: string) => void; // For streaming; receives one raw SSE line per call
   // Add other common parameters like temperature, max_tokens if they are to be abstracted.
 };
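
End to end, the proxy now relays the upstream event stream line-for-line, so a client can consume it exactly as it would consume Gemini's own streaming endpoint. A minimal sketch of such a client; the host, port, auth header name, and request body shape are all assumptions, since none of them appear in this commit:

    // Inside an async function. Only the URL pattern comes from the router diff;
    // every other name below is illustrative.
    const res = await fetch(
      'http://localhost:3000/gemini/models/gemini-1.5-pro:streamGenerateContent',
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', 'x-api-key': '<key>' }, // header name hypothetical
        body: JSON.stringify({ query: 'Hello' }), // body shape assumed
      },
    );
    const reader = res.body!.getReader();
    const decoder = new TextDecoder();
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      process.stdout.write(decoder.decode(value, { stream: true })); // raw SSE pass-through
    }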
