From 3f71de3b345efba50231a526acf5307a27c14e4b Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Wed, 20 May 2026 23:30:21 +0000 Subject: [PATCH 1/3] feat(ollama): add generateWithUsage() and export Usage/GenerateResult types Adds generateWithUsage() method to OllamaClient that returns token usage metadata (Usage fields such as input, output, and totalTokens) alongside content, the model id, and the stop reason. The existing generate() method delegates to generateWithUsage() internally, maintaining full backward compatibility. Also exports the Usage and GenerateResult interfaces so consumers can type their metering/billing integrations. --- packages/ollama/src/index.ts | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/packages/ollama/src/index.ts b/packages/ollama/src/index.ts index 5852889..8ab0260 100644 --- a/packages/ollama/src/index.ts +++ b/packages/ollama/src/index.ts @@ -44,7 +44,7 @@ interface ToolCallContent { arguments: Record; } -interface Usage { +export interface Usage { input: number; output: number; cacheRead: number; @@ -226,6 +226,13 @@ export interface GenerateInput { maxTokens?: number; } +export interface GenerateResult { + content: string; + usage: Usage; + model: string; + stopReason: 'stop' | 'length' | 'toolUse' | 'error' | 'aborted'; +} + interface OllamaTagsResponse { models?: Array<{ name: string }>; } @@ -310,6 +317,15 @@ export class OllamaClient { input: GenerateInput, onChunk?: (chunk: string) => void ): Promise { + const result = await this.generateWithUsage(input, onChunk); + if (onChunk || input.stream) return; + return result.content; + } + + async generateWithUsage( + input: GenerateInput, + onChunk?: (chunk: string) => void + ): Promise { const context = legacyInputToContext(input); const model: ModelDescriptor = { id: input.model, @@ -336,14 +352,20 @@ export class OllamaClient { onChunk?.(event.delta); } } - return; } const message = await response.result(); - return message.content + const content = message.content 
.filter((block): block is TextContent => block.type === 'text') .map((block) => block.text) .join(''); + + return { + content, + usage: message.usage, + model: message.model, + stopReason: message.stopReason, + }; } } From 8209c096b5699ed232f94b7bf5cf7ad3579e678f Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Wed, 20 May 2026 23:40:32 +0000 Subject: [PATCH 2/3] test(ollama): add generateWithUsage token metering live tests Three smoke tests covering: - Batch mode: content + non-zero usage (input, output, totalTokens) - Streaming mode: chunks received + usage returned after completion - Multi-turn chat: token counts for conversation context --- packages/ollama/__tests__/ollama.live.test.ts | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/packages/ollama/__tests__/ollama.live.test.ts b/packages/ollama/__tests__/ollama.live.test.ts index 16b151f..2343c99 100644 --- a/packages/ollama/__tests__/ollama.live.test.ts +++ b/packages/ollama/__tests__/ollama.live.test.ts @@ -118,6 +118,69 @@ describeSmoke('Ollama live smoke', () => { }); }); +describeSmoke('generateWithUsage token metering', () => { + jest.setTimeout(60_000); + + it('returns content and non-zero usage in batch mode', async () => { + const client = new OllamaClient(baseUrl); + const result = await client.generateWithUsage({ + model: modelId, + prompt: 'Reply with exactly the single word PING and nothing else.', + maxTokens: 128, + temperature: 0, + }); + + expect(result.content.toLowerCase()).toContain('ping'); + expect(result.model).toBeTruthy(); + expect(result.usage.input).toBeGreaterThan(0); + expect(result.usage.output).toBeGreaterThan(0); + expect(result.usage.totalTokens).toBeGreaterThanOrEqual( + result.usage.input + result.usage.output, + ); + expect(result.stopReason).toBe('stop'); + }); + + it('streams chunks and returns usage after completion', async () => { + const client = new OllamaClient(baseUrl); + const chunks: string[] = []; + const result = await 
client.generateWithUsage( + { + model: modelId, + prompt: 'Reply with exactly the single word PONG and nothing else.', + stream: true, + maxTokens: 128, + temperature: 0, + }, + (chunk: string) => { + chunks.push(chunk); + }, + ); + + expect(chunks.length).toBeGreaterThan(0); + expect(result.content.toLowerCase()).toContain('pong'); + expect(result.usage.output).toBeGreaterThan(0); + expect(result.usage.totalTokens).toBeGreaterThan(0); + }); + + it('returns token counts for multi-turn chat', async () => { + const client = new OllamaClient(baseUrl); + const result = await client.generateWithUsage({ + model: modelId, + messages: [ + { role: 'user', content: 'Say hello' }, + { role: 'assistant', content: 'Hello!' }, + { role: 'user', content: 'Now say goodbye in one word.' }, + ], + maxTokens: 128, + temperature: 0, + }); + + expect(result.content.length).toBeGreaterThan(0); + expect(result.usage.input).toBeGreaterThan(0); + expect(result.usage.output).toBeGreaterThan(0); + }); +}); + describeExtended('Ollama live extended', () => { jest.setTimeout(60_000); From b905ff2a09abde5139e29e436fc4e403e295035b Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Wed, 20 May 2026 23:46:43 +0000 Subject: [PATCH 3/3] revert: remove generateWithUsage live tests (pending team approval) --- packages/ollama/__tests__/ollama.live.test.ts | 63 ------------------- 1 file changed, 63 deletions(-) diff --git a/packages/ollama/__tests__/ollama.live.test.ts b/packages/ollama/__tests__/ollama.live.test.ts index 2343c99..16b151f 100644 --- a/packages/ollama/__tests__/ollama.live.test.ts +++ b/packages/ollama/__tests__/ollama.live.test.ts @@ -118,69 +118,6 @@ describeSmoke('Ollama live smoke', () => { }); }); -describeSmoke('generateWithUsage token metering', () => { - jest.setTimeout(60_000); - - it('returns content and non-zero usage in batch mode', async () => { - const client = new OllamaClient(baseUrl); - const result = await client.generateWithUsage({ - model: modelId, - prompt: 'Reply with 
exactly the single word PING and nothing else.', - maxTokens: 128, - temperature: 0, - }); - - expect(result.content.toLowerCase()).toContain('ping'); - expect(result.model).toBeTruthy(); - expect(result.usage.input).toBeGreaterThan(0); - expect(result.usage.output).toBeGreaterThan(0); - expect(result.usage.totalTokens).toBeGreaterThanOrEqual( - result.usage.input + result.usage.output, - ); - expect(result.stopReason).toBe('stop'); - }); - - it('streams chunks and returns usage after completion', async () => { - const client = new OllamaClient(baseUrl); - const chunks: string[] = []; - const result = await client.generateWithUsage( - { - model: modelId, - prompt: 'Reply with exactly the single word PONG and nothing else.', - stream: true, - maxTokens: 128, - temperature: 0, - }, - (chunk: string) => { - chunks.push(chunk); - }, - ); - - expect(chunks.length).toBeGreaterThan(0); - expect(result.content.toLowerCase()).toContain('pong'); - expect(result.usage.output).toBeGreaterThan(0); - expect(result.usage.totalTokens).toBeGreaterThan(0); - }); - - it('returns token counts for multi-turn chat', async () => { - const client = new OllamaClient(baseUrl); - const result = await client.generateWithUsage({ - model: modelId, - messages: [ - { role: 'user', content: 'Say hello' }, - { role: 'assistant', content: 'Hello!' }, - { role: 'user', content: 'Now say goodbye in one word.' }, - ], - maxTokens: 128, - temperature: 0, - }); - - expect(result.content.length).toBeGreaterThan(0); - expect(result.usage.input).toBeGreaterThan(0); - expect(result.usage.output).toBeGreaterThan(0); - }); -}); - describeExtended('Ollama live extended', () => { jest.setTimeout(60_000);