From 3f71de3b345efba50231a526acf5307a27c14e4b Mon Sep 17 00:00:00 2001
From: Dan Lynch <pyramation@gmail.com>
Date: Wed, 20 May 2026 23:30:21 +0000
Subject: [PATCH 1/3] feat(ollama): add generateWithUsage() and export
 Usage/GenerateResult types

Adds a generateWithUsage() method to OllamaClient that returns a
GenerateResult: the generated content together with token usage
metadata (a Usage object with fields such as input, output and
totalTokens), the model and the stop reason.
The existing generate() method delegates to generateWithUsage()
internally, maintaining full backward compatibility.

Also exports the Usage and GenerateResult interfaces so consumers can
type their metering/billing integrations.
---
 packages/ollama/src/index.ts | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/packages/ollama/src/index.ts b/packages/ollama/src/index.ts
index 5852889..8ab0260 100644
--- a/packages/ollama/src/index.ts
+++ b/packages/ollama/src/index.ts
@@ -44,7 +44,7 @@ interface ToolCallContent {
   arguments: Record<string, JsonValue | undefined>;
 }
 
-interface Usage {
+export interface Usage {
   input: number;
   output: number;
   cacheRead: number;
@@ -226,6 +226,13 @@ export interface GenerateInput {
   maxTokens?: number;
 }
 
+export interface GenerateResult {
+  content: string;
+  usage: Usage;
+  model: string;
+  stopReason: 'stop' | 'length' | 'toolUse' | 'error' | 'aborted';
+}
+
 interface OllamaTagsResponse {
   models?: Array<{ name: string }>;
 }
@@ -310,6 +317,15 @@ export class OllamaClient {
     input: GenerateInput,
     onChunk?: (chunk: string) => void
   ): Promise<string | void> {
+    const result = await this.generateWithUsage(input, onChunk);
+    if (onChunk || input.stream) return;
+    return result.content;
+  }
+
+  async generateWithUsage(
+    input: GenerateInput,
+    onChunk?: (chunk: string) => void
+  ): Promise<GenerateResult> {
     const context = legacyInputToContext(input);
     const model: ModelDescriptor = {
       id: input.model,
@@ -336,14 +352,20 @@ export class OllamaClient {
           onChunk?.(event.delta);
         }
       }
-      return;
     }
 
     const message = await response.result();
-    return message.content
+    const content = message.content
       .filter((block): block is TextContent => block.type === 'text')
       .map((block) => block.text)
       .join('');
+
+    return {
+      content,
+      usage: message.usage,
+      model: message.model,
+      stopReason: message.stopReason,
+    };
   }
 }
 

From 8209c096b5699ed232f94b7bf5cf7ad3579e678f Mon Sep 17 00:00:00 2001
From: Dan Lynch <pyramation@gmail.com>
Date: Wed, 20 May 2026 23:40:32 +0000
Subject: [PATCH 2/3] test(ollama): add generateWithUsage token metering live
 tests

Three smoke tests covering:
- Batch mode: content + non-zero usage (input, output, totalTokens)
- Streaming mode: chunks received + usage returned after completion
- Multi-turn chat: token counts for conversation context
---
 packages/ollama/__tests__/ollama.live.test.ts | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/packages/ollama/__tests__/ollama.live.test.ts b/packages/ollama/__tests__/ollama.live.test.ts
index 16b151f..2343c99 100644
--- a/packages/ollama/__tests__/ollama.live.test.ts
+++ b/packages/ollama/__tests__/ollama.live.test.ts
@@ -118,6 +118,69 @@ describeSmoke('Ollama live smoke', () => {
   });
 });
 
+describeSmoke('generateWithUsage token metering', () => {
+  jest.setTimeout(60_000);
+
+  it('returns content and non-zero usage in batch mode', async () => {
+    const client = new OllamaClient(baseUrl);
+    const result = await client.generateWithUsage({
+      model: modelId,
+      prompt: 'Reply with exactly the single word PING and nothing else.',
+      maxTokens: 128,
+      temperature: 0,
+    });
+
+    expect(result.content.toLowerCase()).toContain('ping');
+    expect(result.model).toBeTruthy();
+    expect(result.usage.input).toBeGreaterThan(0);
+    expect(result.usage.output).toBeGreaterThan(0);
+    expect(result.usage.totalTokens).toBeGreaterThanOrEqual(
+      result.usage.input + result.usage.output,
+    );
+    expect(result.stopReason).toBe('stop');
+  });
+
+  it('streams chunks and returns usage after completion', async () => {
+    const client = new OllamaClient(baseUrl);
+    const chunks: string[] = [];
+    const result = await client.generateWithUsage(
+      {
+        model: modelId,
+        prompt: 'Reply with exactly the single word PONG and nothing else.',
+        stream: true,
+        maxTokens: 128,
+        temperature: 0,
+      },
+      (chunk: string) => {
+        chunks.push(chunk);
+      },
+    );
+
+    expect(chunks.length).toBeGreaterThan(0);
+    expect(result.content.toLowerCase()).toContain('pong');
+    expect(result.usage.output).toBeGreaterThan(0);
+    expect(result.usage.totalTokens).toBeGreaterThan(0);
+  });
+
+  it('returns token counts for multi-turn chat', async () => {
+    const client = new OllamaClient(baseUrl);
+    const result = await client.generateWithUsage({
+      model: modelId,
+      messages: [
+        { role: 'user', content: 'Say hello' },
+        { role: 'assistant', content: 'Hello!' },
+        { role: 'user', content: 'Now say goodbye in one word.' },
+      ],
+      maxTokens: 128,
+      temperature: 0,
+    });
+
+    expect(result.content.length).toBeGreaterThan(0);
+    expect(result.usage.input).toBeGreaterThan(0);
+    expect(result.usage.output).toBeGreaterThan(0);
+  });
+});
+
 describeExtended('Ollama live extended', () => {
   jest.setTimeout(60_000);
 

From b905ff2a09abde5139e29e436fc4e403e295035b Mon Sep 17 00:00:00 2001
From: Dan Lynch <pyramation@gmail.com>
Date: Wed, 20 May 2026 23:46:43 +0000
Subject: [PATCH 3/3] revert: remove generateWithUsage live tests (pending team
 approval)

---
 packages/ollama/__tests__/ollama.live.test.ts | 63 -------------------
 1 file changed, 63 deletions(-)

diff --git a/packages/ollama/__tests__/ollama.live.test.ts b/packages/ollama/__tests__/ollama.live.test.ts
index 2343c99..16b151f 100644
--- a/packages/ollama/__tests__/ollama.live.test.ts
+++ b/packages/ollama/__tests__/ollama.live.test.ts
@@ -118,69 +118,6 @@ describeSmoke('Ollama live smoke', () => {
   });
 });
 
-describeSmoke('generateWithUsage token metering', () => {
-  jest.setTimeout(60_000);
-
-  it('returns content and non-zero usage in batch mode', async () => {
-    const client = new OllamaClient(baseUrl);
-    const result = await client.generateWithUsage({
-      model: modelId,
-      prompt: 'Reply with exactly the single word PING and nothing else.',
-      maxTokens: 128,
-      temperature: 0,
-    });
-
-    expect(result.content.toLowerCase()).toContain('ping');
-    expect(result.model).toBeTruthy();
-    expect(result.usage.input).toBeGreaterThan(0);
-    expect(result.usage.output).toBeGreaterThan(0);
-    expect(result.usage.totalTokens).toBeGreaterThanOrEqual(
-      result.usage.input + result.usage.output,
-    );
-    expect(result.stopReason).toBe('stop');
-  });
-
-  it('streams chunks and returns usage after completion', async () => {
-    const client = new OllamaClient(baseUrl);
-    const chunks: string[] = [];
-    const result = await client.generateWithUsage(
-      {
-        model: modelId,
-        prompt: 'Reply with exactly the single word PONG and nothing else.',
-        stream: true,
-        maxTokens: 128,
-        temperature: 0,
-      },
-      (chunk: string) => {
-        chunks.push(chunk);
-      },
-    );
-
-    expect(chunks.length).toBeGreaterThan(0);
-    expect(result.content.toLowerCase()).toContain('pong');
-    expect(result.usage.output).toBeGreaterThan(0);
-    expect(result.usage.totalTokens).toBeGreaterThan(0);
-  });
-
-  it('returns token counts for multi-turn chat', async () => {
-    const client = new OllamaClient(baseUrl);
-    const result = await client.generateWithUsage({
-      model: modelId,
-      messages: [
-        { role: 'user', content: 'Say hello' },
-        { role: 'assistant', content: 'Hello!' },
-        { role: 'user', content: 'Now say goodbye in one word.' },
-      ],
-      maxTokens: 128,
-      temperature: 0,
-    });
-
-    expect(result.content.length).toBeGreaterThan(0);
-    expect(result.usage.input).toBeGreaterThan(0);
-    expect(result.usage.output).toBeGreaterThan(0);
-  });
-});
-
 describeExtended('Ollama live extended', () => {
   jest.setTimeout(60_000);