From 942e1219213c9565f96c5bc9c219f403616da124 Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Thu, 21 May 2026 22:23:41 +0000 Subject: [PATCH] feat(ollama): return real token counts from embedding endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch from deprecated /api/embeddings to /api/embed - generateEmbedding() now returns EmbeddingResult { embedding, promptTokens } instead of plain number[] — promptTokens comes from prompt_eval_count - Add OllamaAdapter.embed() convenience method - Update live tests to verify promptTokens > 0 - Update README with new return type and adapter example BREAKING CHANGE: generateEmbedding() return type changed from Promise<number[]> to Promise<EmbeddingResult> --- packages/ollama/README.md | 13 ++++++--- packages/ollama/__tests__/ollama.live.test.ts | 25 +++++++++++++---- packages/ollama/src/index.ts | 28 +++++++++++++++---- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/packages/ollama/README.md b/packages/ollama/README.md index c80b9f8..10a6285 100644 --- a/packages/ollama/README.md +++ b/packages/ollama/README.md @@ -50,9 +50,10 @@ await client.generate( // Pull a model to local cache await client.pullModel('mistral'); -// Generate embeddings -const embedding = await client.generateEmbedding('Compute embeddings'); -console.log('Embedding vector length:', embedding.length); +// Generate embeddings (with token count from /api/embed) +const result = await client.generateEmbedding('Compute embeddings'); +console.log('Embedding vector length:', result.embedding.length); +console.log('Prompt tokens:', result.promptTokens); // Delete a pulled model when done await client.deleteModel('mistral'); @@ -64,7 +65,7 @@ await client.deleteModel('mistral'); - `.listModels(): Promise` - `.showModel(model: string): Promise<{ capabilities?: string[] } | null>` - `.generate(input: GenerateInput, onChunk?: (chunk: string) => void): Promise` -- `.generateEmbedding(text: string, model?: string): Promise` — 
defaults to `nomic-embed-text` +- `.generateEmbedding(text: string, model?: string): Promise` — returns `{ embedding: number[], promptTokens: number }`, defaults to `nomic-embed-text` - `.pullModel(model: string): Promise` - `.deleteModel(model: string): Promise` @@ -75,6 +76,10 @@ import { OllamaAdapter } from '@agentic-kit/ollama'; const provider = new OllamaAdapter('http://localhost:11434'); const model = provider.createModel('llama3'); + +// Embeddings with real token counts +const result = await provider.embed('Compute embeddings', 'nomic-embed-text'); +console.log(result.embedding.length, result.promptTokens); ``` ## Local Live Tests diff --git a/packages/ollama/__tests__/ollama.live.test.ts b/packages/ollama/__tests__/ollama.live.test.ts index 4c3f1ea..ab8af2b 100644 --- a/packages/ollama/__tests__/ollama.live.test.ts +++ b/packages/ollama/__tests__/ollama.live.test.ts @@ -205,13 +205,28 @@ describeExtended('Ollama live extended', () => { expect(output.trim().toLowerCase()).toContain('marble'); }); - itWithEmbeddings('generates local embeddings when an embed model is installed', async () => { + itWithEmbeddings('generates local embeddings with token count via /api/embed', async () => { const client = new OllamaClient(baseUrl); - const embedding = await client.generateEmbedding('hello world', embedModel); + const result = await client.generateEmbedding('hello world', embedModel); + + expect(result).toHaveProperty('embedding'); + expect(result).toHaveProperty('promptTokens'); + expect(Array.isArray(result.embedding)).toBe(true); + expect(result.embedding.length).toBeGreaterThan(0); + expect(result.embedding.every((value) => Number.isFinite(value))).toBe(true); + expect(result.promptTokens).toBeGreaterThan(0); + }); + + itWithEmbeddings('OllamaAdapter.embed() returns embedding with token count', async () => { + const { OllamaAdapter } = require('../src/index'); + const adapter = new OllamaAdapter(baseUrl); + const result = await adapter.embed('hello world', 
embedModel); - expect(Array.isArray(embedding)).toBe(true); - expect(embedding.length).toBeGreaterThan(0); - expect(embedding.every((value) => Number.isFinite(value))).toBe(true); + expect(result).toHaveProperty('embedding'); + expect(result).toHaveProperty('promptTokens'); + expect(Array.isArray(result.embedding)).toBe(true); + expect(result.embedding.length).toBeGreaterThan(0); + expect(result.promptTokens).toBeGreaterThan(0); }); }); diff --git a/packages/ollama/src/index.ts b/packages/ollama/src/index.ts index 02695b1..37961f5 100644 --- a/packages/ollama/src/index.ts +++ b/packages/ollama/src/index.ts @@ -246,8 +246,17 @@ interface OllamaChatLine { response?: string; } -interface OllamaEmbeddingResponse { +interface OllamaEmbedResponse { + model: string; + embeddings: number[][]; + total_duration?: number; + load_duration?: number; + prompt_eval_count?: number; +} + +export interface EmbeddingResult { embedding: number[]; + promptTokens: number; } export const OLLAMA_MODELS: ModelDescriptor[] = []; @@ -297,18 +306,21 @@ export class OllamaClient { } } - async generateEmbedding(text: string, model = 'nomic-embed-text'): Promise { - const response = await fetch(`${this.baseUrl}/api/embeddings`, { + async generateEmbedding(text: string, model = 'nomic-embed-text'): Promise { + const response = await fetch(`${this.baseUrl}/api/embed`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model, prompt: text }), + body: JSON.stringify({ model, input: text }), }); if (!response.ok) { throw new Error(`generateEmbedding failed: ${response.status} ${response.statusText}`); } - const payload = (await response.json()) as OllamaEmbeddingResponse; - return payload.embedding; + const payload = (await response.json()) as OllamaEmbedResponse; + return { + embedding: payload.embeddings[0], + promptTokens: payload.prompt_eval_count ?? 
0, + }; } async generate(input: GenerateInput): Promise; @@ -385,6 +397,10 @@ export class OllamaAdapter { return this.client.listModels(); } + async embed(text: string, model = 'nomic-embed-text'): Promise { + return this.client.generateEmbedding(text, model); + } + stream(model: ModelDescriptor, context: Context, options?: StreamOptions): AssistantMessageEventStream { const stream = new DefaultAssistantMessageEventStream(); const output = createAssistantMessage(model);