diff --git a/config/lanes.example.yaml b/config/lanes.example.yaml index dc42765..f6e29a7 100644 --- a/config/lanes.example.yaml +++ b/config/lanes.example.yaml @@ -33,7 +33,7 @@ lanes: - id: claude-native kind: cli - model: claude-opus-4-7 + model: claude-opus@latest # self-updating: tracks the newest PRICED claude-opus (e.g. claude-opus-4-8) — never a hard-pinned version trust_mode: full costBasis: subscription provenance: anthropic @@ -106,14 +106,14 @@ lanes: # run with TOKENMAXED_DISABLE=1 so they never re-enter routing / recurse.) - id: claude-haiku kind: cli - model: claude-haiku-4-5-20251001 + model: claude-haiku@latest # self-updating; the {model} arg below spawns the resolved id trust_mode: full costBasis: subscription provenance: anthropic jurisdiction: US execution_mode: answer-only command: claude - args: ["-p", "--model", "claude-haiku-4-5-20251001"] + args: ["-p", "--model", "{model}"] # {model} ⇒ the resolved newest priced claude-haiku # Secondary / in-family manager. Codex above is the default host-turn reviewer # (first eligible in file order). On the escalation path this lane can only # independently review an offloaded output in categories where it isn't weaker @@ -132,11 +132,37 @@ lanes: explain: 0.82 codegen: 0.72 + # Full-access Sonnet (Claude Code): a stronger in-family offload than Haiku for + # bounded subtasks, still on the same subscription (no metered $). Self-updating via + # claude-sonnet@latest; the {model} arg spawns the resolved id. As a manager it is a + # capable INDEPENDENT reviewer for Codex's strong categories (which Haiku can't cover). 
+ - id: claude-sonnet + kind: cli + model: claude-sonnet@latest + trust_mode: full + costBasis: subscription + provenance: anthropic + jurisdiction: US + execution_mode: answer-only + command: claude + args: ["-p", "--model", "{model}"] # {model} ⇒ the resolved newest priced claude-sonnet + roles: [manager] + manager_allowed: true + capability: + feature: 0.90 + refactor: 0.86 + bugfix: 0.85 + boilerplate: 0.88 + explain: 0.88 + codegen: 0.85 + docs: 0.86 + # --- Other provider lanes (SUPPORTED) --------------------------------------- # Popular vendors, shipped as SAFE inert TEMPLATES: each is `blocked` (never - # selected) until YOU pick a trust_mode. (The only enabled defaults ABOVE are the - # host `claude-native`, `codex-cli` (the default reviewer), and the in-family - # `claude-haiku` — all first-party or availability-gated. `ollama-llama3` ships + # selected) until YOU pick a trust_mode. (The enabled defaults ABOVE are the host + # `claude-native`, `codex-cli` (the default reviewer), and the in-family Claude Code + # lanes `claude-haiku` + `claude-sonnet` — all first-party or availability-gated. + # `ollama-llama3` ships # `blocked` too: a local server isn't assumed. In your own config, set every # lane's trust deliberately.) The vendor lanes here add nothing to your trust # surface until you change `blocked`. Trust ladder: blocked < worker < reader < full. @@ -154,6 +180,10 @@ lanes: # BYOK api lanes: put the key in env TOKENMAXED_KEY_ (never in this file). # api lanes speak the OpenAI /chat/completions schema — point `endpoint` at your # provider's OpenAI-COMPATIBLE chat-completions URL (not a vendor-proprietary one). + # COST: `costBasis` is YOUR billing model — NOT implied by `api`. Many vendors (e.g. + # MiniMax) are a flat-rate SUBSCRIPTION token ⇒ `costBasis: subscription` (treated as + # $0 and preferred by routing, like a CLI subscription); pay-per-token ⇒ `metered`. + # /tokenmaxed:setup ASKS you per api lane — TokenMaxed never assumes metered. 
# CLI lanes (gemini-cli, kimi-cli) can only be `full` or `blocked` — worker/reader # executors are API-only, so a CLI lane set to worker/reader loads but never routes. @@ -199,9 +229,24 @@ lanes: # automatically (family from the price table); for an # unpriced pin add `model_family: minimax`. trust_mode: blocked # → worker (no repo) or reader (repo-read; see above) or full - costBasis: metered + costBasis: subscription # MiniMax is typically a flat-rate SUBSCRIPTION token (not pay-per-token); /tokenmaxed:setup confirms this with you provenance: minimax jurisdiction: CN endpoint: https://api.minimax.io/v1/chat/completions # use the OpenAI-compatible URL authHandle: MINIMAX capability: { codegen: 0.80, boilerplate: 0.80, docs: 0.76 } + + # Sonnet via Anthropic API (BYOK) — an OPT-IN alternative to the Claude Code CLI + # lane above, NOT a default. When the `claude` CLI is available, the subscription + # CLI lane runs Sonnet (no metered $); this is here only if you deliberately want + # Sonnet over the API. Flip trust_mode to worker/full to enable. Self-updating. 
+ - id: claude-sonnet-api + kind: api + model: claude-sonnet@latest + trust_mode: blocked + costBasis: metered + provenance: anthropic + jurisdiction: US + endpoint: https://api.anthropic.com/v1/chat/completions # Anthropic's OpenAI-compatible endpoint + authHandle: ANTHROPIC + capability: { codegen: 0.85, boilerplate: 0.86, docs: 0.86, explain: 0.88 } diff --git a/config/prices.seed.json b/config/prices.seed.json index 149b25e..11326d7 100644 --- a/config/prices.seed.json +++ b/config/prices.seed.json @@ -1,8 +1,10 @@ { "schema_version": 2, - "frontier_model": "claude-opus-4-7", + "frontier_model": "claude-opus-4-8", "models": { + "claude-opus-4-8": { "inputPer1M": 5, "outputPer1M": 25, "family": "claude-opus", "released": "2026-05-15" }, "claude-opus-4-7": { "inputPer1M": 15, "outputPer1M": 75, "family": "claude-opus", "released": "2026-01-15" }, + "claude-sonnet-4-6": { "inputPer1M": 3, "outputPer1M": 15, "family": "claude-sonnet", "released": "2025-11-14" }, "claude-haiku-4-5-20251001": { "inputPer1M": 1, "outputPer1M": 5, "family": "claude-haiku", "released": "2025-10-01" }, "gpt-5.5": { "inputPer1M": 10, "outputPer1M": 30, "family": "gpt", "released": "2026-02-01" }, "llama3.1:8b": { "inputPer1M": 0, "outputPer1M": 0, "family": "llama", "released": "2024-07-23" }, diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 6bafc04..f1d614c 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -34,10 +34,11 @@ export { pricedIdsInFamily, newestPricedInFamily, resolveLaneModel, + staleAgainstPriceTable, sameFamily, assessStaleness, } from './model-freshness.ts'; -export type { ModelSpec, FamilyModel, StalenessReport } from './model-freshness.ts'; +export type { ModelSpec, FamilyModel, StalenessReport, PriceTableStaleness } from './model-freshness.ts'; export { LedgerError, EVENT_FIELDS, diff --git a/packages/core/src/model-freshness.ts b/packages/core/src/model-freshness.ts index 8c3c8e1..dd13b17 100644 --- 
/** One lane found to be behind its family's newest priced model (price table only; any lane kind). */
export interface PriceTableStaleness {
  /** Id of the lane this finding is about. */
  laneId: string;
  /** Family inside which the comparison was made. */
  family: string;
  /** Concrete model the lane currently uses (a `<family>@latest` alias is resolved first). */
  pinned: string;
  /** Newest priced model in that family, i.e. what the lane should be running. */
  newest: string;
}

/**
 * Report every lane whose concrete model trails the newest PRICED model of its family.
 *
 * Only the price table is consulted: the body calls nothing but the alias parser and
 * the price-table helpers, so there is no vendor `/models` request and the check can
 * run for every lane kind, CLI and native lanes included.
 *
 * How the two inputs to the comparison are chosen for each lane:
 *  - concrete model: a `<family>@latest` alias resolves to the newest priced id of its
 *    stem; a concrete pin is used verbatim.
 *  - family: the alias stem, else the lane's explicit `model_family`, else the `family`
 *    recorded in the price table for the pinned id. The id string is never parsed to
 *    guess a family.
 *
 * A lane yields no finding when its model or family cannot be determined, or when it is
 * already on the newest priced model. An `@latest` lane therefore never appears in the
 * result, because it resolves to the very id it is compared against.
 *
 * NOTE(review): the generic parameter list was lost from the extracted source; the
 * constraint below is reconstructed from the fields this function reads. Confirm it
 * against the real file before applying.
 *
 * @param lanes Lanes to inspect; only `id`, `model` and `model_family` are read.
 * @param table Price table supplying per-model family metadata and recency ordering.
 * @returns One finding per stale lane, in input order; empty when nothing is behind.
 */
export function staleAgainstPriceTable<L extends { id: string; model: string; model_family?: string }>(
  lanes: readonly L[],
  table: PriceTable,
): PriceTableStaleness[] {
  return lanes.flatMap((lane): PriceTableStaleness[] => {
    const alias = parseModelAlias(lane.model);

    // The id the lane would actually run with.
    const current = alias.latest ? newestPricedInFamily(table, alias.family) : alias.id;
    if (!current) return []; // alias with no priced family member: nothing to compare

    const family = alias.latest ? alias.family : (lane.model_family ?? table.models[current]?.family);
    if (!family) return []; // unknown family: cannot judge, and we do not guess from the id

    const latestPriced = newestPricedInFamily(table, family);
    if (!latestPriced || latestPriced === current) return [];

    // A negative comparison presumably means `latestPriced` sorts ahead of (is newer
    // than) `current`; an unpriced pin that is not older than the table is not flagged.
    const strictlyNewer = compareNewestFirst(table, latestPriced, current) < 0;
    return strictlyNewer ? [{ laneId: lane.id, family, pinned: current, newest: latestPriced }] : [];
  });
}
'' }; // CLIs rarely report tokens ⇒ estimated downstream diff --git a/packages/core/src/registry.ts b/packages/core/src/registry.ts index 52f51bf..d944855 100644 --- a/packages/core/src/registry.ts +++ b/packages/core/src/registry.ts @@ -226,17 +226,16 @@ function parseLane(entry: unknown, index: number): Lane { throw new LaneConfigError(`${at('endpoint')}: an api lane requires an endpoint.`); } } - // A `@latest` alias is resolved against the price table at routing time. - // Reject anything ending in "@latest" that isn't a well-formed alias on an api - // lane: bare "@latest" (empty family stem) would otherwise parse as a concrete id - // and could reach execution literally; CLI/local lanes pin a concrete model. - if (lane.model.trim().endsWith('@latest')) { - if (lane.kind !== 'api') { - throw new LaneConfigError(`${at('model')}: a "@latest" alias is only supported on api lanes.`); - } - if (!parseModelAlias(lane.model).latest) { - throw new LaneConfigError(`${at('model')}: "@latest" needs a family stem, e.g. "minimax@latest".`); - } + // A `@latest` alias is resolved against the price table at routing time + // (and on the summary path), so a lane tracks the newest priced model in its family + // instead of hard-pinning a version that silently goes stale. Supported on ANY lane + // kind: api lanes send the resolved id in the request body; cli/local lanes spawn the + // resolved id via a `{model}` arg placeholder (see makeCliExecutor) or, for the native + // host lane, use it for pricing/display only. We only reject a MALFORMED alias — bare + // "@latest" with an empty family stem — which would otherwise parse as a concrete id + // and could reach execution literally. + if (lane.model.trim().endsWith('@latest') && !parseModelAlias(lane.model).latest) { + throw new LaneConfigError(`${at('model')}: "@latest" needs a family stem, e.g. 
"claude-opus@latest".`); } return lane; } diff --git a/packages/core/test/model-freshness.test.ts b/packages/core/test/model-freshness.test.ts index 0779f7c..79d8226 100644 --- a/packages/core/test/model-freshness.test.ts +++ b/packages/core/test/model-freshness.test.ts @@ -13,6 +13,7 @@ import { pricedIdsInFamily, newestPricedInFamily, resolveLaneModel, + staleAgainstPriceTable, sameFamily, assessStaleness, } from '../src/model-freshness.ts'; @@ -127,3 +128,33 @@ test('newestPricedInFamily falls back to version order when releases are absent' }; assert.equal(newestPricedInFamily(noDates, 'foo'), 'foo-10'); // 10 > 2 numerically }); + +// --- staleAgainstPriceTable: the egress-free "are the latest models in use?" check --- + +test('staleAgainstPriceTable flags a concrete pin behind the newest priced in family', () => { + // Covers ANY lane kind — here a cli lane pinned to an older minimax. + const found = staleAgainstPriceTable([{ id: 'l', model: 'minimax-m2' }], table); + assert.equal(found.length, 1); + assert.deepEqual(found[0], { laneId: 'l', family: 'minimax', pinned: 'minimax-m2', newest: 'minimax-m3' }); +}); + +test('staleAgainstPriceTable does NOT flag a lane already on the newest priced model', () => { + assert.deepEqual(staleAgainstPriceTable([{ id: 'l', model: 'minimax-m3' }], table), []); +}); + +test('staleAgainstPriceTable never flags a @latest lane (it resolves to newest)', () => { + // This is why @latest is the fix: a self-updating lane is never "behind". + assert.deepEqual(staleAgainstPriceTable([{ id: 'l', model: 'minimax@latest' }], table), []); +}); + +test('staleAgainstPriceTable skips a pin with no resolvable family (no prefix guessing)', () => { + // `opus` has no family metadata and the lane sets no model_family ⇒ cannot judge. 
+ assert.deepEqual(staleAgainstPriceTable([{ id: 'l', model: 'opus' }], table), []); +}); + +test('staleAgainstPriceTable uses an explicit model_family to judge an unpriced pin', () => { + const found = staleAgainstPriceTable([{ id: 'l', model: 'minimax-m1', model_family: 'minimax' }], table); + assert.equal(found.length, 1); + assert.equal(found[0]!.pinned, 'minimax-m1'); + assert.equal(found[0]!.newest, 'minimax-m3'); +}); diff --git a/packages/core/test/node-executors.test.ts b/packages/core/test/node-executors.test.ts index ee1ac06..efccade 100644 --- a/packages/core/test/node-executors.test.ts +++ b/packages/core/test/node-executors.test.ts @@ -64,6 +64,23 @@ test('makeCliExecutor throws on a non-zero exit (so runTask degrades)', async () await assert.rejects(() => exec(codexCli, 'x')); }); +test('makeCliExecutor substitutes the {model} placeholder with the resolved lane model', async () => { + // MODEL-FRESHNESS: a cli lane uses `--model {model}` instead of a hard-pinned id, so + // the spawn always runs the lane's current (price-table-resolved) model. 
+ let seen: readonly string[] = []; + const exec = makeCliExecutor((_cmd, args) => { + seen = args; + return { status: 0, stdout: 'ok' }; + }); + const sonnet: Lane = { + id: 'claude-sonnet', kind: 'cli', model: 'claude-sonnet-4-6', trust_mode: 'full', + costBasis: 'subscription', provenance: 'anthropic', jurisdiction: 'US', + command: 'claude', args: ['-p', '--model', '{model}'], capability: { codegen: 0.85 }, + }; + await exec(sonnet, 'do it'); + assert.deepEqual(seen, ['-p', '--model', 'claude-sonnet-4-6']); // {model} ⇒ lane.model +}); + test('makeOllamaExecutor posts to /api/generate and maps eval counts to usage', async () => { let url: string | undefined; const exec = makeOllamaExecutor(async (u) => { diff --git a/packages/core/test/price.test.ts b/packages/core/test/price.test.ts index b969bc9..b6e0f1f 100644 --- a/packages/core/test/price.test.ts +++ b/packages/core/test/price.test.ts @@ -171,10 +171,19 @@ test('validatePriceTable rejects a non-object', () => { test('loadPriceTable reads and validates the shipped seed file', () => { const seedPath = new URL('../../../config/prices.seed.json', import.meta.url); const t = loadPriceTable(seedPath); - assert.equal(t.frontier_model, 'claude-opus-4-7'); + // MODEL-FRESHNESS: the frontier baseline tracks the current most-capable Claude + // (claude-opus-4-8); the previous frontier stays priced for back-compat. + assert.equal(t.frontier_model, 'claude-opus-4-8'); assert.equal(t.schema_version, 2); // MODEL-FRESHNESS: metadata-carrying seed - assert.equal(Object.keys(t.models).length, 9); + assert.equal(Object.keys(t.models).length, 11); + assert.equal(t.models['claude-opus-4-8']?.inputPer1M, 5); + assert.equal(t.models['claude-opus-4-8']?.outputPer1M, 25); + assert.equal(t.models['claude-opus-4-8']?.family, 'claude-opus'); assert.equal(t.models['claude-opus-4-7']?.inputPer1M, 15); + // Sonnet 4.6 priced + family-tagged so claude-sonnet@latest resolves to it. 
+ assert.equal(t.models['claude-sonnet-4-6']?.inputPer1M, 3); + assert.equal(t.models['claude-sonnet-4-6']?.outputPer1M, 15); + assert.equal(t.models['claude-sonnet-4-6']?.family, 'claude-sonnet'); assert.equal(t.models['claude-haiku-4-5-20251001']?.outputPer1M, 5); // F2-S5: metered vendor models priced so opted-up reader/worker lanes are routable. assert.ok(t.models['glm-5.1']); diff --git a/packages/core/test/registry.test.ts b/packages/core/test/registry.test.ts index 3bbd947..a351364 100644 --- a/packages/core/test/registry.test.ts +++ b/packages/core/test/registry.test.ts @@ -268,19 +268,38 @@ test('rejects native on a non-full lane (contradictory)', () => { assert.throws(() => parseLaneConfig(cfg), { message: /native is only valid on a full-trust lane/ }); }); -test('rejects a @latest alias on a non-api lane (api-only)', () => { +test('accepts a @latest alias on a cli lane (resolved at routing/spawn via {model})', () => { + // MODEL-FRESHNESS: cli/local/native lanes may now self-update via @latest too — + // not just api lanes. The alias is stored verbatim; routing resolves it to a + // concrete priced id, and the cli executor substitutes it into a {model} arg. 
const cli = ` lanes: - id: x kind: cli - model: claude@latest + model: claude-opus@latest trust_mode: full costBasis: subscription provenance: anthropic jurisdiction: US command: claude + args: ["-p", "--model", "{model}"] `; - assert.throws(() => parseLaneConfig(cli), { message: /@latest" alias is only supported on api lanes/ }); + assert.equal(parseLaneConfig(cli).byId('x')?.model, 'claude-opus@latest'); +}); + +test('accepts a @latest alias on the native lane (pricing/display only)', () => { + const native = ` +lanes: + - id: claude-native + kind: cli + model: claude-opus@latest + trust_mode: full + costBasis: subscription + provenance: anthropic + jurisdiction: US + native: true +`; + assert.equal(parseLaneConfig(native).byId('claude-native')?.model, 'claude-opus@latest'); }); test('rejects a bare "@latest" with no family stem (would otherwise execute literally)', () => { @@ -319,8 +338,17 @@ test('loadLaneConfig reads and validates the shipped example file', () => { // Pass the file: URL directly; loadLaneConfig handles URL→path (and spaces). const examplePath = new URL('../../../config/lanes.example.yaml', import.meta.url); const reg = loadLaneConfig(examplePath); - assert.equal(reg.lanes.length, 8); + assert.equal(reg.lanes.length, 10); assert.ok(reg.byId('claude-native')); + // MODEL-FRESHNESS: the host lane self-updates via @latest instead of a hard pin. + assert.equal(reg.byId('claude-native')?.model, 'claude-opus@latest'); + // Sonnet's DEFAULT is the full-access Claude Code (CLI subscription) lane, self-updating. + assert.equal(reg.byId('claude-sonnet')?.trust_mode, 'full'); + assert.equal(reg.byId('claude-sonnet')?.kind, 'cli'); + assert.equal(reg.byId('claude-sonnet')?.model, 'claude-sonnet@latest'); + // The Anthropic API Sonnet lane ships as an OPT-IN template (blocked, not a default). 
+ assert.equal(reg.byId('claude-sonnet-api')?.trust_mode, 'blocked'); + assert.equal(reg.byId('claude-sonnet-api')?.kind, 'api'); // CONFIG-1: enabled defaults are the host + the default reviewer (codex) + the // in-family cheaper-Claude lane — all first-party or availability-gated. assert.equal(reg.byId('codex-cli')?.trust_mode, 'full', 'codex-cli is the default reviewer (full)'); @@ -335,7 +363,7 @@ test('loadLaneConfig reads and validates the shipped example file', () => { // assumed) — it ships blocked alongside the named vendor templates. Only the host, // the reviewer, and the in-family lane are trusted out of the box; the rest are the // user's deliberate choice. - for (const id of ['ollama-llama3', 'gemini-cli', 'kimi-cli', 'glm-api', 'minimax-api']) { + for (const id of ['ollama-llama3', 'gemini-cli', 'kimi-cli', 'glm-api', 'minimax-api', 'claude-sonnet-api']) { assert.equal(reg.byId(id)?.trust_mode, 'blocked', `${id} must ship blocked`); } // MODEL-FRESHNESS: api vendor templates default to @latest so enabling one diff --git a/packages/mcp/lanes.starter.yaml b/packages/mcp/lanes.starter.yaml index dc42765..f6e29a7 100644 --- a/packages/mcp/lanes.starter.yaml +++ b/packages/mcp/lanes.starter.yaml @@ -33,7 +33,7 @@ lanes: - id: claude-native kind: cli - model: claude-opus-4-7 + model: claude-opus@latest # self-updating: tracks the newest PRICED claude-opus (e.g. claude-opus-4-8) — never a hard-pinned version trust_mode: full costBasis: subscription provenance: anthropic @@ -106,14 +106,14 @@ lanes: # run with TOKENMAXED_DISABLE=1 so they never re-enter routing / recurse.) 
- id: claude-haiku kind: cli - model: claude-haiku-4-5-20251001 + model: claude-haiku@latest # self-updating; the {model} arg below spawns the resolved id trust_mode: full costBasis: subscription provenance: anthropic jurisdiction: US execution_mode: answer-only command: claude - args: ["-p", "--model", "claude-haiku-4-5-20251001"] + args: ["-p", "--model", "{model}"] # {model} ⇒ the resolved newest priced claude-haiku # Secondary / in-family manager. Codex above is the default host-turn reviewer # (first eligible in file order). On the escalation path this lane can only # independently review an offloaded output in categories where it isn't weaker @@ -132,11 +132,37 @@ lanes: explain: 0.82 codegen: 0.72 + # Full-access Sonnet (Claude Code): a stronger in-family offload than Haiku for + # bounded subtasks, still on the same subscription (no metered $). Self-updating via + # claude-sonnet@latest; the {model} arg spawns the resolved id. As a manager it is a + # capable INDEPENDENT reviewer for Codex's strong categories (which Haiku can't cover). + - id: claude-sonnet + kind: cli + model: claude-sonnet@latest + trust_mode: full + costBasis: subscription + provenance: anthropic + jurisdiction: US + execution_mode: answer-only + command: claude + args: ["-p", "--model", "{model}"] # {model} ⇒ the resolved newest priced claude-sonnet + roles: [manager] + manager_allowed: true + capability: + feature: 0.90 + refactor: 0.86 + bugfix: 0.85 + boilerplate: 0.88 + explain: 0.88 + codegen: 0.85 + docs: 0.86 + # --- Other provider lanes (SUPPORTED) --------------------------------------- # Popular vendors, shipped as SAFE inert TEMPLATES: each is `blocked` (never - # selected) until YOU pick a trust_mode. (The only enabled defaults ABOVE are the - # host `claude-native`, `codex-cli` (the default reviewer), and the in-family - # `claude-haiku` — all first-party or availability-gated. `ollama-llama3` ships + # selected) until YOU pick a trust_mode. 
(The enabled defaults ABOVE are the host + # `claude-native`, `codex-cli` (the default reviewer), and the in-family Claude Code + # lanes `claude-haiku` + `claude-sonnet` — all first-party or availability-gated. + # `ollama-llama3` ships # `blocked` too: a local server isn't assumed. In your own config, set every # lane's trust deliberately.) The vendor lanes here add nothing to your trust # surface until you change `blocked`. Trust ladder: blocked < worker < reader < full. @@ -154,6 +180,10 @@ lanes: # BYOK api lanes: put the key in env TOKENMAXED_KEY_ (never in this file). # api lanes speak the OpenAI /chat/completions schema — point `endpoint` at your # provider's OpenAI-COMPATIBLE chat-completions URL (not a vendor-proprietary one). + # COST: `costBasis` is YOUR billing model — NOT implied by `api`. Many vendors (e.g. + # MiniMax) are a flat-rate SUBSCRIPTION token ⇒ `costBasis: subscription` (treated as + # $0 and preferred by routing, like a CLI subscription); pay-per-token ⇒ `metered`. + # /tokenmaxed:setup ASKS you per api lane — TokenMaxed never assumes metered. # CLI lanes (gemini-cli, kimi-cli) can only be `full` or `blocked` — worker/reader # executors are API-only, so a CLI lane set to worker/reader loads but never routes. @@ -199,9 +229,24 @@ lanes: # automatically (family from the price table); for an # unpriced pin add `model_family: minimax`. trust_mode: blocked # → worker (no repo) or reader (repo-read; see above) or full - costBasis: metered + costBasis: subscription # MiniMax is typically a flat-rate SUBSCRIPTION token (not pay-per-token); /tokenmaxed:setup confirms this with you provenance: minimax jurisdiction: CN endpoint: https://api.minimax.io/v1/chat/completions # use the OpenAI-compatible URL authHandle: MINIMAX capability: { codegen: 0.80, boilerplate: 0.80, docs: 0.76 } + + # Sonnet via Anthropic API (BYOK) — an OPT-IN alternative to the Claude Code CLI + # lane above, NOT a default. 
When the `claude` CLI is available, the subscription + # CLI lane runs Sonnet (no metered $); this is here only if you deliberately want + # Sonnet over the API. Flip trust_mode to worker/full to enable. Self-updating. + - id: claude-sonnet-api + kind: api + model: claude-sonnet@latest + trust_mode: blocked + costBasis: metered + provenance: anthropic + jurisdiction: US + endpoint: https://api.anthropic.com/v1/chat/completions # Anthropic's OpenAI-compatible endpoint + authHandle: ANTHROPIC + capability: { codegen: 0.85, boilerplate: 0.86, docs: 0.86, explain: 0.88 } diff --git a/packages/mcp/prices.seed.json b/packages/mcp/prices.seed.json index 149b25e..11326d7 100644 --- a/packages/mcp/prices.seed.json +++ b/packages/mcp/prices.seed.json @@ -1,8 +1,10 @@ { "schema_version": 2, - "frontier_model": "claude-opus-4-7", + "frontier_model": "claude-opus-4-8", "models": { + "claude-opus-4-8": { "inputPer1M": 5, "outputPer1M": 25, "family": "claude-opus", "released": "2026-05-15" }, "claude-opus-4-7": { "inputPer1M": 15, "outputPer1M": 75, "family": "claude-opus", "released": "2026-01-15" }, + "claude-sonnet-4-6": { "inputPer1M": 3, "outputPer1M": 15, "family": "claude-sonnet", "released": "2025-11-14" }, "claude-haiku-4-5-20251001": { "inputPer1M": 1, "outputPer1M": 5, "family": "claude-haiku", "released": "2025-10-01" }, "gpt-5.5": { "inputPer1M": 10, "outputPer1M": 30, "family": "gpt", "released": "2026-02-01" }, "llama3.1:8b": { "inputPer1M": 0, "outputPer1M": 0, "family": "llama", "released": "2024-07-23" }, diff --git a/packages/mcp/src/host-review.ts b/packages/mcp/src/host-review.ts index 989b95f..bc0c907 100644 --- a/packages/mcp/src/host-review.ts +++ b/packages/mcp/src/host-review.ts @@ -13,12 +13,14 @@ import { spawnSync } from 'node:child_process'; import { randomUUID } from 'node:crypto'; import { existsSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; -import { review } from '@tokenmaxed/core'; +import { parseModelAlias, resolveLaneModel, 
review } from '@tokenmaxed/core'; import type { Lane, OutcomeEventInput, Policy, ReviewVerdict } from '@tokenmaxed/core'; import { JsonlLedger, loadLaneConfig, + loadPriceTable, makeCliExecutor, makeTrustedApiExecutor, makeTrustedExecutor, @@ -119,6 +121,9 @@ export function makeHostReviewDeps(env: NodeJS.ProcessEnv): HostReviewDeps { const cwd = env.CLAUDE_PROJECT_DIR ?? process.cwd(); const lanesPath = env.TOKENMAXED_LANES ?? homeFile('lanes.yaml'); const ledgerPath = env.TOKENMAXED_LEDGER; // undefined ⇒ JsonlLedger default + // Same package-relative seed the server/summary use, so the review path resolves + // `@latest` against the SAME price table everything else does. + const pricesPath = env.TOKENMAXED_PRICES ?? fileURLToPath(new URL('../prices.seed.json', import.meta.url)); const resolveAuth = makeResolveAuth(env); const executor = makeTrustedExecutor({ cli: makeCliExecutor(makeCliSpawn(REVIEW_CLI_TIMEOUT_MS)), @@ -136,7 +141,22 @@ export function makeHostReviewDeps(env: NodeJS.ProcessEnv): HostReviewDeps { const diff = res.stdout; return diff.length > MAX_DIFF_BYTES ? `${diff.slice(0, MAX_DIFF_BYTES)}\n\n[diff truncated for review]` : diff; }, - loadLanes: () => (existsSync(lanesPath) ? [...loadLaneConfig(lanesPath).lanes] : null), + loadLanes: () => { + if (!existsSync(lanesPath)) return null; + const raw = [...loadLaneConfig(lanesPath).lanes]; + // Resolve `@latest` to a concrete priced id BEFORE manager selection / + // execution — otherwise a manager on an alias (e.g. claude-haiku@latest) would + // spawn `claude --model claude-haiku@latest` (invalid) or send the alias in an + // API body. Mirrors server.ts's routing path. Drop any STILL-unresolved alias + // (no priced family member) so it can never be selected and fail to spawn. 
+ let table; + try { + table = loadPriceTable(pricesPath); + } catch { + return raw; // no price table ⇒ best-effort (concrete pins still work) + } + return raw.map((l) => resolveLaneModel(l, table)).filter((l) => !parseModelAlias(l.model).latest); + }, availableLaneIds: makeAvailabilityProbe(env), loadPolicy: makeLoadPolicy(env), runManager: async (lane, prompt) => (await executor(lane, prompt)).resultText, diff --git a/packages/mcp/src/lane-setup.ts b/packages/mcp/src/lane-setup.ts index 039d87c..4fbbe9a 100644 --- a/packages/mcp/src/lane-setup.ts +++ b/packages/mcp/src/lane-setup.ts @@ -19,6 +19,9 @@ export interface LaneSetupRow { /** The raw `model` when it differs from `model` (i.e. an unresolved/resolved alias). */ rawModel?: string; trustMode: Lane['trust_mode']; + /** Billing model (how the user pays). For `api` lanes this is USER-ASSERTED, never + * inferred — setup asks subscription (flat token) vs metered (pay-per-token). */ + costBasis: Lane['costBasis']; executionMode: 'answer-only' | 'agentic'; /** 'reviewer' = the lane the host-turn review would use now; else manager-eligibility. */ role: 'active-reviewer' | 'manager-eligible' | 'none'; @@ -60,14 +63,25 @@ const ROLE_LABEL: Record = { export function formatLaneSetup(rows: readonly LaneSetupRow[]): string[] { if (rows.length === 0) return [' (no lanes configured)']; const lines: string[] = ['Lanes (what each may see/do, and whether it can run now):']; + let anyApi = false; for (const r of rows) { const model = r.rawModel && r.rawModel !== r.model ? `${r.rawModel} → ${r.model}` : r.model; const caps = r.capability && Object.keys(r.capability).length > 0 ? ' · caps ' + Object.entries(r.capability).map(([c, v]) => `${c}=${v}`).join(',') : ''; + // Surface billing so the user can confirm it — and flag API lanes, whose costBasis + // is the user's plan (subscription token vs metered), never inferred from "api". + const billing = r.kind === 'api' ? 
` · billing=${r.costBasis} (confirm: subscription vs metered)` : ` · billing=${r.costBasis}`; + if (r.kind === 'api') anyApi = true; lines.push( ` • ${r.id} [${r.kind}] ${model} · trust=${r.trustMode} → ${permissionFor(r.trustMode, r.executionMode)}` + - ` · role=${ROLE_LABEL[r.role]} · ${r.available ? 'available' : 'unavailable now'}${caps}`, + `${billing} · role=${ROLE_LABEL[r.role]} · ${r.available ? 'available' : 'unavailable now'}${caps}`, + ); + } + if (anyApi) { + lines.push( + ' ⓘ For each api lane, confirm billing: a flat-rate subscription token (costBasis: subscription, ' + + 'treated as $0 and preferred) or pay-per-token (costBasis: metered). TokenMaxed never assumes — set it per YOUR plan.', ); } return lines; diff --git a/packages/mcp/src/setup.ts b/packages/mcp/src/setup.ts index 23b267c..42ecd8a 100644 --- a/packages/mcp/src/setup.ts +++ b/packages/mcp/src/setup.ts @@ -71,6 +71,7 @@ export async function runSetup(env: NodeJS.ProcessEnv): Promise { model: resolved, ...(resolved !== l.model ? { rawModel: l.model } : {}), trustMode: l.trust_mode, + costBasis: l.costBasis, executionMode: (l.execution_mode ?? 
'answer-only') as 'answer-only' | 'agentic', role, available: !!l.native || available.has(l.id), diff --git a/packages/mcp/src/summary-deps.ts b/packages/mcp/src/summary-deps.ts index d3060ba..a0d7b01 100644 --- a/packages/mcp/src/summary-deps.ts +++ b/packages/mcp/src/summary-deps.ts @@ -13,7 +13,8 @@ import { existsSync, readFileSync } from 'node:fs'; import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { filterEventsSince, summarize, tokenStats } from '@tokenmaxed/core'; +import { filterEventsSince, resolveLaneModel, staleAgainstPriceTable, summarize, tokenStats } from '@tokenmaxed/core'; +import type { PriceTable } from '@tokenmaxed/core'; import { JsonlLedger, loadLaneConfig, loadPriceTable } from '@tokenmaxed/core/node'; import { makeAvailabilityProbe } from './availability.ts'; @@ -69,29 +70,55 @@ export function makeSummaryFromEnv(env: NodeJS.ProcessEnv): () => Promise@latest` to the concrete + // model the banner displays AND backs the egress-free latest-model check below. + let priceTable: PriceTable | undefined; + if (existsSync(pricesPath)) { try { - staleness = await reportFreshness( + priceTable = loadPriceTable(pricesPath); + } catch { + priceTable = undefined; // a missing/bad price table ⇒ display raw, skip staleness + } + } + // Display the RESOLVED model (e.g. claude-opus@latest ⇒ claude-opus-4-8), so the + // banner shows the concrete model in use rather than the alias. Falls back to the + // raw lane when there is no price table (or the family isn't priced). + const displayLanes = priceTable ? lanes.map((l) => resolveLaneModel(l, priceTable!)) : lanes; + // "Are the latest models in use?" — checked at session start for EVERY lane kind + // against the price table only (no /models egress). An `@latest` lane resolves to + // the newest priced model so it's never flagged; a concrete pin that's behind the + // newest priced model in its family IS flagged (incl. 
the CLI Claude lanes the + // api-only live check never sees). This is the primary up-to-date signal. + const staleByLane = new Map(); + if (!globallyDisabled && priceTable) { + for (const f of staleAgainstPriceTable(lanes, priceTable)) { + staleByLane.set(f.laneId, { laneId: f.laneId, newest: f.newest, newestPriced: true }); + } + // Overlay the CACHE-ONLY live check (api lanes): it can report a newer model that + // isn't priced yet (a pricing gap the price-table check can't see), so it takes + // precedence per lane. refresh:false ⇒ never a /models call here; fetchList throws + // to prove no egress on the session-start path. + try { + for (const w of await reportFreshness( lanes, { fetchList: () => { throw new Error('summary path must not fetch'); }, - table: loadPriceTable(pricesPath), + table: priceTable, now, readCache: () => readFreshnessCache(cachePath), writeCache: () => {}, }, { refresh: false }, - ); + )) { + staleByLane.set(w.laneId, { laneId: w.laneId, newest: w.newest, newestPriced: w.newestPriced }); + } } catch { - staleness = []; // a missing/bad price table or cache ⇒ just omit staleness + /* a bad cache ⇒ keep the price-table findings already collected */ } } + const staleness = [...staleByLane.values()]; // SETUP-1 B hint: read-only — compare the RAW lane fingerprint to what setup last // recorded for this project. NEVER write here (only /tokenmaxed:setup marks seen). 
let laneReview: 'first-review' | 'changed' | 'current' = 'current'; @@ -102,7 +129,7 @@ export function makeSummaryFromEnv(env: NodeJS.ProcessEnv): () => Promise { }); const row = (over: Partial & { id: string }): LaneSetupRow => ({ - kind: 'cli', model: 'm', trustMode: 'full', executionMode: 'answer-only', role: 'none', available: true, ...over, + kind: 'cli', model: 'm', trustMode: 'full', costBasis: 'subscription', executionMode: 'answer-only', role: 'none', available: true, ...over, }); test('formatLaneSetup renders model, trust→permission, role, and availability', () => { const lines = formatLaneSetup([ row({ id: 'codex-cli', model: 'gpt-5.5', trustMode: 'full', role: 'active-reviewer' }), - row({ id: 'minimax-api', kind: 'api', model: 'minimax-m3', rawModel: 'minimax@latest', trustMode: 'worker', role: 'none', available: false }), + row({ id: 'minimax-api', kind: 'api', model: 'minimax-m3', rawModel: 'minimax@latest', trustMode: 'worker', costBasis: 'subscription', role: 'none', available: false }), ]); const text = lines.join('\n'); assert.match(text, /codex-cli \[cli\] gpt-5\.5 · trust=full.*role=reviewer \(active\) · available/); @@ -32,6 +32,18 @@ test('formatLaneSetup renders model, trust→permission, role, and availability' assert.match(text, /minimax-api \[api\] minimax@latest → minimax-m3 · trust=worker.*NO repo.*role=— · unavailable now/); }); +test('formatLaneSetup surfaces billing and prompts to confirm it for api lanes', () => { + // api lane: billing is shown AND flagged to confirm (never assumed from "api"). + const apiText = formatLaneSetup([row({ id: 'minimax-api', kind: 'api', costBasis: 'subscription' })]).join('\n'); + assert.match(apiText, /billing=subscription \(confirm: subscription vs metered\)/); + assert.match(apiText, /For each api lane, confirm billing/); + // cli lane: billing shown, no confirm prompt (subscription/local is unambiguous). 
+ const cliText = formatLaneSetup([row({ id: 'codex-cli', kind: 'cli', costBasis: 'subscription' })]).join('\n'); + assert.match(cliText, /billing=subscription/); + assert.doesNotMatch(cliText, /confirm: subscription vs metered/); + assert.doesNotMatch(cliText, /For each api lane/); +}); + test('formatLaneSetup shows declared capability when present, and handles empty', () => { assert.match(formatLaneSetup([row({ id: 'x', capability: { docs: 0.8, bugfix: 0.6 } })]).join('\n'), /caps docs=0\.8,bugfix=0\.6/); assert.deepEqual(formatLaneSetup([]), [' (no lanes configured)']); diff --git a/packages/mcp/test/tools.test.ts b/packages/mcp/test/tools.test.ts index 440e363..3f6499f 100644 --- a/packages/mcp/test/tools.test.ts +++ b/packages/mcp/test/tools.test.ts @@ -492,8 +492,8 @@ test('setup reports the manager + open gate when present', async () => { readerEgress: true, tiered: true, lanes: [ - { id: 'codex-cli', kind: 'cli', model: 'gpt-5.5', trustMode: 'full', executionMode: 'answer-only', role: 'active-reviewer', available: true }, - { id: 'minimax-api', kind: 'api', model: 'minimax-m3', rawModel: 'minimax@latest', trustMode: 'worker', executionMode: 'answer-only', role: 'none', available: false }, + { id: 'codex-cli', kind: 'cli', model: 'gpt-5.5', trustMode: 'full', costBasis: 'subscription', executionMode: 'answer-only', role: 'active-reviewer', available: true }, + { id: 'minimax-api', kind: 'api', model: 'minimax-m3', rawModel: 'minimax@latest', trustMode: 'worker', costBasis: 'subscription', executionMode: 'answer-only', role: 'none', available: false }, ], laneReview: 'changed', }), diff --git a/packages/plugin/hooks/sessionstart.mjs b/packages/plugin/hooks/sessionstart.mjs index 1b77ae7..2287158 100755 --- a/packages/plugin/hooks/sessionstart.mjs +++ b/packages/plugin/hooks/sessionstart.mjs @@ -7613,6 +7613,27 @@ function newestPricedInFamily(table, family) { if (ids.length === 0) return void 0; return [...ids].sort((a, b) => compareNewestFirst(table, a, 
b))[0]; } +function resolveLaneModel(lane, table) { + const spec = parseModelAlias(lane.model); + if (!spec.latest) return lane; + const concrete = newestPricedInFamily(table, spec.family); + return concrete ? { ...lane, model: concrete } : lane; +} +function staleAgainstPriceTable(lanes, table) { + const out = []; + for (const lane of lanes) { + const spec = parseModelAlias(lane.model); + const pinned = spec.latest ? newestPricedInFamily(table, spec.family) : spec.id; + if (!pinned) continue; + const family = spec.latest ? spec.family : lane.model_family ?? table.models[pinned]?.family; + if (!family) continue; + const newest = newestPricedInFamily(table, family); + if (newest && newest !== pinned && compareNewestFirst(table, newest, pinned) < 0) { + out.push({ laneId: lane.id, family, pinned, newest }); + } + } + return out; +} function sameFamily(id, family) { if (id === family) return true; if (!id.startsWith(family)) return false; @@ -7795,13 +7816,8 @@ function parseLane(entry, index) { throw new LaneConfigError(`${at("endpoint")}: an api lane requires an endpoint.`); } } - if (lane.model.trim().endsWith("@latest")) { - if (lane.kind !== "api") { - throw new LaneConfigError(`${at("model")}: a "@latest" alias is only supported on api lanes.`); - } - if (!parseModelAlias(lane.model).latest) { - throw new LaneConfigError(`${at("model")}: "@latest" needs a family stem, e.g. "minimax@latest".`); - } + if (lane.model.trim().endsWith("@latest") && !parseModelAlias(lane.model).latest) { + throw new LaneConfigError(`${at("model")}: "@latest" needs a family stem, e.g. "claude-opus@latest".`); } return lane; } @@ -8790,27 +8806,41 @@ function makeSummaryFromEnv(env) { const lanes = existsSync5(lanesPath) ? 
[...loadLaneConfig(lanesPath).lanes] : []; const available = await probeAvailable(lanes); const now = Date.now(); - let staleness = []; - if (!globallyDisabled && existsSync5(pricesPath)) { + let priceTable; + if (existsSync5(pricesPath)) { + try { + priceTable = loadPriceTable(pricesPath); + } catch { + priceTable = void 0; + } + } + const displayLanes = priceTable ? lanes.map((l) => resolveLaneModel(l, priceTable)) : lanes; + const staleByLane = /* @__PURE__ */ new Map(); + if (!globallyDisabled && priceTable) { + for (const f of staleAgainstPriceTable(lanes, priceTable)) { + staleByLane.set(f.laneId, { laneId: f.laneId, newest: f.newest, newestPriced: true }); + } try { - staleness = await reportFreshness( + for (const w of await reportFreshness( lanes, { fetchList: () => { throw new Error("summary path must not fetch"); }, - table: loadPriceTable(pricesPath), + table: priceTable, now, readCache: () => readFreshnessCache(cachePath), writeCache: () => { } }, { refresh: false } - ); + )) { + staleByLane.set(w.laneId, { laneId: w.laneId, newest: w.newest, newestPriced: w.newestPriced }); + } } catch { - staleness = []; } } + const staleness = [...staleByLane.values()]; let laneReview = "current"; if (lanes.length > 0) { const prior = readLaneReviewState(laneStatePath).byProject[reviewProjectKey]?.fingerprint; @@ -8819,7 +8849,7 @@ function makeSummaryFromEnv(env) { } return buildSummaryData({ events: new JsonlLedger(ledgerPath).readAll(), - lanes, + lanes: displayLanes, policy: loadPolicy(), availableLaneIds: available, gateReady, diff --git a/packages/plugin/hooks/stop.mjs b/packages/plugin/hooks/stop.mjs index a4cbf83..73013e6 100755 --- a/packages/plugin/hooks/stop.mjs +++ b/packages/plugin/hooks/stop.mjs @@ -7375,6 +7375,7 @@ import { join as join4 } from "node:path"; import { spawnSync as spawnSync3 } from "node:child_process"; import { randomUUID as randomUUID2 } from "node:crypto"; import { existsSync as existsSync3 } from "node:fs"; +import { fileURLToPath 
as fileURLToPath2 } from "node:url"; // ../core/src/types.ts var TRUST_MODES = ["full", "worker", "reader", "blocked"]; @@ -7580,6 +7581,51 @@ function parseModelAlias(model) { if (m && m[1].trim() !== "") return { latest: true, family: m[1].trim() }; return { latest: false, id: model }; } +function compareModelVersion(a, b) { + const runs = (s) => s.toLowerCase().match(/(\d+|\D+)/g) ?? []; + const ra = runs(a); + const rb = runs(b); + const n = Math.max(ra.length, rb.length); + for (let i = 0; i < n; i++) { + const xa = ra[i]; + const xb = rb[i]; + if (xa === void 0) return -1; + if (xb === void 0) return 1; + const na = /^\d+$/.test(xa); + const nb = /^\d+$/.test(xb); + if (na && nb) { + const d = Number.parseInt(xa, 10) - Number.parseInt(xb, 10); + if (d !== 0) return d < 0 ? -1 : 1; + } else if (xa !== xb) { + return xa < xb ? -1 : 1; + } + } + return 0; +} +function releasedMs(table, id) { + const r = table.models[id]?.released; + return r === void 0 ? void 0 : Date.parse(r); +} +function compareNewestFirst(table, a, b) { + const ta = releasedMs(table, a); + const tb = releasedMs(table, b); + if (ta !== void 0 && tb !== void 0 && ta !== tb) return tb - ta; + return compareModelVersion(b, a); +} +function pricedIdsInFamily(table, family) { + return Object.keys(table.models).filter((id) => table.models[id].family === family); +} +function newestPricedInFamily(table, family) { + const ids = pricedIdsInFamily(table, family); + if (ids.length === 0) return void 0; + return [...ids].sort((a, b) => compareNewestFirst(table, a, b))[0]; +} +function resolveLaneModel(lane, table) { + const spec = parseModelAlias(lane.model); + if (!spec.latest) return lane; + const concrete = newestPricedInFamily(table, spec.family); + return concrete ? 
{ ...lane, model: concrete } : lane; +} // ../core/src/registry.ts var LANE_KINDS = ["cli", "api", "local"]; @@ -7744,13 +7790,8 @@ function parseLane(entry, index) { throw new LaneConfigError(`${at("endpoint")}: an api lane requires an endpoint.`); } } - if (lane.model.trim().endsWith("@latest")) { - if (lane.kind !== "api") { - throw new LaneConfigError(`${at("model")}: a "@latest" alias is only supported on api lanes.`); - } - if (!parseModelAlias(lane.model).latest) { - throw new LaneConfigError(`${at("model")}: "@latest" needs a family stem, e.g. "minimax@latest".`); - } + if (lane.model.trim().endsWith("@latest") && !parseModelAlias(lane.model).latest) { + throw new LaneConfigError(`${at("model")}: "@latest" needs a family stem, e.g. "claude-opus@latest".`); } return lane; } @@ -7808,6 +7849,66 @@ function parseLaneConfig(text) { return new LaneRegistry(lanes); } +// ../core/src/price.ts +var PriceError = class extends Error { + constructor(message) { + super(message); + this.name = "PriceError"; + } +}; +function isPlainObject3(value) { + return typeof value === "object" && value !== null && !Array.isArray(value); +} +function requireNonNegativeNumber(value, where) { + if (typeof value !== "number" || !Number.isFinite(value) || value < 0) { + throw new PriceError(`${where} must be a finite number >= 0 (got ${JSON.stringify(value)}).`); + } + return value; +} +function validatePriceTable(data) { + if (!isPlainObject3(data)) { + throw new PriceError("Price table must be a JSON object."); + } + if (typeof data.schema_version !== "number") { + throw new PriceError('Price table "schema_version" must be a number.'); + } + if (typeof data.frontier_model !== "string" || data.frontier_model.trim() === "") { + throw new PriceError('Price table "frontier_model" must be a non-empty string.'); + } + if (!isPlainObject3(data.models)) { + throw new PriceError('Price table "models" must be a mapping of model id to prices.'); + } + const models = /* @__PURE__ */ 
Object.create(null); + for (const [model, raw] of Object.entries(data.models)) { + if (!isPlainObject3(raw)) { + throw new PriceError(`Price table models["${model}"] must be a mapping.`); + } + const entry = { + inputPer1M: requireNonNegativeNumber(raw.inputPer1M, `models["${model}"].inputPer1M`), + outputPer1M: requireNonNegativeNumber(raw.outputPer1M, `models["${model}"].outputPer1M`) + }; + if (raw.family !== void 0) { + if (typeof raw.family !== "string" || raw.family.trim() === "") { + throw new PriceError(`models["${model}"].family must be a non-empty string when present.`); + } + entry.family = raw.family; + } + if (raw.released !== void 0) { + if (typeof raw.released !== "string" || Number.isNaN(Date.parse(raw.released))) { + throw new PriceError(`models["${model}"].released must be an ISO date string when present.`); + } + entry.released = raw.released; + } + models[model] = entry; + } + if (!Object.hasOwn(models, data.frontier_model)) { + throw new PriceError( + `Price table frontier_model "${data.frontier_model}" has no entry in models.` + ); + } + return { schema_version: data.schema_version, frontier_model: data.frontier_model, models }; +} + // ../core/src/ledger.ts var SCHEMA_VERSION = 1; var TASK_STATUSES = ["ok", "failed", "blocked", "fallback"]; @@ -7872,7 +7973,7 @@ var LedgerError = class extends Error { this.name = "LedgerError"; } }; -function isPlainObject3(value) { +function isPlainObject4(value) { return typeof value === "object" && value !== null && !Array.isArray(value); } function requireString2(value, where) { @@ -7892,7 +7993,7 @@ function requireIsoTimestamp(value, where) { } return s; } -function requireNonNegativeNumber(value, where) { +function requireNonNegativeNumber2(value, where) { if (typeof value !== "number" || !Number.isFinite(value) || value < 0) { throw new LedgerError(`${where} must be a finite number >= 0 (got ${JSON.stringify(value)}).`); } @@ -7917,9 +8018,9 @@ function requireEnum2(value, allowed, where) { return 
value; } function validateEventInput(input) { - const actual_cost = requireNonNegativeNumber(input.actual_cost, "task.actual_cost"); - const frontier_cost = requireNonNegativeNumber(input.frontier_cost, "task.frontier_cost"); - const metered_spent = requireNonNegativeNumber(input.metered_spent, "task.metered_spent"); + const actual_cost = requireNonNegativeNumber2(input.actual_cost, "task.actual_cost"); + const frontier_cost = requireNonNegativeNumber2(input.frontier_cost, "task.frontier_cost"); + const metered_spent = requireNonNegativeNumber2(input.metered_spent, "task.metered_spent"); const out = { task_id: requireString2(input.task_id, "task.task_id"), attempt: requireNonNegativeInt(input.attempt, "task.attempt"), @@ -8009,7 +8110,7 @@ function backfillLegacyTask(obj) { }; } function parseEvent(obj) { - if (!isPlainObject3(obj)) { + if (!isPlainObject4(obj)) { throw new LedgerError("Ledger record must be a JSON object."); } const meta = parseMeta(obj); @@ -8108,6 +8209,24 @@ function loadLaneConfig(path) { } return parseLaneConfig(text); } +function loadPriceTable(path) { + const filePath = typeof path === "string" ? path : fileURLToPath(path); + let text; + try { + text = readFileSync(filePath, "utf8"); + } catch (err) { + const detail = err instanceof Error ? err.message : String(err); + throw new PriceError(`Could not read price table at "${filePath}": ${detail}`); + } + let parsed; + try { + parsed = JSON.parse(text); + } catch (err) { + const detail = err instanceof Error ? err.message : String(err); + throw new PriceError(`Could not parse price table at "${filePath}" as JSON: ${detail}`); + } + return validatePriceTable(parsed); +} function loadPolicyConfig(path) { const filePath = typeof path === "string" ? 
path : fileURLToPath(path); let text; @@ -8233,7 +8352,8 @@ function makeCliExecutor(spawnImpl) { return async (lane, instruction, attachments) => { if (!lane.command) throw new Error(`cli lane "${lane.id}" has no command configured`); const input = combinedPrompt(instruction, attachments); - const res = spawn(lane.command, lane.args ?? [], { input, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 }); + const args = (lane.args ?? []).map((a) => a.replaceAll("{model}", lane.model)); + const res = spawn(lane.command, args, { input, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 }); if (res.error) throw new LaneFailure("provider_error", `cli lane "${lane.id}" failed to spawn`); if (res.status !== 0) throw new LaneFailure("provider_error", `cli lane "${lane.id}" exited with status ${res.status}`); return { resultText: res.stdout ?? "" }; @@ -8484,6 +8604,7 @@ function makeHostReviewDeps(env) { const cwd = env.CLAUDE_PROJECT_DIR ?? process.cwd(); const lanesPath = env.TOKENMAXED_LANES ?? homeFile("lanes.yaml"); const ledgerPath = env.TOKENMAXED_LEDGER; + const pricesPath = env.TOKENMAXED_PRICES ?? fileURLToPath2(new URL("../prices.seed.json", import.meta.url)); const resolveAuth = makeResolveAuth(env); const executor = makeTrustedExecutor({ cli: makeCliExecutor(makeCliSpawn(REVIEW_CLI_TIMEOUT_MS)), @@ -8498,7 +8619,17 @@ function makeHostReviewDeps(env) { [diff truncated for review]` : diff; }, - loadLanes: () => existsSync3(lanesPath) ? 
[...loadLaneConfig(lanesPath).lanes] : null, + loadLanes: () => { + if (!existsSync3(lanesPath)) return null; + const raw = [...loadLaneConfig(lanesPath).lanes]; + let table; + try { + table = loadPriceTable(pricesPath); + } catch { + return raw; + } + return raw.map((l) => resolveLaneModel(l, table)).filter((l) => !parseModelAlias(l.model).latest); + }, availableLaneIds: makeAvailabilityProbe(env), loadPolicy: makeLoadPolicy(env), runManager: async (lane, prompt) => (await executor(lane, prompt)).resultText, diff --git a/packages/plugin/lanes.starter.yaml b/packages/plugin/lanes.starter.yaml index dc42765..f6e29a7 100644 --- a/packages/plugin/lanes.starter.yaml +++ b/packages/plugin/lanes.starter.yaml @@ -33,7 +33,7 @@ lanes: - id: claude-native kind: cli - model: claude-opus-4-7 + model: claude-opus@latest # self-updating: tracks the newest PRICED claude-opus (e.g. claude-opus-4-8) — never a hard-pinned version trust_mode: full costBasis: subscription provenance: anthropic @@ -106,14 +106,14 @@ lanes: # run with TOKENMAXED_DISABLE=1 so they never re-enter routing / recurse.) - id: claude-haiku kind: cli - model: claude-haiku-4-5-20251001 + model: claude-haiku@latest # self-updating; the {model} arg below spawns the resolved id trust_mode: full costBasis: subscription provenance: anthropic jurisdiction: US execution_mode: answer-only command: claude - args: ["-p", "--model", "claude-haiku-4-5-20251001"] + args: ["-p", "--model", "{model}"] # {model} ⇒ the resolved newest priced claude-haiku # Secondary / in-family manager. Codex above is the default host-turn reviewer # (first eligible in file order). On the escalation path this lane can only # independently review an offloaded output in categories where it isn't weaker @@ -132,11 +132,37 @@ lanes: explain: 0.82 codegen: 0.72 + # Full-access Sonnet (Claude Code): a stronger in-family offload than Haiku for + # bounded subtasks, still on the same subscription (no metered $). 
Self-updating via + # claude-sonnet@latest; the {model} arg spawns the resolved id. As a manager it is a + # capable INDEPENDENT reviewer for Codex's strong categories (which Haiku can't cover). + - id: claude-sonnet + kind: cli + model: claude-sonnet@latest + trust_mode: full + costBasis: subscription + provenance: anthropic + jurisdiction: US + execution_mode: answer-only + command: claude + args: ["-p", "--model", "{model}"] # {model} ⇒ the resolved newest priced claude-sonnet + roles: [manager] + manager_allowed: true + capability: + feature: 0.90 + refactor: 0.86 + bugfix: 0.85 + boilerplate: 0.88 + explain: 0.88 + codegen: 0.85 + docs: 0.86 + # --- Other provider lanes (SUPPORTED) --------------------------------------- # Popular vendors, shipped as SAFE inert TEMPLATES: each is `blocked` (never - # selected) until YOU pick a trust_mode. (The only enabled defaults ABOVE are the - # host `claude-native`, `codex-cli` (the default reviewer), and the in-family - # `claude-haiku` — all first-party or availability-gated. `ollama-llama3` ships + # selected) until YOU pick a trust_mode. (The enabled defaults ABOVE are the host + # `claude-native`, `codex-cli` (the default reviewer), and the in-family Claude Code + # lanes `claude-haiku` + `claude-sonnet` — all first-party or availability-gated. + # `ollama-llama3` ships # `blocked` too: a local server isn't assumed. In your own config, set every # lane's trust deliberately.) The vendor lanes here add nothing to your trust # surface until you change `blocked`. Trust ladder: blocked < worker < reader < full. @@ -154,6 +180,10 @@ lanes: # BYOK api lanes: put the key in env TOKENMAXED_KEY_ (never in this file). # api lanes speak the OpenAI /chat/completions schema — point `endpoint` at your # provider's OpenAI-COMPATIBLE chat-completions URL (not a vendor-proprietary one). + # COST: `costBasis` is YOUR billing model — NOT implied by `api`. Many vendors (e.g. 
+ # MiniMax) are a flat-rate SUBSCRIPTION token ⇒ `costBasis: subscription` (treated as + # $0 and preferred by routing, like a CLI subscription); pay-per-token ⇒ `metered`. + # /tokenmaxed:setup ASKS you per api lane — TokenMaxed never assumes metered. # CLI lanes (gemini-cli, kimi-cli) can only be `full` or `blocked` — worker/reader # executors are API-only, so a CLI lane set to worker/reader loads but never routes. @@ -199,9 +229,24 @@ lanes: # automatically (family from the price table); for an # unpriced pin add `model_family: minimax`. trust_mode: blocked # → worker (no repo) or reader (repo-read; see above) or full - costBasis: metered + costBasis: subscription # MiniMax is typically a flat-rate SUBSCRIPTION token (not pay-per-token); /tokenmaxed:setup confirms this with you provenance: minimax jurisdiction: CN endpoint: https://api.minimax.io/v1/chat/completions # use the OpenAI-compatible URL authHandle: MINIMAX capability: { codegen: 0.80, boilerplate: 0.80, docs: 0.76 } + + # Sonnet via Anthropic API (BYOK) — an OPT-IN alternative to the Claude Code CLI + # lane above, NOT a default. When the `claude` CLI is available, the subscription + # CLI lane runs Sonnet (no metered $); this is here only if you deliberately want + # Sonnet over the API. Flip trust_mode to worker/full to enable. Self-updating. 
+ - id: claude-sonnet-api + kind: api + model: claude-sonnet@latest + trust_mode: blocked + costBasis: metered + provenance: anthropic + jurisdiction: US + endpoint: https://api.anthropic.com/v1/chat/completions # Anthropic's OpenAI-compatible endpoint + authHandle: ANTHROPIC + capability: { codegen: 0.85, boilerplate: 0.86, docs: 0.86, explain: 0.88 } diff --git a/packages/plugin/prices.seed.json b/packages/plugin/prices.seed.json index 149b25e..11326d7 100644 --- a/packages/plugin/prices.seed.json +++ b/packages/plugin/prices.seed.json @@ -1,8 +1,10 @@ { "schema_version": 2, - "frontier_model": "claude-opus-4-7", + "frontier_model": "claude-opus-4-8", "models": { + "claude-opus-4-8": { "inputPer1M": 5, "outputPer1M": 25, "family": "claude-opus", "released": "2026-05-15" }, "claude-opus-4-7": { "inputPer1M": 15, "outputPer1M": 75, "family": "claude-opus", "released": "2026-01-15" }, + "claude-sonnet-4-6": { "inputPer1M": 3, "outputPer1M": 15, "family": "claude-sonnet", "released": "2025-11-14" }, "claude-haiku-4-5-20251001": { "inputPer1M": 1, "outputPer1M": 5, "family": "claude-haiku", "released": "2025-10-01" }, "gpt-5.5": { "inputPer1M": 10, "outputPer1M": 30, "family": "gpt", "released": "2026-02-01" }, "llama3.1:8b": { "inputPer1M": 0, "outputPer1M": 0, "family": "llama", "released": "2024-07-23" }, diff --git a/packages/plugin/server/index.mjs b/packages/plugin/server/index.mjs index 9a9ffe6..e52551b 100755 --- a/packages/plugin/server/index.mjs +++ b/packages/plugin/server/index.mjs @@ -14229,7 +14229,7 @@ var require_dist2 = __commonJS({ import { randomUUID as randomUUID3 } from "node:crypto"; import { existsSync as existsSync8, mkdirSync as mkdirSync5, readFileSync as readFileSync5, writeFileSync as writeFileSync4 } from "node:fs"; import { dirname as dirname6, join as join6 } from "node:path"; -import { fileURLToPath as fileURLToPath4 } from "node:url"; +import { fileURLToPath as fileURLToPath5 } from "node:url"; // ../../node_modules/zod/v4/core/core.js 
var _a; @@ -23389,6 +23389,21 @@ function resolveLaneModel(lane, table) { const concrete = newestPricedInFamily(table, spec.family); return concrete ? { ...lane, model: concrete } : lane; } +function staleAgainstPriceTable(lanes, table) { + const out = []; + for (const lane of lanes) { + const spec = parseModelAlias(lane.model); + const pinned = spec.latest ? newestPricedInFamily(table, spec.family) : spec.id; + if (!pinned) continue; + const family = spec.latest ? spec.family : lane.model_family ?? table.models[pinned]?.family; + if (!family) continue; + const newest = newestPricedInFamily(table, family); + if (newest && newest !== pinned && compareNewestFirst(table, newest, pinned) < 0) { + out.push({ laneId: lane.id, family, pinned, newest }); + } + } + return out; +} function sameFamily(id, family) { if (id === family) return true; if (!id.startsWith(family)) return false; @@ -23571,13 +23586,8 @@ function parseLane(entry, index) { throw new LaneConfigError(`${at("endpoint")}: an api lane requires an endpoint.`); } } - if (lane.model.trim().endsWith("@latest")) { - if (lane.kind !== "api") { - throw new LaneConfigError(`${at("model")}: a "@latest" alias is only supported on api lanes.`); - } - if (!parseModelAlias(lane.model).latest) { - throw new LaneConfigError(`${at("model")}: "@latest" needs a family stem, e.g. "minimax@latest".`); - } + if (lane.model.trim().endsWith("@latest") && !parseModelAlias(lane.model).latest) { + throw new LaneConfigError(`${at("model")}: "@latest" needs a family stem, e.g. "claude-opus@latest".`); } return lane; } @@ -24749,7 +24759,8 @@ function makeCliExecutor(spawnImpl) { return async (lane, instruction, attachments) => { if (!lane.command) throw new Error(`cli lane "${lane.id}" has no command configured`); const input = combinedPrompt(instruction, attachments); - const res = spawn(lane.command, lane.args ?? [], { input, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 }); + const args = (lane.args ?? 
[]).map((a) => a.replaceAll("{model}", lane.model)); + const res = spawn(lane.command, args, { input, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 }); if (res.error) throw new LaneFailure("provider_error", `cli lane "${lane.id}" failed to spawn`); if (res.status !== 0) throw new LaneFailure("provider_error", `cli lane "${lane.id}" exited with status ${res.status}`); return { resultText: res.stdout ?? "" }; @@ -25381,27 +25392,41 @@ function makeSummaryFromEnv(env) { const lanes = existsSync5(lanesPath) ? [...loadLaneConfig(lanesPath).lanes] : []; const available = await probeAvailable(lanes); const now = Date.now(); - let staleness = []; - if (!globallyDisabled && existsSync5(pricesPath)) { + let priceTable; + if (existsSync5(pricesPath)) { + try { + priceTable = loadPriceTable(pricesPath); + } catch { + priceTable = void 0; + } + } + const displayLanes = priceTable ? lanes.map((l) => resolveLaneModel(l, priceTable)) : lanes; + const staleByLane = /* @__PURE__ */ new Map(); + if (!globallyDisabled && priceTable) { + for (const f of staleAgainstPriceTable(lanes, priceTable)) { + staleByLane.set(f.laneId, { laneId: f.laneId, newest: f.newest, newestPriced: true }); + } try { - staleness = await reportFreshness( + for (const w of await reportFreshness( lanes, { fetchList: () => { throw new Error("summary path must not fetch"); }, - table: loadPriceTable(pricesPath), + table: priceTable, now, readCache: () => readFreshnessCache(cachePath), writeCache: () => { } }, { refresh: false } - ); + )) { + staleByLane.set(w.laneId, { laneId: w.laneId, newest: w.newest, newestPriced: w.newestPriced }); + } } catch { - staleness = []; } } + const staleness = [...staleByLane.values()]; let laneReview = "current"; if (lanes.length > 0) { const prior = readLaneReviewState(laneStatePath).byProject[reviewProjectKey]?.fingerprint; @@ -25410,7 +25435,7 @@ function makeSummaryFromEnv(env) { } return buildSummaryData({ events: new JsonlLedger(ledgerPath).readAll(), - lanes, + lanes: 
displayLanes, policy: loadPolicy(), availableLaneIds: available, gateReady, @@ -25428,6 +25453,7 @@ function makeSummaryFromEnv(env) { import { spawnSync as spawnSync3 } from "node:child_process"; import { randomUUID as randomUUID2 } from "node:crypto"; import { existsSync as existsSync6 } from "node:fs"; +import { fileURLToPath as fileURLToPath3 } from "node:url"; // ../mcp/src/reviewer.ts var VERDICT_RE = /^[ \t>]*VERDICT:\s*(pass|needs-rework|fail)\s*$/gim; @@ -25500,6 +25526,7 @@ function makeHostReviewDeps(env) { const cwd = env.CLAUDE_PROJECT_DIR ?? process.cwd(); const lanesPath = env.TOKENMAXED_LANES ?? homeFile("lanes.yaml"); const ledgerPath = env.TOKENMAXED_LEDGER; + const pricesPath = env.TOKENMAXED_PRICES ?? fileURLToPath3(new URL("../prices.seed.json", import.meta.url)); const resolveAuth = makeResolveAuth(env); const executor = makeTrustedExecutor({ cli: makeCliExecutor(makeCliSpawn(REVIEW_CLI_TIMEOUT_MS)), @@ -25514,7 +25541,17 @@ function makeHostReviewDeps(env) { [diff truncated for review]` : diff; }, - loadLanes: () => existsSync6(lanesPath) ? 
[...loadLaneConfig(lanesPath).lanes] : null, + loadLanes: () => { + if (!existsSync6(lanesPath)) return null; + const raw = [...loadLaneConfig(lanesPath).lanes]; + let table; + try { + table = loadPriceTable(pricesPath); + } catch { + return raw; + } + return raw.map((l) => resolveLaneModel(l, table)).filter((l) => !parseModelAlias(l.model).latest); + }, availableLaneIds: makeAvailabilityProbe(env), loadPolicy: makeLoadPolicy(env), runManager: async (lane, prompt) => (await executor(lane, prompt)).resultText, @@ -25566,10 +25603,10 @@ async function runReviewWithBudget(runner, newId, opts) { // ../mcp/src/setup.ts import { copyFileSync, existsSync as existsSync7, mkdirSync as mkdirSync4 } from "node:fs"; import { dirname as dirname5, join as join5 } from "node:path"; -import { fileURLToPath as fileURLToPath3 } from "node:url"; -var LANES_STARTER = fileURLToPath3(new URL("../lanes.starter.yaml", import.meta.url)); -var POLICY_STARTER = fileURLToPath3(new URL("../policy.starter.yaml", import.meta.url)); -var DEFAULT_PRICES = fileURLToPath3(new URL("../prices.seed.json", import.meta.url)); +import { fileURLToPath as fileURLToPath4 } from "node:url"; +var LANES_STARTER = fileURLToPath4(new URL("../lanes.starter.yaml", import.meta.url)); +var POLICY_STARTER = fileURLToPath4(new URL("../policy.starter.yaml", import.meta.url)); +var DEFAULT_PRICES = fileURLToPath4(new URL("../prices.seed.json", import.meta.url)); async function runSetup(env) { const lanesPath = env.TOKENMAXED_LANES ?? homeFile("lanes.yaml"); const policyPath = env.TOKENMAXED_POLICY ?? homeFile("policy.yaml"); @@ -25603,6 +25640,7 @@ async function runSetup(env) { model: resolved, ...resolved !== l.model ? { rawModel: l.model } : {}, trustMode: l.trust_mode, + costBasis: l.costBasis, executionMode: l.execution_mode ?? 
"answer-only", role, available: !!l.native || available.has(l.id), @@ -25665,11 +25703,19 @@ var ROLE_LABEL = { function formatLaneSetup(rows) { if (rows.length === 0) return [" (no lanes configured)"]; const lines = ["Lanes (what each may see/do, and whether it can run now):"]; + let anyApi = false; for (const r of rows) { const model = r.rawModel && r.rawModel !== r.model ? `${r.rawModel} \u2192 ${r.model}` : r.model; const caps = r.capability && Object.keys(r.capability).length > 0 ? " \xB7 caps " + Object.entries(r.capability).map(([c, v]) => `${c}=${v}`).join(",") : ""; + const billing = r.kind === "api" ? ` \xB7 billing=${r.costBasis} (confirm: subscription vs metered)` : ` \xB7 billing=${r.costBasis}`; + if (r.kind === "api") anyApi = true; + lines.push( + ` \u2022 ${r.id} [${r.kind}] ${model} \xB7 trust=${r.trustMode} \u2192 ${permissionFor(r.trustMode, r.executionMode)}${billing} \xB7 role=${ROLE_LABEL[r.role]} \xB7 ${r.available ? "available" : "unavailable now"}${caps}` + ); + } + if (anyApi) { lines.push( - ` \u2022 ${r.id} [${r.kind}] ${model} \xB7 trust=${r.trustMode} \u2192 ${permissionFor(r.trustMode, r.executionMode)} \xB7 role=${ROLE_LABEL[r.role]} \xB7 ${r.available ? "available" : "unavailable now"}${caps}` + " \u24D8 For each api lane, confirm billing: a flat-rate subscription token (costBasis: subscription, treated as $0 and preferred) or pay-per-token (costBasis: metered). TokenMaxed never assumes \u2014 set it per YOUR plan." 
); } return lines; @@ -26090,7 +26136,7 @@ function unknownKeys(inputSchema, args) { // ../mcp/src/server.ts var DEFAULT_LANES = homeFile("lanes.yaml"); -var DEFAULT_PRICES2 = fileURLToPath4(new URL("../prices.seed.json", import.meta.url)); +var DEFAULT_PRICES2 = fileURLToPath5(new URL("../prices.seed.json", import.meta.url)); function recordableLane(lane, priceTable) { if (lane.native || lane.costBasis !== "metered") return true; try { diff --git a/packages/plugin/skills/setup/SKILL.md b/packages/plugin/skills/setup/SKILL.md index 41b3404..55a3527 100644 --- a/packages/plugin/skills/setup/SKILL.md +++ b/packages/plugin/skills/setup/SKILL.md @@ -10,7 +10,20 @@ disable-model-invocation: true starter templates if they don't exist (it never overwrites), validates them, and reports status. 2. Present the status report to the user verbatim. -3. Then guide them through any next steps the report implies: +3. **For every `api` lane in the report, ASK the user how it is billed — never assume.** + An API endpoint is NOT inherently metered: many vendors (e.g. MiniMax) are accessed + via a flat-rate **subscription token**. For each `api` lane, ask whether their access + is: + - a **subscription** (flat-rate / prepaid token, no per-task charge) → set + `costBasis: subscription` in `~/.tokenmaxed/lanes.yaml` (treated as $0 and + preferred by routing, like a CLI subscription — true to the premise of maximizing + subscriptions), or + - **metered** (pay-per-token) → set `costBasis: metered` (priced per token). + + Use the AskUserQuestion tool (one question per api lane, or one grouped question if + there are several) and then edit `costBasis` in `lanes.yaml` to match their answer. + This keeps the user from being mislabeled as metered when they are on a subscription. +4. 
Then guide them through any next steps the report implies: - Edit `~/.tokenmaxed/lanes.yaml` to add/trust the lanes they want (provider CLIs like Codex/Gemini, a local Ollama, the cheaper-Claude lane, or a BYOK OpenAI-compatible worker).