diff --git a/backends/opencode/index.ts b/backends/opencode/index.ts index d70f65b..184278c 100644 --- a/backends/opencode/index.ts +++ b/backends/opencode/index.ts @@ -56,7 +56,67 @@ const PREVIEW_TOOLS = new Set(["edit", "write", "multiedit", "bash", "apply_patc // ── Shim invocation ────────────────────────────────────────────── -function runHook(event: "pre" | "post", payload: object): void { +const HOOK_TIMEOUT_MS = 15000 +// A genuine timeout takes ~HOOK_TIMEOUT_MS; the spurious libuv timeout below +// returns almost instantly. Anything faster than this is treated as spurious. +const SPURIOUS_TIMEOUT_MS = 2000 +const MAX_HOOK_ATTEMPTS = 3 + +// Run `run` (a synchronous, throwing operation — here a spawnSync), retrying a +// SPURIOUS timeout. +// +// Why this exists: Node's spawnSync (used by execSync) derives its timeout +// deadline from libuv's *cached* loop time, which is only refreshed once per +// loop iteration. The first spawnSync that runs right after async work — here, +// the awaited enqueueHook in the tool hooks — sees a stale "now", so +// `now + timeout` is already in the past and libuv SIGTERMs the child the +// instant it spawns: a spurious ETIMEDOUT that returns in ~15ms instead of 15s. +// On Windows this drops the FIRST hook of a concurrent burst, so that file gets +// no diff/marker (issue #46; Unix is unaffected — it execs the .sh directly and +// doesn't hit this the same way). +// +// The fix: retry, but first `await` a turn of the event loop so libuv refreshes +// its cached time — a synchronous retry would re-read the same stale value and +// fail again (which is exactly why the *next* hook in a burst always succeeds). +// A genuine timeout takes ~HOOK_TIMEOUT_MS, far above SPURIOUS_TIMEOUT_MS, so it +// is never retried. `label` is used only for the diagnostic log. Exported so the +// retry behaviour can be exercised by the test harness (it's a Windows libuv +// quirk that can't otherwise be reproduced on CI). +export async function runWithSpuriousRetry( + run: () => void, + label = "hook-entry", +): Promise { + for (let attempt = 1; attempt <= MAX_HOOK_ATTEMPTS; attempt++) { + const start = Date.now() + try { + run() + return + } catch (err: any) { + const elapsed = Date.now() - start + // A timeout-kill surfaces as code ETIMEDOUT and/or signal SIGTERM — Node + // has historically reported one or the other depending on platform — so + // match both, or the spurious-timeout retry could be missed where only + // SIGTERM is set. Still gated on elapsed < SPURIOUS_TIMEOUT_MS below, so a + // genuine ~15s hang (also SIGTERM'd) is never mistaken for spurious. + const timedOut = + !!err && (err.code === "ETIMEDOUT" || err.signal === "SIGTERM") + if (timedOut && elapsed < SPURIOUS_TIMEOUT_MS && attempt < MAX_HOOK_ATTEMPTS) { + // Yield so libuv refreshes its cached loop time before retrying. + await new Promise((resolve) => setImmediate(resolve)) + continue + } + // Abstain on any failure. Log a genuine timeout with elapsed ms so a real + // hang (~15s) is distinguishable from exhausted spurious retries. + if (timedOut) { + // eslint-disable-next-line no-console + console.debug(`[code-preview] ${label} timed out after ${elapsed}ms`) + } + return + } + } +} + +async function runHook(event: "pre" | "post", payload: object): Promise { const shim = resolveHookEntry() if (!shim) { // Surface enough breadcrumb that a misconfigured bin-path.txt isn't a @@ -69,22 +129,15 @@ function runHook(event: "pre" | "post", payload: object): void { const cmd = IS_WIN ? `powershell -NoProfile -ExecutionPolicy Bypass -File "${shim}" opencode ${event}` : `"${shim}" opencode ${event}` - try { + + await runWithSpuriousRetry(() => { execSync(cmd, { input: JSON.stringify(payload), env: { ...process.env, CODE_PREVIEW_BACKEND: "opencode" }, - timeout: 15000, + timeout: HOOK_TIMEOUT_MS, stdio: ["pipe", "pipe", "pipe"], }) - } catch (err: any) { - // Abstain on any failure. Log timeouts at debug-equivalent because - // a silent 15s hang is otherwise hard to diagnose; everything else - // is treated as best-effort and swallowed. - if (err && (err.code === "ETIMEDOUT" || err.signal === "SIGTERM")) { - // eslint-disable-next-line no-console - console.debug(`[code-preview] hook-entry ${event} timed out after 15s`) - } - } + }, `hook-entry ${event}`) } // ── Hook serialisation ─────────────────────────────────────────── @@ -96,9 +149,9 @@ function runHook(event: "pre" | "post", payload: object): void { let hookQueue: Promise = Promise.resolve() -function enqueueHook(fn: () => void): Promise { - hookQueue = hookQueue.then(() => { - try { fn() } catch { /* non-fatal */ } +function enqueueHook(fn: () => void | Promise): Promise { + hookQueue = hookQueue.then(async () => { + try { await fn() } catch { /* non-fatal */ } }) return hookQueue } @@ -116,6 +169,8 @@ const plugin: Plugin = async ({ directory }) => { "tool.execute.after": async (input, _output) => { if (!PREVIEW_TOOLS.has(input.tool)) return + // OpenCode's after-hook carries the tool args on `input` (the before-hook + // carries them on `output`), confirmed on the live API (issue #46). const args = ((input as any).args as Record) ?? {} const payload = { tool: input.tool, args, cwd: directory } await enqueueHook(() => runHook("post", payload)) diff --git a/tests/backends/opencode/retry_test.ts b/tests/backends/opencode/retry_test.ts new file mode 100644 index 0000000..7bbbf3f --- /dev/null +++ b/tests/backends/opencode/retry_test.ts @@ -0,0 +1,92 @@ +// retry_test.ts — Unit guard for runWithSpuriousRetry (issue #46). +// +// The Windows libuv quirk — the first spawnSync after async work spuriously +// times out because libuv's cached loop time is stale — can't be reproduced on +// CI. So we test the retry logic directly by injecting a `run` that throws a +// fast ETIMEDOUT / SIGTERM. Cheap insurance against someone later "simplifying" +// the retry (or the SIGTERM match) away. No nvim required. +// +// Run via: bun retry_test.ts (or) npx tsx retry_test.ts +// Prints "ALL OK" and exits 0 on success; exits 1 on any failed check. + +import { resolve, dirname } from "path" +import { fileURLToPath, pathToFileURL } from "url" + +const __dirname = dirname(fileURLToPath(import.meta.url)) + +type RunWithSpuriousRetry = (run: () => void, label?: string) => Promise + +// Build an error shaped like Node's timeout-kill: ETIMEDOUT via `code`, or a +// SIGTERM kill via `signal` (platform-dependent — see the retry's `timedOut`). +function timeoutErr(kind: "code" | "signal"): Error { + const e = new Error("simulated timeout") as any + if (kind === "code") e.code = "ETIMEDOUT" + else e.signal = "SIGTERM" + return e +} + +let failures = 0 +function check(name: string, cond: boolean): void { + console.log(`${cond ? "ok " : "FAIL"} - ${name}`) + if (!cond) failures++ +} + +async function main(): Promise { + // pathToFileURL so the dynamic import works with Windows absolute paths too + // (the bare `D:\…` path is rejected by the ESM loader as an unsupported scheme). + const indexPath = resolve(__dirname, "../../../backends/opencode/index.ts") + const mod = await import(pathToFileURL(indexPath).href) + const runWithSpuriousRetry = mod.runWithSpuriousRetry as RunWithSpuriousRetry + + // Success on the first attempt → run called exactly once. + { + let calls = 0 + await runWithSpuriousRetry(() => { calls++ }) + check("success on first attempt runs once", calls === 1) + } + + // The core case: a fast ETIMEDOUT recovers on retry (the libuv quirk). + { + let calls = 0 + await runWithSpuriousRetry(() => { calls++; if (calls === 1) throw timeoutErr("code") }) + check("fast ETIMEDOUT recovers on retry (runs twice)", calls === 2) + } + + // Platform variance: a SIGTERM-only fast kill must also recover (review fix). + { + let calls = 0 + await runWithSpuriousRetry(() => { calls++; if (calls === 1) throw timeoutErr("signal") }) + check("fast SIGTERM recovers on retry (runs twice)", calls === 2) + } + + // A non-timeout error must NOT be retried — abstain after one attempt. + { + let calls = 0 + await runWithSpuriousRetry(() => { + calls++ + const e = new Error("nope") as any + e.code = "ENOENT" + throw e + }) + check("non-timeout error is not retried (runs once)", calls === 1) + } + + // A persistent fast timeout must be bounded at MAX_HOOK_ATTEMPTS — no infinite + // loop if every attempt spuriously times out. + { + let calls = 0 + await runWithSpuriousRetry(() => { calls++; throw timeoutErr("code") }) + check("persistent fast timeout is bounded (runs 3x)", calls === 3) + } + + if (failures > 0) { + console.log(`RETRY GUARD FAILED (${failures})`) + process.exit(1) + } + console.log("ALL OK") +} + +main().catch((err) => { + console.error(err) + process.exit(1) +}) diff --git a/tests/backends/opencode/test_retry.sh b/tests/backends/opencode/test_retry.sh new file mode 100644 index 0000000..253dfcb --- /dev/null +++ b/tests/backends/opencode/test_retry.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# test_retry.sh — Unit guard for the OpenCode plugin's spurious-timeout retry. +# +# The Windows libuv quirk (the first spawnSync after async work spuriously times +# out; issue #46) can't be reproduced on CI, so this drives retry_test.ts, which +# exercises runWithSpuriousRetry directly with an injected `run` that throws a +# fast ETIMEDOUT/SIGTERM. No nvim/socket needed — it's a pure unit guard against +# the retry being accidentally removed or narrowed. + +# ── Check for tsx/bun ──────────────────────────────────────────── + +_OPENCODE_RUNNER="" +if command -v bun >/dev/null 2>&1; then + _OPENCODE_RUNNER="bun" +elif command -v npx >/dev/null 2>&1; then + _OPENCODE_RUNNER="npx tsx" +else + echo -e "${YELLOW} ⊘ Skipping OpenCode retry guard (neither bun nor npx found)${NC}" + return 0 2>/dev/null || exit 0 +fi + +RETRY_TEST="$SCRIPT_DIR/backends/opencode/retry_test.ts" + +# ── Test: retry helper recovers / is bounded ───────────────────── + +test_opencode_retry_guard() { + local output + output="$($_OPENCODE_RUNNER "$RETRY_TEST" 2>&1)" + if [[ "$output" != *"ALL OK"* ]]; then + echo -e " ${RED}retry guard output:${NC}" >&2 + echo "$output" >&2 + return 1 + fi +} + +run_test "OpenCode retry recovers fast ETIMEDOUT/SIGTERM, bounded, no over-retry" test_opencode_retry_guard