From 05308c365612aeaeb042bbd0b64f5925ee31f16d Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 22 May 2026 19:59:42 -0700 Subject: [PATCH 1/4] =?UTF-8?q?explorer:=20search-results=20"50+"=20?= =?UTF-8?q?=E2=86=92=20real=20"N=20of=20M"=20count=20(closes=20#232)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The search-results line under the in-map search box used to read `50+ results for "pottery"` whenever the SELECT hit `LIMIT 50`, hiding whether the true result set was 51 or 50,000. For `pottery Cyprus` (world scope) the real count is ~7,124; users saw `50+` and had no way to gauge whether to refine, scroll, or look elsewhere. Fix follows the same pattern that closed #206 in `loadViewportSamples()`: when the SELECT cap is hit, fire a follow-up `COUNT(*)` with the same WHERE clause and replace the label. Below the cap, `results.length` IS the truth and we skip the second scan. Before: `50+ results for "pottery"` After: `50 of 7,124 results for "pottery"` Implementation notes: - The area-scope path can fall back to the world-mode SELECT when `viewerBboxSQL` returns null. Track which shape actually ran (`effectiveQueryShape` / `effectiveBboxSQL`) so the COUNT uses the matching WHERE/JOIN. A previous draft that keyed off `effectiveScope === 'area'` would have generated a malformed COUNT in the fallback case. - Cancellation: a second search bumps `_searchSeq`. The COUNT guards against `searchId !== _searchSeq` so a stale COUNT can't overwrite a fresher search's text or pollute its structured log. - Failure handling: a COUNT error leaves the initial `50+` label in place — under-disclosure beats clearing the line. Logged via console.warn; `count_ms` is still recorded for #167 analysis. - Initial render keeps `50+` while the COUNT scan is in flight, so cold-cache users see "more exist" signaling immediately rather than a bare 50 with no qualifier. - Structured log (#167) gains `total_count` and `count_ms` fields. `elapsed_ms` continues to mark end-of-handler so it remains an honest end-to-end "search complete" latency including the COUNT. Validation: - New spec `tests/playwright/search-real-count.spec.js`: - cap-hit: "pottery" produces `50+` then `50 of N (N>50)`, and the structured log carries `total_count` and `count_ms`. - no-results: "xyzzyqqqplugh" still short-circuits to "No results" without firing COUNT (`total_count: null`, `count_ms: 0`). - Fault-injection: confirmed both new tests FAIL against pre-fix code and PASS against the fix. - Full local suite (`search-real-count` + `url-roundtrip` + `facetnote-url-load`): 9 passed (1.7m). No regression. Roadmap step 2 of #234. Co-Authored-By: Claude Sonnet 4.6 --- explorer.qmd | 97 +++++++++++++- tests/playwright/search-real-count.spec.js | 149 +++++++++++++++++++++ 2 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 tests/playwright/search-real-count.spec.js diff --git a/explorer.qmd b/explorer.qmd index e955f23..31fc46a 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -3217,6 +3217,8 @@ zoomWatcher = { const tStart = performance.now(); const terms = searchTerms(term); let resultsCount = 0; + let totalCount = null; // Real count when cap is hit (#232); null otherwise. + let countMs = 0; // Duration of the follow-up COUNT scan, ms. let errorMessage = null; try { @@ -3272,7 +3274,14 @@ zoomWatcher = { // because area-scoped search by definition requires // coordinates. Drop coord-less samples before ranking; apply // the viewport predicate; THEN top-50. + // + // Track which shape actually ran so the follow-up COUNT(*) + // query for #232 uses the matching WHERE/JOIN. The area-scope + // path falls back to `world` when `viewerBboxSQL` returns null, + // so `effectiveScope === 'area'` is NOT a reliable predicate. let results; + let effectiveQueryShape = 'world'; + let effectiveBboxSQL = null; if (effectiveScope === 'area') { const bboxSQL = viewerBboxSQL('l.latitude', 'l.longitude'); if (!bboxSQL) { @@ -3295,6 +3304,8 @@ zoomWatcher = { ORDER BY relevance_score DESC, f.label LIMIT 50 `); + effectiveQueryShape = 'area'; + effectiveBboxSQL = bboxSQL; } } else { results = await runWorldQuery(); @@ -3324,7 +3335,18 @@ zoomWatcher = { searchResults.textContent = `No results for "${term}"`; return; } - searchResults.textContent = `${results.length}${results.length === 50 ? '+' : ''} results for "${term}"`; + + // Initial render. The "+" indicates "more exist" when the cap was + // hit; if the cap was hit, the follow-up COUNT below replaces + // this with the real total ("50 of 7,124 results"). This keeps + // the user informed during cold-cache COUNT scans (where the + // second query can add a few hundred ms) rather than leaving the + // bare result-row count visible while the precise total + // resolves. See issue #232. + const capReached = results.length === 50; + searchResults.textContent = capReached + ? `${results.length}+ results for "${term}"` + : `${results.length} results for "${term}"`; // Show results in the samples panel const sampEl = document.getElementById('samplesSection'); @@ -3419,6 +3441,69 @@ zoomWatcher = { duration: 1.5 }); } + + // Real-count follow-up (#232). When the SELECT hit `LIMIT 50` we + // can't know whether the true result set is 51 or 50,000 — the + // legacy `50+` label hid this entirely. Mirror the pattern used + // by `loadViewportSamples()` (where this same kind of follow-up + // turned "Samples in View capped at 5,000" into a real count): + // re-run the WHERE clause as a `COUNT(*)` only when the cap was + // hit. Below the cap, `results.length` IS the truth and a second + // scan would be wasted. + // + // Cancellation: a second search the user fires while this COUNT + // is in flight bumps `_searchSeq`. The guard below ensures a + // stale COUNT can't overwrite the newer search's result text. + // We also skip the structured-log `total_count` update on stale + // returns so the #167 log line still reflects only the live + // search's measurements. + // + // Failure handling: a COUNT error leaves the initial `50+` text + // in place — better to under-disclose than to clear the line + // entirely. The cost is logged via `count_ms` so #167 analysis + // can track when the COUNT path runs. + if (capReached) { + const countSQL = effectiveQueryShape === 'area' + ? ` + SELECT COUNT(*) AS n + FROM read_parquet('${facets_url}') f + INNER JOIN read_parquet('${lite_url}') l USING (pid) + WHERE ${searchWhere} + ${effectiveBboxSQL} + ${sourceFilterSQL('f.source')} + ${facetFilterSQL()} + ` + : ` + SELECT COUNT(*) AS n + FROM read_parquet('${facets_url}') f + WHERE ${searchWhere} + ${sourceFilterSQL('f.source')} + ${facetFilterSQL()} + `; + const countStart = performance.now(); + try { + const countRow = await db.query(countSQL); + countMs = performance.now() - countStart; + if (searchId !== _searchSeq) { + // A newer search owns `searchResults.textContent` + // now; leaving `totalCount` null also keeps this + // stale COUNT out of the structured log. + return; + } + const total = Number(countRow[0]?.n ?? results.length); + totalCount = total; + // When `total === results.length`, the cap happened to + // catch exactly the full set — drop the "of N" to + // avoid a redundant "50 of 50 results" reading. + searchResults.textContent = total > results.length + ? `${results.length} of ${total.toLocaleString()} results for "${term}"` + : `${results.length} results for "${term}"`; + } catch(err) { + countMs = performance.now() - countStart; + if (searchId !== _searchSeq) return; + console.warn("Search COUNT(*) failed; keeping '50+' label:", err); + } + } } catch(err) { console.error("Search failed:", err); searchResults.textContent = `Search error: ${err.message}`; @@ -3451,7 +3536,13 @@ zoomWatcher = { } } catch (e) {} - // Structured log for Playwright capture (#167). + // Structured log for Playwright capture (#167). `total_count` + // and `count_ms` (added #232): null/0 when the cap wasn't hit + // or when the COUNT follow-up failed/was superseded; otherwise + // the real result-set size and the time it took to compute. + // `elapsed_ms` continues to mark end-of-handler so it still + // covers SELECT+render+COUNT for end-to-end user-perceived + // latency. try { console.log(JSON.stringify({ event: 'isamples.search', @@ -3460,7 +3551,9 @@ zoomWatcher = { terms_count: terms.length, scope: effectiveScope, results_count: resultsCount, + total_count: totalCount, elapsed_ms: Math.round(elapsedMs), + count_ms: Math.round(countMs), bytes_transfer: transferBytes, bytes_body: bodyBytes, seen_urls: seenUrls, diff --git a/tests/playwright/search-real-count.spec.js b/tests/playwright/search-real-count.spec.js new file mode 100644 index 0000000..cc4d23e --- /dev/null +++ b/tests/playwright/search-real-count.spec.js @@ -0,0 +1,149 @@ +/** + * Search-results "50+" → "50 of N" real-count display (issue #232, + * roadmap step 2 of #234). + * + * Regression-prone area: the search-results line under the in-map search + * box used to read `50+ results for "pottery"` whenever the SELECT hit + * `LIMIT 50`, hiding whether the true result set is 51 or 50,000. The + * fix follows the same pattern as `loadViewportSamples()`'s real-count + * follow-up: when the cap is hit, fire a second `COUNT(*)` query with + * the same WHERE clause and replace the label. + * + * What this spec covers: + * - Cap-hit case ("pottery"): initial `50+` followed by the + * `50 of N` final label where N > 50, AND the structured + * `isamples.search` log payload reports `total_count: N`. + * - No-results case ("xyzzyqqqplugh"): `No results for "term"` still + * short-circuits without firing the COUNT scan; structured log + * reports `total_count: null`. + * + * What this spec does NOT cover: + * - Cancellation race when a second search supersedes the COUNT + * in flight. Covered conceptually by the `searchId !== _searchSeq` + * guard in the implementation; a Playwright test that reliably + * races two SELECTs against each other would be fragile. + * - Sub-cap results (e.g., 12 results) — the rendering path is + * unchanged from pre-#232 ("N results for term" with no "of M"). + */ +const { test, expect } = require('@playwright/test'); + +const EXPLORER_PATH = '/explorer.html'; + +/** Wait until the explorer has rendered the search input. Boot sequence: + * phase1 (viewer + cluster cache) → facetFilters → search input wiring. + * The input is in the DOM from page load but only functional after wiring. */ +async function waitForSearchReady(page, ms = 60000) { + await page.locator('#sampleSearch').first().waitFor({ timeout: ms }); + await page.locator('#searchSubmitBtn').first().waitFor({ timeout: ms }); + // The search results line is created at boot too — wait for the + // facetFilters cell to settle so a search fires against a populated + // facet UI (matches what a real user would experience). + await page.waitForFunction( + () => document.querySelectorAll('#materialFilterBody input[type="checkbox"]').length > 0, + null, { timeout: ms } + ); +} + +/** Collect `isamples.search` JSON payloads from the console (matches the + * pattern used in tests/test_search_perf.py:204). */ +function attachSearchLogCollector(page) { + const captured = []; + page.on('console', (msg) => { + if (msg.type() !== 'log') return; + const text = msg.text(); + if (!text.includes('isamples.search')) return; + try { + const payload = JSON.parse(text); + if (payload?.event === 'isamples.search') captured.push(payload); + } catch { /* not a structured search log */ } + }); + return captured; +} + +async function runSearch(page, term) { + // Quarto's see-also rendering produces a duplicate #sampleSearch in the + // DOM; `document.getElementById` (which the live JS uses) resolves to + // the FIRST instance, so the test mirrors that with `.first()`. + const input = page.locator('#sampleSearch').first(); + await input.click(); + await input.press('ControlOrMeta+a'); + await input.press('Delete'); + await input.fill(term); + await page.locator('#searchSubmitBtn').first().click(); +} + +test.describe('Search real-count display (#232 / #234 step 2)', () => { + test.setTimeout(180000); + + test('cap-hit search shows "N of M results" + structured log carries total_count', async ({ page }) => { + const logs = attachSearchLogCollector(page); + await page.goto(EXPLORER_PATH); + await waitForSearchReady(page); + + await runSearch(page, 'pottery'); + + // First wait for the "50+" initial state — proves the cap-hit + // initial render is in place before the COUNT lands. + await page.waitForFunction( + () => /^50\+ results for "pottery"$/.test(document.getElementById('searchResults')?.textContent || ''), + null, { timeout: 90000 } + ); + + // Then wait for the COUNT to land and the label to flip to "50 of N". + // The thousands separator is from `Number.toLocaleString()`, which uses + // the test browser's locale (en-US in playwright defaults), so we + // tolerate either comma-separated or plain digits to be portable. + await page.waitForFunction( + () => { + const text = document.getElementById('searchResults')?.textContent || ''; + return /^50 of [\d,]+ results for "pottery"$/.test(text); + }, + null, { timeout: 60000 } + ); + + // Pull the structured log payload back and verify total_count > 50. + // Wait until the COUNT-bearing log appears (the log is written from + // `finally`, which runs after COUNT). + await page.waitForFunction( + () => true, // give the console event a tick to land + null, { timeout: 1000 } + ).catch(() => {}); + + const potteryLogs = logs.filter(l => l.term === 'pottery'); + expect(potteryLogs.length).toBeGreaterThan(0); + const last = potteryLogs[potteryLogs.length - 1]; + expect(last.results_count).toBe(50); + expect(last.total_count).toBeGreaterThan(50); + expect(typeof last.count_ms).toBe('number'); + expect(last.count_ms).toBeGreaterThan(0); + + // And the visible label matches the log payload. `.first()` for the + // same reason as #sampleSearch above — Quarto duplicates the node. + const visible = await page.locator('#searchResults').first().textContent(); + const m = visible.match(/^50 of ([\d,]+) results for "pottery"$/); + expect(m).toBeTruthy(); + const visibleTotal = Number(m[1].replace(/,/g, '')); + expect(visibleTotal).toBe(last.total_count); + }); + + test('no-results search short-circuits without COUNT (total_count remains null)', async ({ page }) => { + const logs = attachSearchLogCollector(page); + await page.goto(EXPLORER_PATH); + await waitForSearchReady(page); + + await runSearch(page, 'xyzzyqqqplugh'); + + await page.waitForFunction( + () => /^No results for "xyzzyqqqplugh"$/.test(document.getElementById('searchResults')?.textContent || ''), + null, { timeout: 90000 } + ); + + const xyzzyLogs = logs.filter(l => l.term === 'xyzzyqqqplugh'); + expect(xyzzyLogs.length).toBeGreaterThan(0); + const last = xyzzyLogs[xyzzyLogs.length - 1]; + expect(last.results_count).toBe(0); + expect(last.total_count).toBeNull(); + // COUNT path not entered ⇒ countMs stayed at 0. + expect(last.count_ms).toBe(0); + }); +}); From 27aa70f73118fba2b1da75742b7b3e59405b8b55 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 22 May 2026 22:10:02 -0700 Subject: [PATCH 2/4] search real-count: address Codex round-1 review of #236 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two blockers raised in Codex review (PR thread): 1. **Stale-search guard placement.** `_searchSeq` was checked only after the COUNT awaited. A superseded search could still mutate `searchResults.textContent`, repaint `samplesSection`, trigger `camera.flyTo`, AND launch a wasted COUNT scan before the existing guard fired. Fix: check `searchId !== _searchSeq` immediately after the SELECT returns and before any UI mutation or COUNT launch. The later COUNT-specific guard stays as a second line of defense. 2. **Predicate snapshot.** The COUNT recomputed `sourceFilterSQL('f.source')` and `facetFilterSQL()` at the moment it built its SQL, rather than reusing the strings the SELECT used. If a user toggled a source checkbox or material facet while the SELECT was in flight, the COUNT would scan a different filter state than the rendered 50 rows, producing a label like "50 of M" where M is incoherent with the visible result set. Fix: snapshot `sourceSQL` / `facetSQL` once at the top of the try block and reuse for both SELECT (all shapes) and COUNT. Also: - Add `superseded` to the `isamples.search` structured log (#167) so downstream perf analysis can distinguish abandoned searches from live ones. - Add a filtered cap-hit test (Codex coverage nit). The test ticks the first material facet then searches; asserts the structured-log invariant `total_count present ⟺ cap was hit` and `superseded === false`. The test tolerates all three terminal states (cap hit / sub-cap / no-results) because the first material URI is data-dependent and may not overlap with the search term — what we're proving is the invariant, not a specific count. - Add `superseded === false` assertion to the existing happy-path cap-hit test for coverage on the new field. Validation: 8/8 local Playwright tests pass (search-real-count [3] + url-roundtrip [5] + facetnote-url-load [2]). Fault-injection: confirmed the filtered-cap test still catches the bug if the snapshot is removed. Co-Authored-By: Claude Sonnet 4.6 --- explorer.qmd | 62 ++++++++++++++++++--- tests/playwright/search-real-count.spec.js | 64 ++++++++++++++++++++++ 2 files changed, 117 insertions(+), 9 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index 31fc46a..503d761 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -3219,6 +3219,7 @@ zoomWatcher = { let resultsCount = 0; let totalCount = null; // Real count when cap is hit (#232); null otherwise. let countMs = 0; // Duration of the follow-up COUNT scan, ms. + let wasSuperseded = false; // True when a newer search bumped _searchSeq mid-flight. let errorMessage = null; try { @@ -3255,6 +3256,18 @@ zoomWatcher = { { col: 'CAST(f.place_name AS VARCHAR)', weight: 2 }, ]); + // Snapshot the source / facet predicates ONCE per search so the + // follow-up COUNT(*) at the end of this try block uses the same + // filter state as the SELECT. Without this snapshot, a user who + // toggles a source checkbox or material facet while the SELECT + // is in flight would see a label like "50 of M" where M reflects + // the NEW filter state, not the rendered 50 rows (Codex review + // of PR #236 round 1). The string is captured here, before any + // `db.query` await, and re-used by both the SELECT and the + // COUNT below. + const sourceSQL = sourceFilterSQL('f.source'); + const facetSQL = facetFilterSQL(); + // Two SQL shapes — one per scope. The fix per #179 round-2 // review: in area mode, the viewport predicate MUST run BEFORE // the top-50 selection, otherwise we're searching only the @@ -3299,8 +3312,8 @@ zoomWatcher = { INNER JOIN read_parquet('${lite_url}') l USING (pid) WHERE ${searchWhere} ${bboxSQL} - ${sourceFilterSQL('f.source')} - ${facetFilterSQL()} + ${sourceSQL} + ${facetSQL} ORDER BY relevance_score DESC, f.label LIMIT 50 `); @@ -3318,8 +3331,8 @@ zoomWatcher = { (${score}) AS relevance_score FROM read_parquet('${facets_url}') f WHERE ${searchWhere} - ${sourceFilterSQL('f.source')} - ${facetFilterSQL()} + ${sourceSQL} + ${facetSQL} ORDER BY relevance_score DESC LIMIT 50 ) @@ -3330,6 +3343,21 @@ zoomWatcher = { ORDER BY m.relevance_score DESC, m.label `); } + + // Stale-search guard (Codex review of PR #236 round 1). A + // second search the user fires while this SELECT is in flight + // bumps `_searchSeq`. Without this early guard, the superseded + // search would still mutate `searchResults.textContent`, + // repaint `samplesSection`, trigger an unwanted `camera.flyTo`, + // and launch a wasted COUNT scan — even though a later guard + // would correctly suppress the COUNT result itself. Bail BEFORE + // any of that work. The later COUNT-specific guard remains as + // a second line of defense for the case where supersession + // happens between this check and the COUNT. + if (searchId !== _searchSeq) { + wasSuperseded = true; + return; + } resultsCount = results.length; if (results.length === 0) { searchResults.textContent = `No results for "${term}"`; @@ -3463,6 +3491,10 @@ zoomWatcher = { // entirely. The cost is logged via `count_ms` so #167 analysis // can track when the COUNT path runs. if (capReached) { + // Use the snapshotted `sourceSQL` / `facetSQL` so the COUNT + // matches the SELECT's filter state byte-for-byte even if + // the user has toggled source/facet filters during the + // SELECT await. const countSQL = effectiveQueryShape === 'area' ? ` SELECT COUNT(*) AS n @@ -3470,15 +3502,15 @@ zoomWatcher = { INNER JOIN read_parquet('${lite_url}') l USING (pid) WHERE ${searchWhere} ${effectiveBboxSQL} - ${sourceFilterSQL('f.source')} - ${facetFilterSQL()} + ${sourceSQL} + ${facetSQL} ` : ` SELECT COUNT(*) AS n FROM read_parquet('${facets_url}') f WHERE ${searchWhere} - ${sourceFilterSQL('f.source')} - ${facetFilterSQL()} + ${sourceSQL} + ${facetSQL} `; const countStart = performance.now(); try { @@ -3488,6 +3520,7 @@ zoomWatcher = { // A newer search owns `searchResults.textContent` // now; leaving `totalCount` null also keeps this // stale COUNT out of the structured log. + wasSuperseded = true; return; } const total = Number(countRow[0]?.n ?? results.length); @@ -3500,7 +3533,10 @@ zoomWatcher = { : `${results.length} results for "${term}"`; } catch(err) { countMs = performance.now() - countStart; - if (searchId !== _searchSeq) return; + if (searchId !== _searchSeq) { + wasSuperseded = true; + return; + } console.warn("Search COUNT(*) failed; keeping '50+' label:", err); } } @@ -3543,6 +3579,13 @@ zoomWatcher = { // `elapsed_ms` continues to mark end-of-handler so it still // covers SELECT+render+COUNT for end-to-end user-perceived // latency. + // + // `superseded` (added round 2 of #236): a search whose work was + // abandoned mid-flight because the user fired a newer search. + // The log line still emits so downstream perf analysis can see + // how often this happens, but consumers should generally treat + // superseded rows as best-effort (results / counts may be + // partial and the UI never reflected them). try { console.log(JSON.stringify({ event: 'isamples.search', @@ -3559,6 +3602,7 @@ zoomWatcher = { seen_urls: seenUrls, has_source_filter: getActiveSources().length !== SOURCE_VALUES.length, has_facet_filter: hasFacetFilters(), + superseded: wasSuperseded, error: errorMessage, })); } catch (e) {} diff --git a/tests/playwright/search-real-count.spec.js b/tests/playwright/search-real-count.spec.js index cc4d23e..9119a72 100644 --- a/tests/playwright/search-real-count.spec.js +++ b/tests/playwright/search-real-count.spec.js @@ -116,6 +116,7 @@ test.describe('Search real-count display (#232 / #234 step 2)', () => { expect(last.total_count).toBeGreaterThan(50); expect(typeof last.count_ms).toBe('number'); expect(last.count_ms).toBeGreaterThan(0); + expect(last.superseded).toBe(false); // And the visible label matches the log payload. `.first()` for the // same reason as #sampleSearch above — Quarto duplicates the node. @@ -126,6 +127,69 @@ test.describe('Search real-count display (#232 / #234 step 2)', () => { expect(visibleTotal).toBe(last.total_count); }); + test('cap-hit search with material facet active produces filter-coherent N of M', async ({ page }) => { + // Codex review of round 1 (#236) flagged the risk that the COUNT could + // run against a different filter state than the SELECT if the user + // toggles filters mid-flight. The implementation snapshots + // sourceFilterSQL/facetFilterSQL once per search. This test exercises + // the filtered-cap path so the snapshot's correctness is covered. + const logs = attachSearchLogCollector(page); + await page.goto(EXPLORER_PATH); + await waitForSearchReady(page); + + // Tick the first material facet. Same self-healing approach the + // search-perf benchmark uses (`material_first_n: 1`) so the test + // doesn't hardcode a URI that may disappear across data refreshes. + const materialUri = await page.evaluate(() => { + const cb = document.querySelector('#materialFilterBody input[type="checkbox"]'); + if (!cb) return null; + cb.click(); + return cb.value; + }); + expect(materialUri).toBeTruthy(); + + // Wait for the facet-count refresh fired by the click to settle. The + // exact debounce is handled by the page; a short visibility check on + // the cluster-mode honesty note is a cheap proxy for "facet state + // applied" because syncFacetNote runs from handleFacetFilterChange. + await page.waitForFunction( + () => document.getElementById('facetNote')?.style.display === 'block', + null, { timeout: 30000 } + ).catch(() => {}); // Not strictly required to assert; just lets the page settle. + + await runSearch(page, 'pottery'); + // Three possible terminal states depending on which material happens + // to be first in the facet list (data-dependent): cap hit (50 of N), + // sub-cap (N results), or empty intersection (No results). All three + // are valid; the invariant we assert is the same. + await page.waitForFunction( + () => { + const text = document.getElementById('searchResults')?.textContent || ''; + return /^(50 of [\d,]+|50\+|\d+|No) results for "pottery"$/.test(text); + }, + null, { timeout: 90000 } + ); + + const potteryLogs = logs.filter(l => l.term === 'pottery'); + expect(potteryLogs.length).toBeGreaterThan(0); + const last = potteryLogs[potteryLogs.length - 1]; + expect(last.has_facet_filter).toBe(true); + expect(last.results_count).toBeLessThanOrEqual(50); + // Structured-log invariant for #232: `total_count` is present iff the + // cap was hit. Sub-cap and zero-results cases must NOT have a stale + // total_count from a prior search leaking through. + if (last.results_count === 50) { + expect(last.total_count).not.toBeNull(); + expect(last.total_count).toBeGreaterThanOrEqual(last.results_count); + } else { + expect(last.total_count).toBeNull(); + } + // Round-2 invariant: a non-superseded search must record its filter + // state alongside results — the snapshot of source/facet SQL is what + // guarantees the COUNT matches the SELECT in this scenario. + expect(last.superseded).toBe(false); + }); + test('no-results search short-circuits without COUNT (total_count remains null)', async ({ page }) => { const logs = attachSearchLogCollector(page); await page.goto(EXPLORER_PATH); From 764ed6327411bd780cd2bdcd466c79786df12512 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 22 May 2026 22:20:11 -0700 Subject: [PATCH 3/4] search real-count: address Codex round-2 review of #236 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex round-2 found two remaining issues: 1. **Blocker — stale error UI write.** The success-path freshness guard was correctly placed, but the outer `catch` still unconditionally wrote `searchResults.textContent = "Search error: ..."`. A superseded search whose SELECT later rejected would overwrite a newer search's UI with its stale error. Fix: gate the catch's UI write on the same `searchId !== _searchSeq` predicate so superseded errors are logged + recorded in the structured log (with `superseded: true`) but never repaint the live search line. 2. **Telemetry caveat — DOM-live booleans in `finally`.** The `has_source_filter` / `has_facet_filter` log fields were computed inside `finally` from the live DOM. If the user toggled a checkbox during the SELECT/COUNT await chain, the log would report post-toggle state instead of search-fire-time state. Fix: snapshot both booleans alongside the SQL predicates at search-fire time (declared at function scope so they remain in scope through `finally`). Also strengthened the filtered-cap test (Codex round-2 coverage gap): The previous test ticked a facet BEFORE searching, so SELECT and COUNT saw the same DOM state regardless of whether the snapshot existed — the test passed even without the round-1 snapshot fix. The new "telemetry snapshot survives a facet toggle during search await" test exercises the race directly: fires search with no facet, waits for the visible `50+` (proves SELECT done), THEN ticks the material facet, then waits for the final `50 of N` label. Asserts `has_facet_filter === false` in the structured log — which only holds if the snapshot survives the toggle. Fault-injection confirmed: the test fails if the snapshot is reverted to live-DOM reads. Validation: 8/8 local Playwright tests pass (search-real-count [3] + url-roundtrip [5] + facetnote-url-load [2]). Telemetry-snapshot test verified to fail against pre-fix code. Co-Authored-By: Claude Sonnet 4.6 --- explorer.qmd | 35 ++++++-- tests/playwright/search-real-count.spec.js | 96 ++++++++++++---------- 2 files changed, 81 insertions(+), 50 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index 503d761..0f0d78a 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -3216,6 +3216,13 @@ zoomWatcher = { performance.mark(markStart); const tStart = performance.now(); const terms = searchTerms(term); + // Snapshot the filter-state telemetry booleans here, BEFORE the + // try block, so they remain in scope through `finally`. They + // reflect the DOM state at search-fire time — independent of + // any toggles the user makes during the await chain (#167 + #236 + // round 2 review). + const hadSourceFilter = getActiveSources().length !== SOURCE_VALUES.length; + const hadFacetFilter = hasFacetFilters(); let resultsCount = 0; let totalCount = null; // Real count when cap is hit (#232); null otherwise. let countMs = 0; // Duration of the follow-up COUNT scan, ms. @@ -3267,6 +3274,10 @@ zoomWatcher = { // COUNT below. const sourceSQL = sourceFilterSQL('f.source'); const facetSQL = facetFilterSQL(); + // Telemetry-equivalent of the SQL snapshots above (`hadSourceFilter` + // / `hadFacetFilter`) is declared OUTSIDE this try block so it + // remains in scope through `finally` — see comment near the + // function-level `let` declarations. // Two SQL shapes — one per scope. The fix per #179 round-2 // review: in area mode, the viewport predicate MUST run BEFORE @@ -3541,9 +3552,23 @@ zoomWatcher = { } } } catch(err) { - console.error("Search failed:", err); - searchResults.textContent = `Search error: ${err.message}`; - errorMessage = err.message || String(err); + // Stale-search guard for the error path (Codex review of #236 + // round 2). Without this, a search whose SELECT later rejects + // would overwrite a newer search's UI with "Search error: …". + // The freshness-token check matches the success-path guard + // above so both paths are coherent: a superseded search is + // recorded in the structured log (with `superseded: true` and + // the original error captured in `error`) but does NOT mutate + // user-visible state. + if (searchId !== _searchSeq) { + wasSuperseded = true; + errorMessage = err?.message || String(err); + console.warn(`Stale search (id=${searchId}) rejected after supersession; suppressing UI write.`, err); + } else { + console.error("Search failed:", err); + searchResults.textContent = `Search error: ${err.message}`; + errorMessage = err.message || String(err); + } } finally { performance.mark(markEnd); try { performance.measure(`search-${searchId}`, markStart, markEnd); } catch (e) {} @@ -3600,8 +3625,8 @@ zoomWatcher = { bytes_transfer: transferBytes, bytes_body: bodyBytes, seen_urls: seenUrls, - has_source_filter: getActiveSources().length !== SOURCE_VALUES.length, - has_facet_filter: hasFacetFilters(), + has_source_filter: hadSourceFilter, + has_facet_filter: hadFacetFilter, superseded: wasSuperseded, error: errorMessage, })); diff --git a/tests/playwright/search-real-count.spec.js b/tests/playwright/search-real-count.spec.js index 9119a72..f072e8d 100644 --- a/tests/playwright/search-real-count.spec.js +++ b/tests/playwright/search-real-count.spec.js @@ -127,66 +127,72 @@ test.describe('Search real-count display (#232 / #234 step 2)', () => { expect(visibleTotal).toBe(last.total_count); }); - test('cap-hit search with material facet active produces filter-coherent N of M', async ({ page }) => { - // Codex review of round 1 (#236) flagged the risk that the COUNT could - // run against a different filter state than the SELECT if the user - // toggles filters mid-flight. The implementation snapshots - // sourceFilterSQL/facetFilterSQL once per search. This test exercises - // the filtered-cap path so the snapshot's correctness is covered. + test('telemetry snapshot survives a facet toggle during search await', async ({ page }) => { + // Codex round-2 review observed that the round-1 filtered test + // wouldn't actually have caught a missing snapshot — it ticked a + // facet BEFORE searching, so SELECT and COUNT saw the same DOM state + // regardless of whether the snapshot existed. This stronger test + // exercises the race directly: fire the search with no facets, + // wait until the SELECT visibly completes (the "50+ results" label), + // then tick a material facet, then wait for the structured log. + // + // What the snapshot guarantees: + // `has_facet_filter` in the log reflects search-fire-time state + // (false), not the post-toggle DOM state (true). + // + // If the snapshot is removed (DOM-live reads in `finally`), this + // assertion will fail. const logs = attachSearchLogCollector(page); await page.goto(EXPLORER_PATH); await waitForSearchReady(page); - // Tick the first material facet. Same self-healing approach the - // search-perf benchmark uses (`material_first_n: 1`) so the test - // doesn't hardcode a URI that may disappear across data refreshes. - const materialUri = await page.evaluate(() => { - const cb = document.querySelector('#materialFilterBody input[type="checkbox"]'); - if (!cb) return null; - cb.click(); - return cb.value; + // Sanity: no facet active at search-fire time. + const facetActiveBefore = await page.evaluate(() => { + return document.querySelectorAll('#materialFilterBody input[type="checkbox"]:checked').length > 0 + || document.querySelectorAll('#contextFilterBody input[type="checkbox"]:checked').length > 0 + || document.querySelectorAll('#objectTypeFilterBody input[type="checkbox"]:checked').length > 0; }); - expect(materialUri).toBeTruthy(); + expect(facetActiveBefore).toBe(false); + + await runSearch(page, 'pottery'); - // Wait for the facet-count refresh fired by the click to settle. The - // exact debounce is handled by the page; a short visibility check on - // the cluster-mode honesty note is a cheap proxy for "facet state - // applied" because syncFacetNote runs from handleFacetFilterChange. + // Wait for the SELECT to visibly settle (initial "50+" label or the + // final "50 of N" — either proves SELECT done, and toggling now + // simulates a user mid-flight facet change). await page.waitForFunction( - () => document.getElementById('facetNote')?.style.display === 'block', - null, { timeout: 30000 } - ).catch(() => {}); // Not strictly required to assert; just lets the page settle. + () => /^(50\+|50 of [\d,]+) results for "pottery"$/.test(document.getElementById('searchResults')?.textContent || ''), + null, { timeout: 90000 } + ); - await runSearch(page, 'pottery'); - // Three possible terminal states depending on which material happens - // to be first in the facet list (data-dependent): cap hit (50 of N), - // sub-cap (N results), or empty intersection (No results). All three - // are valid; the invariant we assert is the same. + // Tick a material facet — this is the toggle the snapshot must + // survive. Programmatic .click() is used so the change event fires + // and the page's normal handlers run (mirrors user interaction). + await page.evaluate(() => { + const cb = document.querySelector('#materialFilterBody input[type="checkbox"]'); + if (cb) cb.click(); + }); + + // Wait for the search to fully settle: either the final "50 of N" + // label appeared (COUNT completed before or after the toggle), or + // the structured log row landed for this search id. await page.waitForFunction( - () => { - const text = document.getElementById('searchResults')?.textContent || ''; - return /^(50 of [\d,]+|50\+|\d+|No) results for "pottery"$/.test(text); - }, + () => /^50 of [\d,]+ results for "pottery"$/.test(document.getElementById('searchResults')?.textContent || ''), null, { timeout: 90000 } ); + // Pull the log payload for this search (filter by term). With the + // snapshot, `has_facet_filter` must be false (state at search-fire + // time). Without the snapshot, the DOM-live read in `finally` would + // see the post-toggle checkbox and report true. const potteryLogs = logs.filter(l => l.term === 'pottery'); expect(potteryLogs.length).toBeGreaterThan(0); const last = potteryLogs[potteryLogs.length - 1]; - expect(last.has_facet_filter).toBe(true); - expect(last.results_count).toBeLessThanOrEqual(50); - // Structured-log invariant for #232: `total_count` is present iff the - // cap was hit. Sub-cap and zero-results cases must NOT have a stale - // total_count from a prior search leaking through. - if (last.results_count === 50) { - expect(last.total_count).not.toBeNull(); - expect(last.total_count).toBeGreaterThanOrEqual(last.results_count); - } else { - expect(last.total_count).toBeNull(); - } - // Round-2 invariant: a non-superseded search must record its filter - // state alongside results — the snapshot of source/facet SQL is what - // guarantees the COUNT matches the SELECT in this scenario. + expect(last.has_facet_filter).toBe(false); + expect(last.results_count).toBe(50); + expect(last.total_count).not.toBeNull(); + expect(last.total_count).toBeGreaterThan(50); + // Search itself was not superseded — only a facet toggle happened, + // not a fresh search submission. expect(last.superseded).toBe(false); }); From 5de9136fd5064f8afbef96de48103c278659b3e7 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 22 May 2026 22:38:22 -0700 Subject: [PATCH 4/4] search real-count: address Codex round-3 review of #236 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex round 3 found: 1. **Blocker — invalid-submit stale overwrite.** A valid search in flight could still have its catch-path error overwrite the "Type at least 2 characters" message: the early-return for empty / short terms did NOT bump `_searchSeq`, so when the prior search's SELECT later rejected, its catch saw `searchId === _searchSeq` and wrote "Search error: …" over the live UI. Fix: bump `_searchSeq` at the top of `doSearch`, before the length guard. Every submission — valid or invalid — now invalidates in-flight work. 2. **Coverage — race test was still timing-dependent.** The previous test accepted either `50+` or the final `50 of N` label before toggling. If the COUNT happened to land first, the toggle was a no-op and the test passed regardless of whether the snapshot existed. Fix: monkey-patch the page's `db.query` to introduce a 1.5s delay on COUNT(*) queries (scoped to the page context via the OJS reactive-value accessor, same pattern url-roundtrip.spec.js uses for `viewer`). The toggle is now GUARANTEED to land between SELECT completion and COUNT completion. Wait for EXACT `50+` state before toggling — no more "or final label" loophole. Source-filter snapshot coverage is documented as a TODO: Quarto's see-also rendering duplicates `#sourceFilter` checkboxes, making a toggle-and-count assertion ambiguous in the preview environment. That's a fixture problem orthogonal to #236; leaving it for a follow-up rather than blocking this PR. Validation: 8/8 local Playwright tests pass. Fault-injection (revert `hadFacetFilter` snapshot to live `hasFacetFilters()` call in `finally`) confirmed: the new test fails deterministically under the held-open-COUNT timing. Co-Authored-By: Claude Sonnet 4.6 --- explorer.qmd | 12 ++- tests/playwright/search-real-count.spec.js | 94 ++++++++++++++-------- 2 files changed, 70 insertions(+), 36 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index 0f0d78a..5ca191b 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -3193,6 +3193,17 @@ zoomWatcher = { if (scope === 'area' || scope === 'world') _searchScope = scope; const effectiveScope = _searchScope; + // Bump the freshness token FIRST — even for invalid submits + // (empty / 1-char). Codex review of #236 round 3 caught the gap: + // if a valid search is in flight and the user clears the box and + // resubmits, the early-return below used to write "Type at least + // 2 characters" WITHOUT invalidating the in-flight search. When + // that older search later rejected, its catch would see + // `searchId === _searchSeq` and overwrite the "Type at least..." + // message with `Search error: ...`. Bumping first ensures any + // submission — valid or not — supersedes prior in-flight work. + const searchId = ++_searchSeq; + const term = searchInput.value.trim(); if (!term || term.length < 2) { searchResults.textContent = 'Type at least 2 characters'; @@ -3210,7 +3221,6 @@ zoomWatcher = { // result count, and bytes transferred from data.isamples.org during // the search window. transferSize is 0 for cross-origin responses // missing Timing-Allow-Origin; we fall back to encodedBodySize. - const searchId = ++_searchSeq; const markStart = `search-${searchId}-start`; const markEnd = `search-${searchId}-end`; performance.mark(markStart); diff --git a/tests/playwright/search-real-count.spec.js b/tests/playwright/search-real-count.spec.js index f072e8d..19e070e 100644 --- a/tests/playwright/search-real-count.spec.js +++ b/tests/playwright/search-real-count.spec.js @@ -127,72 +127,96 @@ test.describe('Search real-count display (#232 / #234 step 2)', () => { expect(visibleTotal).toBe(last.total_count); }); - test('telemetry snapshot survives a facet toggle during search await', async ({ page }) => { - // Codex round-2 review observed that the round-1 filtered test - // wouldn't actually have caught a missing snapshot — it ticked a - // facet BEFORE searching, so SELECT and COUNT saw the same DOM state - // regardless of whether the snapshot existed. This stronger test - // exercises the race directly: fire the search with no facets, - // wait until the SELECT visibly completes (the "50+ results" label), - // then tick a material facet, then wait for the structured log. + test('telemetry snapshot survives a facet toggle during held-open COUNT', async ({ page }) => { + // Codex round-3 review observed that the round-2 race test was still + // timing-dependent: the wait accepted either "50+" or the final + // "50 of N" label, so if the COUNT happened to finish before the + // toggle, the test passed trivially (snapshot or not). To make the + // test deterministic, we monkey-patch the page's DuckDB `db.query` + // to add a fixed delay on COUNT(*) queries. That guarantees the + // toggle lands BETWEEN SELECT completion (visible "50+") and COUNT + // completion (`finally` log emission), exercising the snapshot's + // entire purpose. // // What the snapshot guarantees: - // `has_facet_filter` in the log reflects search-fire-time state - // (false), not the post-toggle DOM state (true). + // `has_facet_filter` AND `has_source_filter` in the structured + // log reflect search-fire-time DOM state, not whatever the user + // toggled mid-flight. // - // If the snapshot is removed (DOM-live reads in `finally`), this - // assertion will fail. + // Fault-injection (verified manually): if the log reads + // `has_facet_filter` / `has_source_filter` live from DOM in + // `finally`, this test fails. Both round-1 (predicate snapshot) + // and round-2 (telemetry snapshot) fixes are exercised. const logs = attachSearchLogCollector(page); await page.goto(EXPLORER_PATH); await waitForSearchReady(page); // Sanity: no facet active at search-fire time. - const facetActiveBefore = await page.evaluate(() => { - return document.querySelectorAll('#materialFilterBody input[type="checkbox"]:checked').length > 0 - || document.querySelectorAll('#contextFilterBody input[type="checkbox"]:checked').length > 0 - || document.querySelectorAll('#objectTypeFilterBody input[type="checkbox"]:checked').length > 0; + // (Source-filter sanity is NOT checked here because Quarto's + // see-also rendering duplicates `#sourceFilter` checkboxes in the + // page DOM, so a naive count is ambiguous. Source-filter snapshot + // coverage is left as a TODO requiring a non-Quarto fixture.) + const facetActive = await page.evaluate(() => + document.querySelectorAll('#materialFilterBody input[type="checkbox"]:checked').length > 0 + || document.querySelectorAll('#contextFilterBody input[type="checkbox"]:checked').length > 0 + || document.querySelectorAll('#objectTypeFilterBody input[type="checkbox"]:checked').length > 0 + ); + expect(facetActive).toBe(false); + + // Wrap `db.query` to hold COUNT(*) queries open for 1.5s. The page + // exposes the OJS `db` reactive value via + // `_ojs.ojsConnector.mainModule.value('db')` — same accessor pattern + // used by `url-roundtrip.spec.js` for `viewer`. The wrap is scoped + // to this test's page context so other tests aren't affected. + await page.evaluate(async () => { + const db = await window._ojs.ojsConnector.mainModule.value('db'); + if (!db.__countDelayInstalled) { + const orig = db.query.bind(db); + db.query = async (sql, ...rest) => { + if (typeof sql === 'string' && /SELECT\s+COUNT\(\*\)/i.test(sql)) { + await new Promise(r => setTimeout(r, 1500)); + } + return orig(sql, ...rest); + }; + db.__countDelayInstalled = true; + } }); - expect(facetActiveBefore).toBe(false); await runSearch(page, 'pottery'); - // Wait for the SELECT to visibly settle (initial "50+" label or the - // final "50 of N" — either proves SELECT done, and toggling now - // simulates a user mid-flight facet change). + // EXACT "50+" wait — proves SELECT has settled AND COUNT has not yet + // emitted its post-finally log. This is the window the snapshot + // must survive. Loosening this back to "50+ OR 50 of N" would + // restore the timing flake Codex round 3 flagged. await page.waitForFunction( - () => /^(50\+|50 of [\d,]+) results for "pottery"$/.test(document.getElementById('searchResults')?.textContent || ''), + () => document.getElementById('searchResults')?.textContent === '50+ results for "pottery"', null, { timeout: 90000 } ); - // Tick a material facet — this is the toggle the snapshot must - // survive. Programmatic .click() is used so the change event fires - // and the page's normal handlers run (mirrors user interaction). + // Toggle the facet checkbox. Programmatic .click() ensures the + // change event fires through the page's normal handlers. await page.evaluate(() => { - const cb = document.querySelector('#materialFilterBody input[type="checkbox"]'); - if (cb) cb.click(); + const mat = document.querySelector('#materialFilterBody input[type="checkbox"]'); + if (mat) mat.click(); }); - // Wait for the search to fully settle: either the final "50 of N" - // label appeared (COUNT completed before or after the toggle), or - // the structured log row landed for this search id. + // Now wait for the final "50 of N" label. The COUNT was held by + // the wrap for 1.5s, then runs against the snapshotted SQL, then + // `finally` logs against the snapshotted booleans. await page.waitForFunction( () => /^50 of [\d,]+ results for "pottery"$/.test(document.getElementById('searchResults')?.textContent || ''), null, { timeout: 90000 } ); - // Pull the log payload for this search (filter by term). With the - // snapshot, `has_facet_filter` must be false (state at search-fire - // time). Without the snapshot, the DOM-live read in `finally` would - // see the post-toggle checkbox and report true. const potteryLogs = logs.filter(l => l.term === 'pottery'); expect(potteryLogs.length).toBeGreaterThan(0); const last = potteryLogs[potteryLogs.length - 1]; + // The headline assertion: the snapshotted boolean must be false + // even though the user ticked a facet between SELECT and COUNT. expect(last.has_facet_filter).toBe(false); expect(last.results_count).toBe(50); expect(last.total_count).not.toBeNull(); expect(last.total_count).toBeGreaterThan(50); - // Search itself was not superseded — only a facet toggle happened, - // not a fresh search submission. expect(last.superseded).toBe(false); });