From c95faa9d67471edd6f8de7be054b5a4cbf3d85cf Mon Sep 17 00:00:00 2001
From: ylm
Date: Mon, 29 Jun 2026 13:39:18 -0400
Subject: [PATCH] Indexer skips archived realms; archive cancels in-flight indexing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The full-reindex sweep source (getFullReindexRealmUrls) excludes any
realm with realm_metadata.archived_at IS NOT NULL, so the Grafana
/_full-reindex endpoint and the post-deployment hook never enqueue
from-scratch-index jobs for archived realms. fetchAllRealmsWithOwners
stays unchanged: full-reindex uses it as a lookup (URL → username), not
as a source, and the unarchive handler legitimately needs to reach a
just-archived row to enqueue its restore reindex.

handle-archive-realm calls cancelAllJobsInConcurrencyGroup on
`indexing:${realmURL}` after archiveRealm. This mirrors the realm-level
cancel-jobs endpoint (Realm.handleCancelJobsRequest): in-flight
from-scratch / incremental-index jobs are marked rejected, the pending
queue is dropped, reservations are released, and a NOTIFY jobs_finished
fans out to peer replicas. The unarchive flow rebuilds boxel_index from
disk via the existing full-reindex enqueue, so any partial work left
behind by an in-flight cancellation is discarded on restore.

Catalog / public realms are unaffected because the archive endpoint
rejects them with 422 — they can never carry archived_at.

Tests:
- full-reindex-test: getFullReindexRealmUrls returns only active realms
  across two registered realms with one archived; an unarchived realm
  reappears on the next call.
- archive-realm-test: a pending from-scratch-index job for the realm is
  marked rejected once the archive endpoint runs.

Co-Authored-By: Claude Opus 4.7 (1M context) --- .../handlers/handle-archive-realm.ts | 11 ++++ .../lib/full-reindex-realm-urls.ts | 9 +++- .../realm-server/tests/full-reindex-test.ts | 53 ++++++++++++++++++- .../server-endpoints/archive-realm-test.ts | 40 ++++++++++++++ 4 files changed, 111 insertions(+), 2 deletions(-) diff --git a/packages/realm-server/handlers/handle-archive-realm.ts b/packages/realm-server/handlers/handle-archive-realm.ts index bf278ceae83..93ee3d3cdf6 100644 --- a/packages/realm-server/handlers/handle-archive-realm.ts +++ b/packages/realm-server/handlers/handle-archive-realm.ts @@ -1,6 +1,7 @@ import type Koa from 'koa'; import { archiveRealm, + cancelAllJobsInConcurrencyGroup, createResponse, logger, SupportedMimeType, @@ -32,6 +33,16 @@ export default function handleArchiveRealm({ try { await archiveRealm(dbAdapter, new URL(realmURL)); + // Stop the realm's indexer: cancel any in-flight from-scratch / + // incremental-index job and drop the pending queue for this realm's + // concurrency group. Mirrors the realm-level cancel-jobs endpoint + // (Realm.handleCancelJobsRequest). `cancelAllJobsInConcurrencyGroup` + // marks jobs rejected and emits NOTIFY jobs_finished so peer + // replicas evict job-scoped search-cache rows. The unarchive flow + // rebuilds boxel_index from disk via the full-reindex enqueue, so + // any partial work left behind by an in-flight cancellation is + // discarded on restore. + await cancelAllJobsInConcurrencyGroup(dbAdapter, `indexing:${realmURL}`); let response = createResponse({ body: JSON.stringify( diff --git a/packages/realm-server/lib/full-reindex-realm-urls.ts b/packages/realm-server/lib/full-reindex-realm-urls.ts index 91c00dc885f..c9e0f96a2b0 100644 --- a/packages/realm-server/lib/full-reindex-realm-urls.ts +++ b/packages/realm-server/lib/full-reindex-realm-urls.ts @@ -5,9 +5,16 @@ type RealmRegistryRow = { url: string; }; +// The system-wide full-reindex source list. 
Archived realms are sealed and +// their contents can't drift while archived, so the sweep skips them — a +// realm rejoins this list when unarchive clears archived_at, and the +// unarchive handler separately enqueues the one-time reindex that brings +// boxel_index back up to date. export async function getFullReindexRealmUrls(dbAdapter: DBAdapter) { let rows = (await query(dbAdapter, [ - `SELECT url FROM realm_registry ORDER BY url`, + `SELECT url FROM realm_registry + WHERE url NOT IN (SELECT url FROM realm_metadata WHERE archived_at IS NOT NULL) + ORDER BY url`, ])) as RealmRegistryRow[]; return rows.map(({ url }) => url); diff --git a/packages/realm-server/tests/full-reindex-test.ts b/packages/realm-server/tests/full-reindex-test.ts index 7fb7db6cb1e..41770a2ca40 100644 --- a/packages/realm-server/tests/full-reindex-test.ts +++ b/packages/realm-server/tests/full-reindex-test.ts @@ -10,13 +10,19 @@ import type { VirtualNetwork, } from '@cardstack/runtime-common'; import { + archiveRealm, fullReindex, insertPermissions, logger, + unarchiveRealm, uuidv4, } from '@cardstack/runtime-common'; -import { upsertPublishedRealmInRegistry } from '../lib/realm-registry-writes.ts'; +import { getFullReindexRealmUrls } from '../lib/full-reindex-realm-urls.ts'; +import { + insertSourceRealmInRegistry, + upsertPublishedRealmInRegistry, +} from '../lib/realm-registry-writes.ts'; import { setupDB } from './helpers/index.ts'; module(basename(import.meta.filename), function (hooks) { @@ -174,4 +180,49 @@ module(basename(import.meta.filename), function (hooks) { 'no jobs are enqueued for bot-owned realms', ); }); + + module('getFullReindexRealmUrls', function () { + async function seedSourceRealm(realmURL: string) { + await insertSourceRealmInRegistry(dbAdapter, { + url: realmURL, + diskId: uuidv4(), + ownerUsername: '@owner:localhost', + }); + } + + test('returns only active realms from realm_registry', async function (assert) { + const activeA = 'http://example.com/active-a/'; + 
const activeB = 'http://example.com/active-b/'; + const archived = 'http://example.com/archived/'; + + await seedSourceRealm(activeA); + await seedSourceRealm(activeB); + await seedSourceRealm(archived); + await archiveRealm(dbAdapter, new URL(archived)); + + let urls = await getFullReindexRealmUrls(dbAdapter); + assert.deepEqual( + [...urls].sort(), + [activeA, activeB].sort(), + 'archived realms are excluded from the sweep source', + ); + }); + + test('an unarchived realm returns to the sweep source', async function (assert) { + const realmURL = 'http://example.com/restored/'; + + await seedSourceRealm(realmURL); + await archiveRealm(dbAdapter, new URL(realmURL)); + assert.notOk( + (await getFullReindexRealmUrls(dbAdapter)).includes(realmURL), + 'archived realm is absent', + ); + + await unarchiveRealm(dbAdapter, new URL(realmURL)); + assert.ok( + (await getFullReindexRealmUrls(dbAdapter)).includes(realmURL), + 'unarchived realm reappears', + ); + }); + }); }); diff --git a/packages/realm-server/tests/server-endpoints/archive-realm-test.ts b/packages/realm-server/tests/server-endpoints/archive-realm-test.ts index f63c4aee2e5..435073cb1d4 100644 --- a/packages/realm-server/tests/server-endpoints/archive-realm-test.ts +++ b/packages/realm-server/tests/server-endpoints/archive-realm-test.ts @@ -3,8 +3,10 @@ const { module, test } = QUnit; import { basename } from 'path'; import { v4 as uuidv4 } from 'uuid'; import { + FROM_SCRATCH_JOB_TIMEOUT_SEC, insertPermissions, isRealmArchived, + systemInitiatedPriority, type RealmPermissions, } from '@cardstack/runtime-common'; import { realmSecretSeed } from '../helpers/index.ts'; @@ -119,6 +121,44 @@ module(`server-endpoints/${basename(import.meta.filename)}`, function () { ); }); + test('POST /_archive-realm cancels pending indexing jobs for the realm', async function (assert) { + const owner = '@archive-owner:localhost'; + const realmURL = makeRealmURL(); + await seedSourceRealm(realmURL, { + [owner]: ['read', 'write', 
'realm-owner'], + }); + + let pending = await context.publisher.publish({ + jobType: 'from-scratch-index', + concurrencyGroup: `indexing:${realmURL}`, + timeout: FROM_SCRATCH_JOB_TIMEOUT_SEC, + priority: systemInitiatedPriority, + args: { + realmURL, + realmUsername: 'archive-owner', + clearLastModified: false, + }, + }); + + let response = await context.request + .post('/_archive-realm') + .set('Accept', 'application/vnd.api+json') + .set('Content-Type', 'application/json') + .set('Authorization', authHeader(owner)) + .send(JSON.stringify({ data: { type: 'realm', id: realmURL } })); + assert.strictEqual(response.status, 200, 'HTTP 200 status'); + + let rows = (await context.dbAdapter.execute( + `SELECT status FROM jobs WHERE id = $1`, + { bind: [pending.id] }, + )) as { status: string }[]; + assert.strictEqual( + rows[0]?.status, + 'rejected', + 'the pending indexing job is marked rejected', + ); + }); + test('POST /_archive-realm returns 403 for a non-owner', async function (assert) { const owner = '@archive-owner:localhost'; const intruder = '@intruder:localhost';