From 9a9ca9ceff4a286efdf056dbfee31b7c3289cbe9 Mon Sep 17 00:00:00 2001 From: Jarrod Servilla Date: Mon, 8 Jun 2026 20:10:02 -0400 Subject: [PATCH 1/2] feat(scryfall): ingest curated Japanese collector printings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enrich the bulk ingest with Japanese-exclusive-art and Kamigawa full-art printings pulled from the Scryfall search API. These link to existing English Cards by name, so no new Card rows or oracle/legality drift — only Printings gain `lang` and `printed_name`. Key changes: - Add `lang` + `printed_name` to Printing model and map them from the Scryfall card payload (both feed the version hash) - Add `ingestCollectorPrintings` step running after bulk upsert and before the checkpoint commit, paginating search and deduping by scryfallId - Add JP_COLLECTOR_QUERIES as the curated source of search queries - Treat a 404 search response as an empty result, not an error - Fix token upsert target table: card_token -> card_tokens (@@map'd plural; singular threw 42P01 in prod) --- lib/scryfall/__tests__/map.test.ts | 27 ++++ lib/scryfall/jp-collector-queries.ts | 18 +++ lib/scryfall/map.ts | 2 + lib/scryfall/schema.ts | 1 + .../migration.sql | 3 + prisma/schema.prisma | 2 + workflows/scryfall/__tests__/ingest.test.ts | 9 ++ workflows/scryfall/__tests__/steps.test.ts | 128 ++++++++++++++++++ workflows/scryfall/ingest.ts | 12 ++ workflows/scryfall/steps.ts | 94 ++++++++++++- 10 files changed, 293 insertions(+), 3 deletions(-) create mode 100644 lib/scryfall/jp-collector-queries.ts create mode 100644 prisma/migrations/20260608223646_printing_lang/migration.sql diff --git a/lib/scryfall/__tests__/map.test.ts b/lib/scryfall/__tests__/map.test.ts index 2b0f989..18fc6a9 100644 --- a/lib/scryfall/__tests__/map.test.ts +++ b/lib/scryfall/__tests__/map.test.ts @@ -327,6 +327,33 @@ describe("toPrintingCreate", () => { expect(a.version).toBe(b.version); }); + it("defaults lang from card.lang and 
printedName null when absent", () => { + const p = toPrintingCreate(1, makeCard()); + expect(p.lang).toBe("en"); + expect(p.printedName).toBeNull(); + }); + + it("maps lang and printedName from a Japanese printing", () => { + const p = toPrintingCreate( + 1, + makeCard({ lang: "ja", printed_name: "対抗呪文" }), + ); + expect(p.lang).toBe("ja"); + expect(p.printedName).toBe("対抗呪文"); + }); + + it("changing lang changes the version", () => { + const a = toPrintingCreate(1, makeCard()); + const b = toPrintingCreate(1, makeCard({ lang: "ja" })); + expect(a.version).not.toBe(b.version); + }); + + it("changing printedName changes the version", () => { + const a = toPrintingCreate(1, makeCard({ printed_name: "稲妻" })); + const b = toPrintingCreate(1, makeCard({ printed_name: "対抗呪文" })); + expect(a.version).not.toBe(b.version); + }); + it("changing setCode changes the version", () => { const a = toPrintingCreate(1, makeCard()); const b = toPrintingCreate(1, makeCard({ set: "other" })); diff --git a/lib/scryfall/jp-collector-queries.ts b/lib/scryfall/jp-collector-queries.ts new file mode 100644 index 0000000..c506695 --- /dev/null +++ b/lib/scryfall/jp-collector-queries.ts @@ -0,0 +1,18 @@ +// Source of truth for which Japanese collector printings the ingest enriches +// after the English bulk upsert. Each entry is a Scryfall search query run with +// `unique=prints`; results across queries are deduped by `scryfallId` before +// upsert. Spot-check a query's result count before adding it. +// +// The art tag does the heavy lifting — `art:japanese-exclusive-art` already +// isolates Japanese-exclusive-art printings across every set (sta, soa, war, +// iko, sld, snc, ...), so new exclusive-art sets are picked up with no +// per-set maintenance. NEO is a named supplement because its soft-glow / +// ukiyo-e treatments reuse the English art (a finish/frame, not exclusive art) +// and so fall outside the art tag. 
+export const JP_COLLECTOR_QUERIES = [ + // Cross-set Japanese-exclusive-art collector printings. `-is:promo` drops the + // scattered promo sets (Player Rewards, Worlds, premium-foil). + "art:japanese-exclusive-art lang:ja -is:promo", + // Kamigawa soft-glow / ukiyo-e reuse English art, so the art tag misses them. + "set:neo lang:ja is:fullart", +] as const; diff --git a/lib/scryfall/map.ts b/lib/scryfall/map.ts index c093af5..7a1315b 100644 --- a/lib/scryfall/map.ts +++ b/lib/scryfall/map.ts @@ -142,6 +142,8 @@ export function toPrintingCreate( priceEurFoil: parsePrice(card.prices?.eur_foil), priceEurEtched: parsePrice(card.prices?.eur_etched), rarity: normalizeRarity(card.rarity), + lang: card.lang, + printedName: card.printed_name ?? null, }; return { ...base, version: hashObject(base) }; diff --git a/lib/scryfall/schema.ts b/lib/scryfall/schema.ts index cc41be4..e49bdbf 100644 --- a/lib/scryfall/schema.ts +++ b/lib/scryfall/schema.ts @@ -48,6 +48,7 @@ export const ScryfallCardSchema = z layout: z.string(), games: z.array(z.string()), name: z.string().min(1), + printed_name: z.string().optional(), type_line: z.string().optional(), oracle_text: z.string().optional(), mana_cost: z.string().optional(), diff --git a/prisma/migrations/20260608223646_printing_lang/migration.sql b/prisma/migrations/20260608223646_printing_lang/migration.sql new file mode 100644 index 0000000..bf313be --- /dev/null +++ b/prisma/migrations/20260608223646_printing_lang/migration.sql @@ -0,0 +1,3 @@ +-- AlterTable +ALTER TABLE "printing" ADD COLUMN "lang" TEXT NOT NULL DEFAULT 'en', +ADD COLUMN "printed_name" TEXT; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 4790c64..252d380 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -148,6 +148,8 @@ model Printing { priceEurFoil Decimal? @map("price_eur_foil") @db.Decimal(10, 2) priceEurEtched Decimal? @map("price_eur_etched") @db.Decimal(10, 2) rarity Rarity? + lang String @default("en") + printedName String? 
@map("printed_name") version String? deckCards DeckCard[] holdings Holding[] diff --git a/workflows/scryfall/__tests__/ingest.test.ts b/workflows/scryfall/__tests__/ingest.test.ts index 767cb21..3c44b5e 100644 --- a/workflows/scryfall/__tests__/ingest.test.ts +++ b/workflows/scryfall/__tests__/ingest.test.ts @@ -12,6 +12,7 @@ vi.mock("../steps", () => ({ releaseIngestLock: vi.fn(), downloadAndStage: vi.fn(), upsertBatch: vi.fn(), + ingestCollectorPrintings: vi.fn(), commitScryfallCheckpoint: vi.fn(), cleanupStaging: vi.fn(), })); @@ -23,6 +24,7 @@ import { downloadAndStage, fetchBulkManifest, getLastCheckpoint, + ingestCollectorPrintings, releaseIngestLock, SCRYFALL_SOURCE, upsertBatch, @@ -35,6 +37,7 @@ const mockedAcquireLock = vi.mocked(acquireIngestLock); const mockedReleaseLock = vi.mocked(releaseIngestLock); const mockedDownload = vi.mocked(downloadAndStage); const mockedUpsert = vi.mocked(upsertBatch); +const mockedIngestJp = vi.mocked(ingestCollectorPrintings); const mockedCommit = vi.mocked(commitScryfallCheckpoint); const mockedCleanup = vi.mocked(cleanupStaging); @@ -54,6 +57,7 @@ function emptyBatchStats() { beforeEach(() => { vi.clearAllMocks(); mockedUpsert.mockResolvedValue(emptyBatchStats()); + mockedIngestJp.mockResolvedValue(emptyBatchStats()); mockedCommit.mockResolvedValue(undefined); mockedCleanup.mockResolvedValue(undefined); mockedAcquireLock.mockResolvedValue(true); @@ -99,6 +103,10 @@ describe("scryfallIngestWorkflow", () => { callOrder.push("upsert"); return emptyBatchStats(); }); + mockedIngestJp.mockImplementation(async () => { + callOrder.push("ingestCollectorPrintings"); + return emptyBatchStats(); + }); mockedCommit.mockImplementation(async () => { callOrder.push("commitScryfallCheckpoint"); }); @@ -125,6 +133,7 @@ describe("scryfallIngestWorkflow", () => { "download", "upsert", "upsert", + "ingestCollectorPrintings", "commitScryfallCheckpoint", "cleanup", ]); diff --git a/workflows/scryfall/__tests__/steps.test.ts 
b/workflows/scryfall/__tests__/steps.test.ts index 980fdd5..cb944d9 100644 --- a/workflows/scryfall/__tests__/steps.test.ts +++ b/workflows/scryfall/__tests__/steps.test.ts @@ -12,6 +12,7 @@ import { downloadAndStage, fetchBulkManifest, getLastCheckpoint, + ingestCollectorPrintings, invalidateSearchCache, releaseIngestLock, SCRYFALL_SOURCE, @@ -872,6 +873,12 @@ describe("upsertBatch — token enrichment", () => { executeRawCallsBefore, ); expect(mockedPrisma.cardToken.upsert).not.toHaveBeenCalled(); + // Guard the table name: the CardToken model is @@map'd to `card_tokens` + // (plural). A singular `card_token` target throws 42P01 in prod. + const tokenCall = mockedPrisma.$executeRaw.mock.calls.at(-1)!; + const sql = (tokenCall[0] as readonly string[]).join(""); + expect(sql).toContain("INSERT INTO card_tokens"); + expect(sql).not.toMatch(/INSERT INTO card_token\b/); }); it("does not create CardToken rows for meld_part, meld_result, or combo_piece parts", async () => { @@ -982,6 +989,127 @@ describe("upsertBatch — token enrichment", () => { }); }); +describe("ingestCollectorPrintings", () => { + function searchResponse( + cards: ScryfallCard[], + opts: { has_more?: boolean; next_page?: string } = {}, + ): Response { + return new Response( + JSON.stringify({ + object: "list", + data: cards, + has_more: opts.has_more ?? 
false, + next_page: opts.next_page, + }), + { status: 200 }, + ); + } + + it("happy path: upserts JP printings linked to existing English Cards", async () => { + const jp1 = makeCard({ + id: "jp-1", + name: "Counterspell", + lang: "ja", + printed_name: "対抗呪文", + set: "sta", + collector_number: "100", + }); + const jp2 = makeCard({ + id: "jp-2", + name: "Lightning Bolt", + lang: "ja", + printed_name: "稲妻", + set: "neo", + collector_number: "200", + }); + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValueOnce(searchResponse([jp1])) + .mockResolvedValueOnce(searchResponse([jp2])); + mockedPrisma.card.findMany.mockResolvedValue([ + { id: 1, name: "Counterspell" }, + { id: 2, name: "Lightning Bolt" }, + ] as never); + mockedPrisma.printing.findMany.mockResolvedValue([] as never); + mockedPrisma.printing.createMany.mockResolvedValue({ count: 2 } as never); + + const stats = await ingestCollectorPrintings(); + + expect(stats.printingsInserted).toBe(2); + expect(mockedPrisma.printing.createMany).toHaveBeenCalledWith( + expect.objectContaining({ + data: expect.arrayContaining([ + expect.objectContaining({ lang: "ja", printedName: "対抗呪文" }), + ]), + }), + ); + // One fetch per query (no pagination), both queries issued. 
+ expect(fetchSpy).toHaveBeenCalledTimes(2); + fetchSpy.mockRestore(); + }); + + it("follows next_page to collect every printing across pages", async () => { + const jp1 = makeCard({ id: "jp-1", name: "A", lang: "ja" }); + const jp2 = makeCard({ id: "jp-2", name: "B", lang: "ja" }); + const fetchSpy = vi + .spyOn(globalThis, "fetch") + // query 1, page 1 → has_more + .mockResolvedValueOnce( + searchResponse([jp1], { + has_more: true, + next_page: "https://api.scryfall.com/cards/search?page=2", + }), + ) + // query 1, page 2 + .mockResolvedValueOnce(searchResponse([jp2])) + // query 2, empty + .mockResolvedValueOnce(searchResponse([])); + mockedPrisma.card.findMany.mockResolvedValue([ + { id: 1, name: "A" }, + { id: 2, name: "B" }, + ] as never); + mockedPrisma.printing.findMany.mockResolvedValue([] as never); + mockedPrisma.printing.createMany.mockResolvedValue({ count: 2 } as never); + + const stats = await ingestCollectorPrintings(); + + expect(stats.printingsInserted).toBe(2); + expect(fetchSpy).toHaveBeenCalledTimes(3); + fetchSpy.mockRestore(); + }); + + it("skips printings whose name has no matching Card", async () => { + const jp1 = makeCard({ id: "jp-1", name: "Unknown Card", lang: "ja" }); + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValueOnce(searchResponse([jp1])) + .mockResolvedValueOnce(searchResponse([])); + mockedPrisma.card.findMany.mockResolvedValue([] as never); + + const stats = await ingestCollectorPrintings(); + + expect(stats.printingsInserted).toBe(0); + expect(mockedPrisma.printing.createMany).not.toHaveBeenCalled(); + fetchSpy.mockRestore(); + }); + + it("treats a 404 (no cards matched) as an empty result, not an error", async () => { + const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response( + JSON.stringify({ object: "error", code: "not_found" }), + { status: 404 }, + ), + ); + + const stats = await ingestCollectorPrintings(); + + expect(stats.printingsInserted).toBe(0); + // No cards 
collected → never reaches the Card lookup. + expect(mockedPrisma.card.findMany).not.toHaveBeenCalled(); + fetchSpy.mockRestore(); + }); +}); + describe("invalidateSearchCache", () => { it("revalidates the card-search tag exactly once", async () => { await invalidateSearchCache(); diff --git a/workflows/scryfall/ingest.ts b/workflows/scryfall/ingest.ts index fac6f43..4439f1b 100644 --- a/workflows/scryfall/ingest.ts +++ b/workflows/scryfall/ingest.ts @@ -7,6 +7,7 @@ import { downloadAndStage, fetchBulkManifest, getLastCheckpoint, + ingestCollectorPrintings, type IngestStats, releaseIngestLock, SCRYFALL_SOURCE, @@ -71,6 +72,17 @@ export async function scryfallIngestWorkflow() { stats.skipped += batchStats.skipped; } + // Enrich with curated Japanese collector printings via the search API. + // Cards are guaranteed present (bulk upsert above completed); a failure + // here throws inside the try, so the finally still releases the lock and + // cleans staging. + const jpStats = await ingestCollectorPrintings(); + stats.printingsInserted += jpStats.printingsInserted; + stats.printingsUpdated += jpStats.printingsUpdated; + stats.printingsUnchanged += jpStats.printingsUnchanged; + stats.printingsFailed += jpStats.printingsFailed; + stats.skipped += jpStats.skipped; + // Single atomic step so cache-invalidate failures don't strand the // checkpoint ahead of a stale cache. See `commitScryfallCheckpoint`. 
await commitScryfallCheckpoint(SCRYFALL_SOURCE, manifest.updatedAt); diff --git a/workflows/scryfall/steps.ts b/workflows/scryfall/steps.ts index e785284..2213456 100644 --- a/workflows/scryfall/steps.ts +++ b/workflows/scryfall/steps.ts @@ -13,6 +13,7 @@ import { } from "@/lib/scryfall/diff"; import { fetchWithRetry } from "@/lib/http"; import { filterCard } from "@/lib/scryfall/filter"; +import { JP_COLLECTOR_QUERIES } from "@/lib/scryfall/jp-collector-queries"; import { type CardCreateData, type PrintingCreateData, @@ -100,6 +101,10 @@ export type IngestStats = { type BatchStats = IngestStats; +// JP collector enrichment touches only Printings (Cards already exist), but it +// reuses the shared printing helpers, which write the full BatchStats shape. +type PrintingStats = BatchStats; + function emptyStats(): BatchStats { return { cardsInserted: 0, @@ -252,6 +257,87 @@ export async function commitScryfallCheckpoint( revalidateTag("card-search", "max"); } +const SCRYFALL_SEARCH_PAGE_DELAY_MS = 100; + +// Paginate a Scryfall `/cards/search` query with `unique=prints`, following +// `next_page` while `has_more`. A 404 means the query matched no cards — a +// valid empty result, not an error. ~100ms courtesy delay between pages keeps +// us within Scryfall's rate guidance. Not a "use step": it's a helper invoked +// from within the `ingestCollectorPrintings` step. 
+async function fetchScryfallSearch( + query: string, +): Promise<ScryfallCard[]> { + const out: ScryfallCard[] = []; + let url: string | undefined = `https://api.scryfall.com/cards/search?q=${encodeURIComponent( + query, + )}&unique=prints`; + let firstPage = true; + while (url) { + if (!firstPage) { + await new Promise((r) => setTimeout(r, SCRYFALL_SEARCH_PAGE_DELAY_MS)); + } + firstPage = false; + const res = await fetchWithRetry(url, { + headers: { "User-Agent": USER_AGENT, Accept: "application/json" }, + }); + // Scryfall returns 404 with an `object: "error"` body when a query matches + // nothing — a legitimate empty result for a set we don't carry yet. + if (res.status === 404) return out; + if (!res.ok) throwForStatus(`scryfall search "${query}"`, res); + const body = (await res.json()) as { + data?: unknown[]; + has_more?: boolean; + next_page?: string; + }; + for (const raw of body.data ?? []) { + const parsed = parseScryfallCard(raw); + if (parsed) out.push(parsed); + } + url = body.has_more ? body.next_page : undefined; + } + return out; +} + +// Enrichment step: pull the curated JP collector printings via the search API +// and upsert them as Printings against the already-ingested English Cards. +// Runs after the bulk upsert (Cards guaranteed present) and before the +// checkpoint commit. JP printings keep `name` in English (`printed_name` holds +// the Japanese title), so each links to its existing Card by name — no new +// Card rows, no oracle/legality drift. +export async function ingestCollectorPrintings(): Promise<PrintingStats> { + "use step"; + const stats = emptyStats(); + + // Fetch every curated query, deduping by scryfallId across queries. 
+ const byScryfallId = new Map(); + for (const query of JP_COLLECTOR_QUERIES) { + const cards = await fetchScryfallSearch(query); + for (const c of cards) { + if (!byScryfallId.has(c.id)) byScryfallId.set(c.id, c); + } + } + const cards = [...byScryfallId.values()]; + if (cards.length === 0) return stats; + + // Resolve cardIds by English name. `buildPrintings` skips any card whose name + // has no matching Card row. + const names = [...new Set(cards.map((c) => c.name))]; + const existing = await prisma.card.findMany({ + where: { name: { in: names } }, + select: { id: true, name: true }, + }); + const idByName = new Map(existing.map((r) => [r.name, r.id] as const)); + + const printings = buildPrintings(cards, idByName, stats); + if (printings.length === 0) return stats; + + const existingPrintings = await loadExistingPrintings(printings); + const diff = diffPrintings(printings, existingPrintings); + await applyPrintingWrites(diff, stats); + + return stats; +} + async function loadExistingCards( cardByName: Map, ) { @@ -404,13 +490,13 @@ async function applyPrintingWrites( ${p.priceUsd}::decimal(10,2), ${p.priceUsdFoil}::decimal(10,2), ${p.priceUsdEtched}::decimal(10,2), ${p.priceEur}::decimal(10,2), ${p.priceEurFoil}::decimal(10,2), ${p.priceEurEtched}::decimal(10,2), - ${p.rarity}::"rarity", ${p.version})`, + ${p.rarity}::"rarity", ${p.lang}, ${p.printedName}, ${p.version})`, ); await prisma.$executeRaw` INSERT INTO printing (card_id, scryfall_id, set_code, set_name, collector_number, is_serialized, finishes, image_uri, back_image_uri, price_usd, price_usd_foil, price_usd_etched, price_eur, - price_eur_foil, price_eur_etched, rarity, version) + price_eur_foil, price_eur_etched, rarity, lang, printed_name, version) VALUES ${Prisma.join(rows)} ON CONFLICT (scryfall_id) DO UPDATE SET card_id = EXCLUDED.card_id, @@ -428,6 +514,8 @@ async function applyPrintingWrites( price_eur_foil = EXCLUDED.price_eur_foil, price_eur_etched = EXCLUDED.price_eur_etched, rarity = 
EXCLUDED.rarity, + lang = EXCLUDED.lang, + printed_name = EXCLUDED.printed_name, version = EXCLUDED.version WHERE printing.version IS DISTINCT FROM EXCLUDED.version `; @@ -476,7 +564,7 @@ async function upsertTokens( Prisma.sql`(${r.cardId}, ${r.tokenName}, ${r.tokenScryfallId})`, ); await prisma.$executeRaw` - INSERT INTO card_token (card_id, token_name, token_scryfall_id) + INSERT INTO card_tokens (card_id, token_name, token_scryfall_id) VALUES ${Prisma.join(rows)} ON CONFLICT (card_id, token_scryfall_id) DO UPDATE SET token_name = EXCLUDED.token_name From cfd202848ac75c3a2fdefbea7fc815aab19b7c55 Mon Sep 17 00:00:00 2001 From: Jarrod Servilla Date: Mon, 8 Jun 2026 20:36:57 -0400 Subject: [PATCH 2/2] fix(scryfall): make JP collector enrichment best-effort A search-API outage in the JP collector-printing enrichment step threw before the checkpoint commit, stranding the checkpoint and forcing the next cron to re-download the full ~500MB bulk. Make enrichment best-effort and harden its filtering and tests. 
Key changes: - ingest: wrap ingestCollectorPrintings in try/catch + logWarn so the checkpoint commits even when JP enrichment rejects - filter: extract language-agnostic isPaperPlayable guard and apply it in the JP search path to keep digital-only/token-layout printings out - steps: add inter-query courtesy delay mirroring the inter-page delay - tests: cover the printing UPDATE-path SQL (lang/printed_name + version guard) and the checkpoint-commits-on-JP-failure path; assert query count against JP_COLLECTOR_QUERIES.length - runbook: document the one-time printingsUpdated spike on first deploy --- docs/runbook/ingest.md | 21 +++++++++- lib/scryfall/filter.ts | 11 ++++- workflows/scryfall/__tests__/ingest.test.ts | 26 ++++++++++++ workflows/scryfall/__tests__/steps.test.ts | 45 ++++++++++++++++++++- workflows/scryfall/ingest.ts | 27 ++++++++----- workflows/scryfall/steps.ts | 14 ++++++- 6 files changed, 128 insertions(+), 16 deletions(-) diff --git a/docs/runbook/ingest.md b/docs/runbook/ingest.md index ac2414d..76d17c6 100644 --- a/docs/runbook/ingest.md +++ b/docs/runbook/ingest.md @@ -121,6 +121,25 @@ The chunk shape is owned by the workflow; check `workflows/scryfall/steps.ts` fo --- -## 6. Related runbooks +## 6. First run after the JP-collector-printings deploy (one-time `printingsUpdated` spike) + +The commit that added curated Japanese collector printings (`feat(scryfall): ingest curated +Japanese collector printings`) folded `lang` and `printedName` into the printing **version** +hash (`lib/scryfall/map.ts`). Every printing row stored before that deploy carries a hash that +predates those two fields, so the **first** post-deploy Scryfall bulk run sees every printing +as `toUpdate` and rewrites the entire `printing` table once. + +Expected on that first run only: + +- `printingsUpdated` ≈ the full printing count (not the usual near-zero delta). +- Elevated step duration and Postgres WAL volume for `upsertBatch` (one large rewrite). 
+ +This is **bounded and expected** — not a regression. Subsequent runs return to the normal +near-zero `printingsUpdated`. If the spike repeats on a later run, that *is* anomalous: check +that the version hash is stable (no per-run nondeterminism in `hashObject(base)`). + +--- + +## 7. Related runbooks - `docs/ops/postgres-runbook.md` — Postgres pressure during/after ingest, autovacuum tuning on `card`/`printing`, and §10 specifically for "ingest hasn't run lately" diagnostics. diff --git a/lib/scryfall/filter.ts b/lib/scryfall/filter.ts index ec3a39e..7f41856 100644 --- a/lib/scryfall/filter.ts +++ b/lib/scryfall/filter.ts @@ -16,9 +16,16 @@ const DENIED_LAYOUTS = new Set([ "art_series", ]); -export function filterCard(card: ScryfallCard): boolean { - if (card.lang !== "en") return false; +// Language-agnostic playability guard: rejects non-deckable layouts and +// non-paper (digital-only) printings. Shared by the bulk path (via `filterCard`) +// and the JP collector-printing path, which carries `lang !== "en"` by design +// and so can't reuse `filterCard` directly. 
+export function isPaperPlayable(card: ScryfallCard): boolean { if (DENIED_LAYOUTS.has(card.layout)) return false; if (!card.games?.includes("paper")) return false; return true; } + +export function filterCard(card: ScryfallCard): boolean { + return card.lang === "en" && isPaperPlayable(card); +} diff --git a/workflows/scryfall/__tests__/ingest.test.ts b/workflows/scryfall/__tests__/ingest.test.ts index 3c44b5e..fbbe075 100644 --- a/workflows/scryfall/__tests__/ingest.test.ts +++ b/workflows/scryfall/__tests__/ingest.test.ts @@ -152,6 +152,32 @@ describe("scryfallIngestWorkflow", () => { ); }); + it("commits the checkpoint when JP enrichment rejects (best-effort, no strand)", async () => { + mockedFetch.mockResolvedValue({ + downloadUri: "https://d.example/file.json", + updatedAt: "2026-02-15T00:00:00Z", + }); + mockedGetCheckpoint.mockResolvedValue("2026-01-01T00:00:00Z"); + mockedDownload.mockResolvedValue({ totalBatches: 1, filterSkipped: 0 }); + mockedUpsert.mockResolvedValue(emptyBatchStats()); + // A search-API outage throws inside the enrichment step. It must NOT skip + // the checkpoint write, or the next cron re-downloads the full bulk. 
+ mockedIngestJp.mockRejectedValue(new Error("scryfall search down")); + + const result = await scryfallIngestWorkflow(); + + expect(mockedCommit).toHaveBeenCalledWith( + SCRYFALL_SOURCE, + "2026-02-15T00:00:00Z", + ); + expect(mockedCleanup).toHaveBeenCalledWith("test-run-id", 1); + expect(mockedReleaseLock).toHaveBeenCalledWith( + SCRYFALL_SOURCE, + "test-run-id", + ); + expect(result).toMatchObject({ updatedAt: "2026-02-15T00:00:00Z" }); + }); + it("does not write checkpoint when a batch fails, but still cleans up staging", async () => { mockedFetch.mockResolvedValue({ downloadUri: "https://d.example/file.json", diff --git a/workflows/scryfall/__tests__/steps.test.ts b/workflows/scryfall/__tests__/steps.test.ts index cb944d9..b85d0de 100644 --- a/workflows/scryfall/__tests__/steps.test.ts +++ b/workflows/scryfall/__tests__/steps.test.ts @@ -3,6 +3,7 @@ import { revalidateTag } from "next/cache"; import { getWritable } from "workflow"; import { Prisma } from "@/lib/generated/prisma/client"; import { prisma } from "@/lib/db"; +import { JP_COLLECTOR_QUERIES } from "@/lib/scryfall/jp-collector-queries"; import type { ScryfallCard } from "@/lib/scryfall/types"; import { getBatchStorage } from "@/lib/staging"; import { @@ -1043,8 +1044,48 @@ describe("ingestCollectorPrintings", () => { ]), }), ); - // One fetch per query (no pagination), both queries issued. - expect(fetchSpy).toHaveBeenCalledTimes(2); + // One fetch per query (no pagination), every curated query issued. + expect(fetchSpy).toHaveBeenCalledTimes(JP_COLLECTOR_QUERIES.length); + fetchSpy.mockRestore(); + }); + + it("UPDATE path: stale stored version routes the printing to a $executeRaw upsert carrying lang/printed_name and a version guard", async () => { + const jp1 = makeCard({ + id: "jp-1", + name: "Counterspell", + lang: "ja", + printed_name: "対抗呪文", + set: "sta", + collector_number: "100", + }); + // Fresh Response per call: the body is single-use and every curated query + // issues its own fetch. 
+ const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockImplementation(async () => searchResponse([jp1])); + mockedPrisma.card.findMany.mockResolvedValue([ + { id: 1, name: "Counterspell" }, + ] as never); + // Stored row carries a stale version for the fetched scryfallId, so + // diffPrintings routes it to `toUpdate` rather than `toInsert`. + mockedPrisma.printing.findMany.mockResolvedValue([ + { scryfallId: "jp-1", version: "stale" }, + ] as never); + mockedPrisma.$executeRaw.mockResolvedValue(1 as never); + + const stats = await ingestCollectorPrintings(); + + expect(stats.printingsUpdated).toBe(1); + expect(mockedPrisma.printing.createMany).not.toHaveBeenCalled(); + expect(mockedPrisma.$executeRaw).toHaveBeenCalled(); + // Guard the UPDATE SQL: it must INSERT ... ON CONFLICT carrying the new + // lang/printed_name columns and skip no-op rewrites via the version guard. + const updateCall = mockedPrisma.$executeRaw.mock.calls.at(-1)!; + const sql = (updateCall[0] as readonly string[]).join(""); + expect(sql).toContain("INSERT INTO printing"); + expect(sql).toContain("lang"); + expect(sql).toContain("printed_name"); + expect(sql).toContain("WHERE printing.version IS DISTINCT FROM"); fetchSpy.mockRestore(); }); diff --git a/workflows/scryfall/ingest.ts b/workflows/scryfall/ingest.ts index 4439f1b..0b7b5f6 100644 --- a/workflows/scryfall/ingest.ts +++ b/workflows/scryfall/ingest.ts @@ -73,15 +73,24 @@ export async function scryfallIngestWorkflow() { } // Enrich with curated Japanese collector printings via the search API. - // Cards are guaranteed present (bulk upsert above completed); a failure - // here throws inside the try, so the finally still releases the lock and - // cleans staging. 
- const jpStats = await ingestCollectorPrintings(); - stats.printingsInserted += jpStats.printingsInserted; - stats.printingsUpdated += jpStats.printingsUpdated; - stats.printingsUnchanged += jpStats.printingsUnchanged; - stats.printingsFailed += jpStats.printingsFailed; - stats.skipped += jpStats.skipped; + // Cards are guaranteed present (bulk upsert above completed). This is a + // best-effort step: a search outage must not strand the checkpoint, or the + // next cron sees the manifest changed and re-downloads the full ~500MB bulk. + // Mirror `cleanupStaging`: log + continue so `commitScryfallCheckpoint` runs. + try { + const jpStats = await ingestCollectorPrintings(); + stats.printingsInserted += jpStats.printingsInserted; + stats.printingsUpdated += jpStats.printingsUpdated; + stats.printingsUnchanged += jpStats.printingsUnchanged; + stats.printingsFailed += jpStats.printingsFailed; + stats.skipped += jpStats.skipped; + } catch (err) { + logWarn( + { source: "scryfall.ingest", workflowRunId }, + "ingestCollectorPrintings failed; committing checkpoint without JP enrichment", + err, + ); + } // Single atomic step so cache-invalidate failures don't strand the // checkpoint ahead of a stale cache. See `commitScryfallCheckpoint`. diff --git a/workflows/scryfall/steps.ts b/workflows/scryfall/steps.ts index 2213456..9f5d53d 100644 --- a/workflows/scryfall/steps.ts +++ b/workflows/scryfall/steps.ts @@ -12,7 +12,7 @@ import { diffPrintings, } from "@/lib/scryfall/diff"; import { fetchWithRetry } from "@/lib/http"; -import { filterCard } from "@/lib/scryfall/filter"; +import { filterCard, isPaperPlayable } from "@/lib/scryfall/filter"; import { JP_COLLECTOR_QUERIES } from "@/lib/scryfall/jp-collector-queries"; import { type CardCreateData, @@ -291,7 +291,10 @@ async function fetchScryfallSearch( }; for (const raw of body.data ?? 
[]) { const parsed = parseScryfallCard(raw); - if (parsed) out.push(parsed); + // JP cards carry `lang !== "en"`, so `filterCard` would drop them; apply + // the language-agnostic guard to keep digital-only / token-layout + // printings out of the paper `Printing` table. + if (parsed && isPaperPlayable(parsed)) out.push(parsed); } url = body.has_more ? body.next_page : undefined; } @@ -310,7 +313,14 @@ export async function ingestCollectorPrintings(): Promise { // Fetch every curated query, deduping by scryfallId across queries. const byScryfallId = new Map(); + let firstQuery = true; for (const query of JP_COLLECTOR_QUERIES) { + // Courtesy delay between queries, mirroring the inter-page delay in + // `fetchScryfallSearch`, to stay within Scryfall's rate guidance. + if (!firstQuery) { + await new Promise((r) => setTimeout(r, SCRYFALL_SEARCH_PAGE_DELAY_MS)); + } + firstQuery = false; const cards = await fetchScryfallSearch(query); for (const c of cards) { if (!byScryfallId.has(c.id)) byScryfallId.set(c.id, c);