From 08a3786049a45617df2b98c3b88ca1ba6e712ce1 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 16 Apr 2026 16:09:09 -0400
Subject: [PATCH] fix(vendor): harden firecrawl trust center crawling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(vendor): harden firecrawl trust center crawling

* refactor(vendor): export TRUSTED_PORTAL_DOMAINS and add host check helper

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(vendor): add trust portal section-url discovery helper

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(vendor): add certification merge helper with status priority

Pure mergeCertifications function dedupes by canonical slug and resolves
status via verified > expired > unknown > not_certified priority, preferring
core URL/dates on ties.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(vendor): scaffold trust portal deep-scrape orchestrator with gate

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(vendor): implement trust portal deep-scrape orchestrator

Clicks through SPA sidebar sections, concatenates markdown from each,
and extracts certifications via Claude Sonnet 4.6.

* fix(vendor): escape CSS selector values and cover concurrency bound

Add cssEscapeAttr helper to sanitize `\` and `"` inside CSS double-quoted
attribute values in buildSectionScrapeOptions, preventing silent selector
no-ops for anchor slugs containing CSS-reserved characters. Add two new
tests: one verifying the escaping (using `\` which survives URL normalization)
and one confirming mapWithConcurrency covers all items when section count (8)
exceeds SECTION_CONCURRENCY (5).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(vendor): run trust portal deep-scrape after core agent

Resolves a source URL (trust center -> security page -> verified cert url),
runs deepScrapeTrustPortal, and merges certifications before returning.

* refactor(vendor): extract pickDeepScrapeSourceUrl and tighten extraction prompt

Move pickDeepScrapeSourceUrl into its own module with unit tests so
firecrawl-agent-core.ts drops below the 300-line limit. Also hoist the
Firecrawl Agent JSON schema into firecrawl-agent-schema-json.ts for the
same reason. Tighten the Sonnet 4.6 extraction prompt to explicitly
require evidence_snippet so Claude doesn't silently drop rows.

* feat(vendor): log Agent snapshot, deep-scrape decision, and persisted certs

Adds three diagnostic logs so a trigger.dev run tells the full story:

- "Firecrawl Agent returned — pre-deep-scrape snapshot" dumps the raw
  Agent links, normalized links, and cert types/statuses before the
  deep-scrape decision. Exposes what the LLM actually found.

- Deep-scrape branch logs either "source URL resolved" + merged types,
  "returned no certifications", or "skipped: no usable URL on vendor
  domain" with available links + verified certs — no more silent
  gate decisions.

- "Risk level and badges extracted" now includes the full compliance
  badge payload and the certifications array being persisted to the
  vendor record, so DB-write state is inspectable from logs.

* fix(vendor): json-stringify complex diagnostic log fields

Trigger.dev's OpenTelemetry attribute pipeline strips nested objects
and arrays — keeping only top-level scalars — so rich log payloads
like rawAgentLinks, normalizedLinks, and complianceBadges were being
silently discarded. Serialize them to JSON strings so they survive
the OTel export and surface in the dashboard / MCP span details.

* feat(vendor): rewrite Firecrawl Agent prompt — URL-discovery first

Prior prompt treated trust_center_url as just another field, so when the
Agent failed to extract certifications from a JavaScript SPA (e.g.
ui.com/trust-center) it abandoned the whole output — including the URL
the downstream deep-scrape needs.

New prompt reframes the mission:
- Primary goal: return trust_center_url even when page content is empty
  or SPA-only. Deep-scrape handles rendering; Agent just has to find.
- Explicit numbered URL paths to try when nav discovery fails, including
  third-party portals keyed off the vendor slug.
- Explicit instruction to return URLs of SPA-only pages rather than
  discarding them.
- Stricter output contract marking trust_center_url as REQUIRED when
  any trust/security/compliance surface exists on the vendor domain.
- Bumped maxCredits 2500 → 4000 to give the Agent headroom on sites
  that require multi-hop discovery.

Prompt extracted into firecrawl-agent-prompt.ts to keep core orchestrator
under the 300-line limit.

* chore(vendor): log raw firecrawl agent response for ui.com diagnosis

Adds temporary diagnostic logs capturing:
- agentResponse.success / status / error / keys (before schema parse)
- first 4KB of the raw agentResponse JSON
- first 4KB of parsed.data JSON, plus security_assessment and risk_level

The agent is returning links: null for ubiquiti even after the URL-first
prompt rewrite — need to see what it IS returning to understand whether
it's a fetch block, a model compliance issue, or a parse path we're
missing. Pushes the file to 315 lines; will roll back once diagnosed.

* fix(vendor): handle firecrawl agent processing status + extend timeouts

Discovered via new diagnostic log: the Firecrawl SDK's agent call was
returning status="processing" on ui.com because its internal poll timed
out (360s) before the agent job completed on Firecrawl's side. Our code
only guarded against status="failed", so it silently parsed the empty
response as success — leaving vendor records with no certifications
even when the agent could have found them given more time.

Changes:
- Guard on status !== "completed" instead of just "failed"; log clearly
  when SDK returns while job is still processing so timeouts are
  visible instead of silent.
- Bump agent SDK timeout 360s -> 1500s (25 min) so slow SPA trust
  centers like Ubiquiti have room to finish.
- Bump task maxDuration 10 min -> 30 min to accommodate the longer
  agent call plus deep-scrape + DB writes.

* fix(vendor): score agent payload candidates by populated fields

The firecrawl agent response has a nested shape:
  { success, status, data: { links, certifications, ... }, ... }

extractAgentPayloadCandidates returns [wrapper, wrapper.data] in that
order, and every field in vendorRiskAssessmentAgentSchema is optional.
The wrapper therefore parsed successfully as an empty object and won
the first-match .find() lookup — even though it contained no real
fields. The actual .data payload (with trust_center_url, security
page, privacy policy, etc.) was silently discarded.

Pick the candidate with the most populated schema fields instead of
the first success. This has been a latent bug on main — the ubiquiti
run on v20260415.12 showed the same "found 0 links, 0 certifications"
symptom.

* fix(vendor): remove invalid maxCredits from scrape calls

Firecrawl's v2 /scrape endpoint rejects maxCredits — that option
belongs to the Agent API, not scrape. We were passing it on both
the initial scrape and the per-section scrapes, and Firecrawl was
returning "Unrecognized key in body", causing the deep-scrape pass
to fail on its very first call.

Replace with `timeout` (2 min per scrape, within Firecrawl's 5-min
cap) which is the scrape v2 equivalent of "budget per call."

* chore(vendor): log raw initial scrape output for section discovery diag

Ubiquiti run finished with sectionCount=0 even though the initial
scrape returned 9891 chars of markdown. Need to see what
firecrawlClient.scrape actually returned in `links` to understand
whether the sidebar items are missing from the response or whether
discoverSectionUrls is wrongly filtering them out.

Logs the first 50 links and the first 2KB of markdown from the initial
scrape. Temporary diagnostic, will trim once the sidebar discovery
strategy is fixed.

* feat(vendor): llm-driven tab discovery for spa trust portals

Ubiquiti's trust center sidebar items are <button>/<div onClick>
elements with no href, so Firecrawl's `links` format returns 0 anchor
URLs for them. URL-based section discovery then had nothing to work
with and the deep-scrape only ever saw the landing tab.

Add a tab-discovery step: when URL-based discovery yields zero
sections, pass the initial markdown to Claude Sonnet 4.6 to identify
sidebar labels, then scrape each one with an executeJavascript
click-by-text action. The click script finds the matching element by
exact textContent, scrolls it into view, and clicks it. Works for any
SPA that has tab labels visible in the rendered markdown — not just
Ubiquiti.

Flow:
  1. Initial scrape -> markdown + links
  2. URL-based discovery (existing, unchanged)
  3. If urlSections.length === 0 and markdown non-empty,
     call identifySidebarTabs to get labels from the LLM
  4. Merge url-based + tab-label sections, dedupe by label, cap at 25
  5. Per-section scrape with click-by-text OR click-by-href
  6. Combine markdown, extract certs, merge

Files:
  new   trust-portal-deep-scrape-tabs.ts  (92 lines)
  edit  trust-portal-deep-scrape.ts       (+70 lines)
  edit  trust-portal-deep-scrape-sections.ts  (+tabLabel field)
  edit  trust-portal-deep-scrape.spec.ts  (1 new test, 3 updated)

* fix(vendor): apply same processing-status + timeout fixes to news agent

firecrawlResearchNews had the exact two bugs we already fixed for
firecrawlResearchCore:

1. Status guard was too loose (only `=== 'failed'`), so when the SDK
   returned `status: 'processing'` (Firecrawl still running the job
   after our SDK poll timed out) we silently proceeded to read
   agentResponse.data.news, got undefined, and logged "no news items."

2. Timeout was 360s while matching agent jobs for slow vendor sites
   routinely take 6+ minutes. Ubiquiti run hit 6m 1s and returned empty,
   matching the timeout boundary almost exactly.

Bump timeout 360s -> 1500s (matches core), guard on `!== 'completed'`,
and add the same diagnostic logs we added to core so future runs surface
the raw agent response + data shape when news comes back empty.

* refactor(vendor): extract payload + scrape-option helpers, trim verbose logs

Post-debugging cleanup. No behavior change.

Files split so both orchestrators drop back under the 300-line rule:
  - firecrawl-agent-payload.ts (58) — asRecord, extractAgentPayloadCandidates,
    countPopulatedAgentFields. Moved out of firecrawl-agent-core.ts so the
    payload-candidate logic can be shared and tested separately.
  - trust-portal-deep-scrape-scrape-options.ts (107) — cssEscapeAttr,
    buildClickByTextScript, buildInitialScrapeOptions, buildSectionScrapeOptions.
    Moved out of trust-portal-deep-scrape.ts so the scrape-option + click-by-text
    JS builders are isolated from the orchestration code.

Log trimming — drop the 4KB agent-response and 2KB markdown-head dumps from
happy-path logs. They were added for live diagnosis and landed big blobs in
every prod run. Keep scalar summary fields. Full raw-response JSON now only
logged on the exceptional "not completed" warning path where it is actually
useful, not on every successful run.

File line counts:
  firecrawl-agent-core.ts         315 -> 296
  trust-portal-deep-scrape.ts     383 -> 293
  firecrawl-agent-news.ts         172 -> 158

67/67 tests still pass.

---------

Co-authored-by: Mariano Fuentes <marfuen98@gmail.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../vendor/vendor-risk-assessment-task.ts     |  16 +-
 .../deep-scrape-source-url.spec.ts            | 147 ++++++
 .../deep-scrape-source-url.ts                 |  47 ++
 .../firecrawl-agent-core.ts                   | 313 +++++++-----
 .../firecrawl-agent-news.ts                   |  21 +-
 .../firecrawl-agent-payload.ts                |  58 +++
 .../firecrawl-agent-prompt.ts                 |  54 ++
 .../firecrawl-agent-schema-json.ts            |  95 ++++
 .../firecrawl-agent-shared.spec.ts            |  70 +++
 .../firecrawl-agent-shared.ts                 |  49 +-
 .../trust-portal-deep-scrape-merge.spec.ts    | 145 ++++++
 .../trust-portal-deep-scrape-merge.ts         |  86 ++++
 ...trust-portal-deep-scrape-scrape-options.ts | 107 ++++
 .../trust-portal-deep-scrape-sections.spec.ts | 145 ++++++
 .../trust-portal-deep-scrape-sections.ts      | 101 ++++
 .../trust-portal-deep-scrape-tabs.ts          |  92 ++++
 .../trust-portal-deep-scrape.spec.ts          | 471 ++++++++++++++++++
 .../trust-portal-deep-scrape.ts               | 293 +++++++++++
 .../vendor-risk-assessment/url-validation.ts  |  15 +-
 19 files changed, 2191 insertions(+), 134 deletions(-)
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.spec.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-payload.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-prompt.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-schema-json.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.spec.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.spec.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-scrape-options.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.spec.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-tabs.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.spec.ts
 create mode 100644 apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.ts

diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment-task.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment-task.ts
index 78210a5138..23abf82887 100644
--- a/apps/api/src/trigger/vendor/vendor-risk-assessment-task.ts
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment-task.ts
@@ -476,7 +476,9 @@ export const vendorRiskAssessmentTask: Task<
     minTimeoutInMs: 1000,
     maxTimeoutInMs: 10000,
   },
-  maxDuration: 1000 * 60 * 10,
+  // 30 minutes total: Firecrawl Agent can take up to 25 min on slow SPA
+  // trust centers (Ubiquiti), and deep-scrape + DB writes need room too.
+  maxDuration: 1000 * 60 * 30,
   run: async (payload) => {
     await tags.add([`org:${payload.organizationId}`]);
 
@@ -1061,7 +1063,19 @@ export const vendorRiskAssessmentTask: Task<
           badgeCount: Array.isArray(complianceBadges)
             ? complianceBadges.length
             : 0,
+          complianceBadgesJson: JSON.stringify(complianceBadges ?? null),
           hasLogo: Boolean(logoUrl),
+          certificationsInAssessmentJson: JSON.stringify(
+            Array.isArray(
+              (coreData as { certifications?: unknown })?.certifications,
+            )
+              ? (
+                  coreData as {
+                    certifications?: Array<{ type: string; status: string }>;
+                  }
+                ).certifications
+              : [],
+          ),
         });
 
         // Update vendor with core data (keep status in_progress — news may still be loading)
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.spec.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.spec.ts
new file mode 100644
index 0000000000..c6dbc19ff6
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.spec.ts
@@ -0,0 +1,147 @@
+import type { VendorRiskAssessmentCertification } from './agent-types';
+import { pickDeepScrapeSourceUrl } from './deep-scrape-source-url';
+
+const cert = (
+  overrides: Partial<VendorRiskAssessmentCertification> = {},
+): VendorRiskAssessmentCertification => ({
+  type: 'SOC 2 Type II',
+  status: 'verified',
+  issuedAt: null,
+  expiresAt: null,
+  url: null,
+  ...overrides,
+});
+
+describe('pickDeepScrapeSourceUrl', () => {
+  const vendorDomain = 'acme.com';
+
+  it("prefers 'Trust & Security' link over 'Security Overview'", () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [
+        { label: 'Security Overview', url: 'https://acme.com/security' },
+        { label: 'Trust & Security', url: 'https://acme.com/trust' },
+      ],
+      certifications: [],
+    });
+    expect(result).toBe('https://acme.com/trust');
+  });
+
+  it("falls back to 'Security Overview' when no 'Trust & Security' link", () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [{ label: 'Security Overview', url: 'https://acme.com/security' }],
+      certifications: [],
+    });
+    expect(result).toBe('https://acme.com/security');
+  });
+
+  it('falls back to a verified cert URL on the vendor domain when no labelled links match', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [],
+      certifications: [
+        cert({ url: 'https://acme.com/reports/soc2.pdf', status: 'verified' }),
+      ],
+    });
+    expect(result).toBe('https://acme.com/reports/soc2.pdf');
+  });
+
+  it('skips subdomain-matching cert URL when status is not verified', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [],
+      certifications: [
+        cert({ url: 'https://trust.acme.com/iso', status: 'unknown' }),
+      ],
+    });
+    expect(result).toBeNull();
+  });
+
+  it('accepts subdomain-matching cert URL (same registrable domain)', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [],
+      certifications: [
+        cert({ url: 'https://trust.acme.com/iso', status: 'verified' }),
+      ],
+    });
+    expect(result).toBe('https://trust.acme.com/iso');
+  });
+
+  it('rejects off-domain labelled links', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [
+        { label: 'Trust & Security', url: 'https://acme.trust.page' },
+      ],
+      certifications: [],
+    });
+    expect(result).toBeNull();
+  });
+
+  it('rejects off-domain verified cert URL', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [],
+      certifications: [
+        cert({ url: 'https://acme.safebase.io/soc2', status: 'verified' }),
+      ],
+    });
+    expect(result).toBeNull();
+  });
+
+  it('rejects unparseable URLs', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [{ label: 'Trust & Security', url: 'not a url' }],
+      certifications: [cert({ url: 'also not a url', status: 'verified' })],
+    });
+    expect(result).toBeNull();
+  });
+
+  it('returns null when everything is empty', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [],
+      certifications: [],
+    });
+    expect(result).toBeNull();
+  });
+
+  it('returns first verified cert URL and ignores later verified certs', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [],
+      certifications: [
+        cert({
+          type: 'SOC 2',
+          status: 'verified',
+          url: 'https://acme.com/first.pdf',
+        }),
+        cert({
+          type: 'ISO 27001',
+          status: 'verified',
+          url: 'https://acme.com/second.pdf',
+        }),
+      ],
+    });
+    expect(result).toBe('https://acme.com/first.pdf');
+  });
+
+  it('skips verified certs whose URL is null and continues to next cert', () => {
+    const result = pickDeepScrapeSourceUrl({
+      vendorDomain,
+      links: [],
+      certifications: [
+        cert({ type: 'SOC 2', status: 'verified', url: null }),
+        cert({
+          type: 'ISO 27001',
+          status: 'verified',
+          url: 'https://acme.com/iso.pdf',
+        }),
+      ],
+    });
+    expect(result).toBe('https://acme.com/iso.pdf');
+  });
+});
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.ts
new file mode 100644
index 0000000000..38fd670c81
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/deep-scrape-source-url.ts
@@ -0,0 +1,47 @@
+import type { VendorRiskAssessmentCertification } from './agent-types';
+
+/**
+ * Resolve the best "source URL" to feed into `deepScrapeTrustPortal`.
+ *
+ * Fallback order:
+ *   1. The Agent-returned link labelled "Trust & Security" if it's on the vendor's domain.
+ *   2. The Agent-returned link labelled "Security Overview" if it's on the vendor's domain.
+ *   3. The URL of any verified certification that's on the vendor's domain.
+ *
+ * Returns null if nothing qualifies. Off-domain URLs are rejected at every tier —
+ * `deepScrapeTrustPortal` applies an additional third-party-portal gate, but
+ * this helper is the first line of defense against scraping an unrelated host.
+ */
+export function pickDeepScrapeSourceUrl(args: {
+  vendorDomain: string;
+  links: Array<{ label: string; url: string }>;
+  certifications: VendorRiskAssessmentCertification[];
+}): string | null {
+  const { vendorDomain, links, certifications } = args;
+
+  const isOnVendorDomain = (url: string): boolean => {
+    try {
+      const host = new URL(url).hostname.toLowerCase();
+      return host === vendorDomain || host.endsWith(`.${vendorDomain}`);
+    } catch {
+      return false;
+    }
+  };
+
+  const byLabel = (label: string) =>
+    links.find((l) => l.label === label && isOnVendorDomain(l.url))?.url ??
+    null;
+
+  const trustUrl = byLabel('Trust & Security');
+  if (trustUrl) return trustUrl;
+
+  const securityUrl = byLabel('Security Overview');
+  if (securityUrl) return securityUrl;
+
+  for (const cert of certifications) {
+    if (cert.status !== 'verified') continue;
+    if (cert.url && isOnVendorDomain(cert.url)) return cert.url;
+  }
+
+  return null;
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-core.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-core.ts
index cb92637fe3..93683039e3 100644
--- a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-core.ts
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-core.ts
@@ -1,7 +1,10 @@
 // apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-core.ts
 import { logger } from '@trigger.dev/sdk';
 import { vendorRiskAssessmentAgentSchema } from './agent-schema';
-import type { VendorRiskAssessmentDataV1 } from './agent-types';
+import type {
+  VendorRiskAssessmentCertification,
+  VendorRiskAssessmentDataV1,
+} from './agent-types';
 import { validateVendorUrl } from './url-validation';
 import {
   type FirecrawlSetup,
@@ -9,6 +12,16 @@ import {
   normalizeIso,
   setupFirecrawlClient,
 } from './firecrawl-agent-shared';
+import { deepScrapeTrustPortal } from './trust-portal-deep-scrape';
+import { mergeCertifications } from './trust-portal-deep-scrape-merge';
+import { pickDeepScrapeSourceUrl } from './deep-scrape-source-url';
+import { firecrawlAgentJsonSchema } from './firecrawl-agent-schema-json';
+import { buildFirecrawlAgentPrompt } from './firecrawl-agent-prompt';
+import {
+  asRecord,
+  countPopulatedAgentFields,
+  extractAgentPayloadCandidates,
+} from './firecrawl-agent-payload';
 
 export async function firecrawlResearchCore(params: {
   vendorName: string;
@@ -20,125 +33,32 @@ export async function firecrawlResearchCore(params: {
   const { firecrawlClient, vendorDomain, seedUrls } = setup;
   const { vendorName, vendorWebsite } = params;
 
-  const prompt = `Complete cyber security research on the vendor "${vendorName}" with website ${vendorWebsite}.
-
-Extract the following information:
-
-1. **Certifications**: Find all security and compliance certifications. For each one found, determine:
-   - The type of certification (SOC 2 Type I, SOC 2 Type II, ISO 27001, ISO 27017, ISO 27018, ISO 27701, ISO 42001, FedRAMP, HIPAA, PCI DSS, GDPR, TISAX, CSA STAR, C5, SOC 1, SOC 3, etc.)
-   - Whether it's currently active/verified, expired, or not certified
-   - Any issue or expiry dates mentioned
-   - Direct link to the certification report or trust page
-
-2. **Security & Legal Links**: Find the direct URLs to these pages. IMPORTANT: Many vendors host their trust portal on a third-party platform (e.g., SafeBase at trust.page, Vanta, Drata, Whistic). Prefer the actual trust portal where customers can request security reports over documentation pages that just describe compliance processes.
-   - **Trust Center / Security Portal**: The page where customers can review security posture and request compliance reports. This is NOT the docs page about security — it's the dedicated trust portal. Look for links labeled "Trust Center", "Security", "Trust Portal" in the site navigation or footer. It may be hosted on a subdomain (trust.${vendorDomain}, security.${vendorDomain}) or a third-party domain (e.g., ${vendorName.toLowerCase()}.trust.page, ${vendorName.toLowerCase()}.safebase.io). TIP: Try searching "${vendorName} trust portal" or "${vendorName} security trust center" to find it if not immediately visible on the site.
-   - **Privacy Policy**: Usually at /privacy or /privacy-policy
-   - **Terms of Service**: Usually at /terms or /tos
-   - **Security Overview**: A page describing security practices (this CAN be a docs page)
-   - **SOC 2 Report**: Direct link to request or download the SOC 2 report
-
-3. **Summary**: Provide an overall assessment of the vendor's security posture based on your findings.
-
-Focus on the official website ${vendorWebsite} and its trust/security/compliance pages.`;
+  const prompt = buildFirecrawlAgentPrompt({
+    vendorName,
+    vendorWebsite,
+    vendorDomain,
+  });
 
-  let agentResponse;
-  try {
-    agentResponse = await firecrawlClient.agent({
+  const runCoreAgent = async (urls: string[]) =>
+    firecrawlClient.agent({
       prompt,
-      urls: seedUrls,
+      urls,
       strictConstrainToURLs: false,
-      maxCredits: 2500,
-      timeout: 360,
+      maxCredits: 4000,
+      // SDK polls this long before returning whatever status it has. 360s
+      // wasn't enough for slow SPA trust centers (Ubiquiti) — SDK returned
+      // "processing" and we silently parsed empty data. 25 min gives the
+      // agent plenty of room; the new status check also ensures we surface
+      // timeouts instead of pretending success.
+      timeout: 1500,
       pollInterval: 5,
       ...({ model: 'spark-1-pro' } as Record<string, unknown>), // SDK types lag behind API — model is supported but not typed yet
-      schema: {
-        type: 'object',
-        properties: {
-          risk_level: {
-            type: 'string',
-            description:
-              'Overall vendor risk level: critical, high, medium, low, or very_low',
-          },
-          security_assessment: {
-            type: 'string',
-            description:
-              'A detailed paragraph summarizing the vendor security posture, including strengths, weaknesses, and key findings',
-          },
-          last_researched_at: {
-            type: 'string',
-            description: 'ISO 8601 date of when this research was conducted',
-          },
-          certifications: {
-            type: 'array',
-            description:
-              'All security and compliance certifications found on the vendor website',
-            items: {
-              type: 'object',
-              properties: {
-                type: {
-                  type: 'string',
-                  description:
-                    'Certification name, e.g. SOC 2 Type II, ISO 27001, FedRAMP, HIPAA, PCI DSS, GDPR, ISO 42001, ISO 27017, ISO 27018, TISAX, CSA STAR, C5, etc.',
-                },
-                status: {
-                  type: 'string',
-                  enum: ['verified', 'expired', 'not_certified', 'unknown'],
-                  description:
-                    'Whether the certification is currently active/verified, expired, not certified, or unknown',
-                },
-                issued_at: {
-                  type: 'string',
-                  description:
-                    'ISO 8601 date when the certification was issued, if mentioned',
-                },
-                expires_at: {
-                  type: 'string',
-                  description:
-                    'ISO 8601 date when the certification expires, if mentioned',
-                },
-                url: {
-                  type: 'string',
-                  description:
-                    'Direct URL to the certification report or trust page on the vendor domain',
-                },
-              },
-              required: ['type'],
-            },
-          },
-          links: {
-            type: 'object',
-            description:
-              'Direct URLs to key legal and security pages on the vendor domain',
-            properties: {
-              privacy_policy_url: {
-                type: 'string',
-                description: 'Direct URL to the privacy policy page',
-              },
-              terms_of_service_url: {
-                type: 'string',
-                description: 'Direct URL to the terms of service page',
-              },
-              trust_center_url: {
-                type: 'string',
-                description:
-                  'Direct URL to the trust portal where customers can review security posture and request reports. Prefer the dedicated trust portal (often on trust.page, safebase.io, vanta.com, or a trust. subdomain) over documentation pages.',
-              },
-              security_page_url: {
-                type: 'string',
-                description:
-                  'Direct URL to the security overview or security practices page',
-              },
-              soc2_report_url: {
-                type: 'string',
-                description:
-                  'Direct URL to request or download the SOC 2 report',
-              },
-            },
-          },
-        },
-        required: ['security_assessment'],
-      },
+      schema: firecrawlAgentJsonSchema,
     });
+
+  let agentResponse;
+  try {
+    agentResponse = await runCoreAgent(seedUrls);
   } catch (error) {
     return handleFirecrawlError(error, {
       vendorName,
@@ -147,23 +67,98 @@ Focus on the official website ${vendorWebsite} and its trust/security/compliance
     });
   }
 
-  if (!agentResponse.success || agentResponse.status === 'failed') {
+  const responseErrorMessage =
+    typeof agentResponse.error === 'string'
+      ? agentResponse.error
+      : String(agentResponse.error ?? '');
+  const shouldRetryFetchFailed =
+    agentResponse.status === 'failed' &&
+    /fetch failed/i.test(responseErrorMessage);
+
+  if (shouldRetryFetchFailed) {
+    const retryUrls = Array.from(
+      new Set([
+        ...seedUrls,
+        `https://${vendorDomain}`,
+        `https://${vendorDomain}/trust-center`,
+        `https://${vendorDomain}/trust-center#cloud-security`,
+        `https://www.${vendorDomain}`,
+        `https://www.${vendorDomain}/trust-center`,
+        `https://www.${vendorDomain}/trust-center#cloud-security`,
+      ]),
+    );
+
+    logger.warn('Firecrawl core research fetch failed; retrying once', {
+      vendorWebsite,
+      originalStatus: agentResponse.status,
+      originalError: responseErrorMessage,
+      retryUrlCount: retryUrls.length,
+    });
+
+    try {
+      agentResponse = await runCoreAgent(retryUrls);
+    } catch (error) {
+      return handleFirecrawlError(error, {
+        vendorName,
+        vendorWebsite,
+        callType: 'core_retry',
+      });
+    }
+  }
+
+  if (!agentResponse.success || agentResponse.status !== 'completed') {
+    const isProcessing = agentResponse.status === 'processing';
     logger.warn('Firecrawl core research job did not complete successfully', {
       vendorWebsite,
       status: agentResponse.status,
+      success: agentResponse.success,
       error: agentResponse.error,
+      // Full raw response only on the exceptional path — on happy path
+      // the parsed data is already surfaced by the snapshot log below.
+      agentResponseJson: JSON.stringify(agentResponse).slice(0, 4000),
+      note: isProcessing
+        ? 'SDK returned while the agent job is still running on Firecrawl. Bump timeout, or poll with getAgentStatus.'
+        : undefined,
     });
     return null;
   }
 
-  const parsed = vendorRiskAssessmentAgentSchema.safeParse(agentResponse.data);
-  if (!parsed.success) {
+  const payloadCandidates = extractAgentPayloadCandidates(agentResponse);
+  const parseAttempts = payloadCandidates.map((candidate) => ({
+    candidate,
+    result: vendorRiskAssessmentAgentSchema.safeParse(candidate),
+  }));
+  // Pick the candidate that parsed successfully AND populated the most
+  // fields. Every schema field is optional, so the outer wrapper parses
+  // as {} and would otherwise win over the nested `.data` payload — which
+  // is exactly what was dropping real agent output on the floor.
+  const successfulAttempts = parseAttempts.filter((a) => a.result.success);
+  const parsedAttempt = successfulAttempts.reduce<
+    (typeof successfulAttempts)[number] | null
+  >((best, curr) => {
+    if (!curr.result.success) return best;
+    if (!best || !best.result.success) return curr;
+    return countPopulatedAgentFields(curr.result.data) >
+      countPopulatedAgentFields(best.result.data)
+      ? curr
+      : best;
+  }, null);
+
+  if (!parsedAttempt || !parsedAttempt.result.success) {
+    const responseRecord = asRecord(agentResponse);
+    const firstAttempt = parseAttempts[0]?.result;
+    const primaryIssues =
+      firstAttempt && !firstAttempt.success ? firstAttempt.error.issues : [];
+
     logger.warn('Firecrawl core research returned invalid data shape', {
       vendorWebsite,
-      issues: parsed.error.issues,
+      issues: primaryIssues,
+      payloadCandidateCount: payloadCandidates.length,
+      responseKeys: responseRecord ? Object.keys(responseRecord) : [],
     });
     return null;
   }
+  const parsed = parsedAttempt.result;
 
   const links = parsed.data.links ?? null;
   const linkPairs: Array<{ label: string; url: string }> = [];
@@ -200,11 +195,88 @@ Focus on the official website ${vendorWebsite} and its trust/security/compliance
       url: validateVendorUrl(c.url ?? null, vendorDomain, `cert:${c.type}`),
     })) ?? [];
 
+  logger.info('Firecrawl Agent returned — pre-deep-scrape snapshot', {
+    vendorWebsite,
+    normalizedLinksJson: JSON.stringify(normalizedLinks),
+    agentCertificationsJson: JSON.stringify(
+      certifications.map((c) => ({
+        type: c.type,
+        status: c.status,
+      })),
+    ),
+    verifiedAgentCertCount: certifications.filter(
+      (c) => c.status === 'verified',
+    ).length,
+    agentRiskLevel: parsed.data.risk_level ?? null,
+  });
+
+  const deepScrapeSourceUrl = pickDeepScrapeSourceUrl({
+    vendorDomain,
+    links: normalizedLinks,
+    certifications,
+  });
+
+  let mergedCertifications: VendorRiskAssessmentCertification[] =
+    certifications;
+  if (deepScrapeSourceUrl) {
+    logger.info('Trust portal deep-scrape: source URL resolved', {
+      vendorWebsite,
+      vendorDomain,
+      sourceUrl: deepScrapeSourceUrl,
+    });
+    const deepCerts = await deepScrapeTrustPortal({
+      vendorName,
+      vendorDomain,
+      sourceUrl: deepScrapeSourceUrl,
+      firecrawlClient,
+    });
+    if (deepCerts && deepCerts.length > 0) {
+      mergedCertifications = mergeCertifications(certifications, deepCerts);
+      logger.info('Trust portal deep-scrape merged into core certifications', {
+        vendorWebsite,
+        coreCount: certifications.length,
+        deepCount: deepCerts.length,
+        mergedCount: mergedCertifications.length,
+        mergedTypesJson: JSON.stringify(
+          mergedCertifications.map((c) => ({
+            type: c.type,
+            status: c.status,
+          })),
+        ),
+      });
+    } else {
+      logger.info(
+        'Trust portal deep-scrape returned no certifications — keeping Agent result',
+        {
+          vendorWebsite,
+          deepReturnedNull: deepCerts === null,
+          deepReturnedEmpty: Array.isArray(deepCerts) && deepCerts.length === 0,
+        },
+      );
+    }
+  } else {
+    logger.info(
+      'Trust portal deep-scrape skipped: pickDeepScrapeSourceUrl found no usable URL on vendor domain',
+      {
+        vendorWebsite,
+        vendorDomain,
+        availableLinksJson: JSON.stringify(
+          normalizedLinks.map((l) => ({ label: l.label, url: l.url })),
+        ),
+        verifiedCertsWithUrlsJson: JSON.stringify(
+          certifications
+            .filter((c) => c.status === 'verified' && c.url)
+            .map((c) => ({ type: c.type, url: c.url })),
+        ),
+      },
+    );
+  }
+
   logger.info('Firecrawl core research completed', {
     vendorWebsite,
     found: {
       links: normalizedLinks.length,
-      certifications: certifications.length,
+      certifications: mergedCertifications.length,
     },
   });
 
@@ -217,7 +289,8 @@ Focus on the official website ${vendorWebsite} and its trust/security/compliance
       new Date().toISOString(),
     riskLevel: parsed.data.risk_level ?? null,
     securityAssessment: parsed.data.security_assessment ?? null,
-    certifications: certifications.length > 0 ? certifications : null,
+    certifications:
+      mergedCertifications.length > 0 ? mergedCertifications : null,
     links: normalizedLinks.length > 0 ? normalizedLinks : null,
   };
 }
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-news.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-news.ts
index 56b7154dcc..3fef48adb6 100644
--- a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-news.ts
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-news.ts
@@ -81,7 +81,11 @@ Search the company's blog, newsroom, press releases, and reputable tech news sou
       urls: [origin, `${origin}/blog`, `${origin}/newsroom`, `${origin}/press`],
       strictConstrainToURLs: false,
       maxCredits: 2500,
-      timeout: 360,
+      // SDK polls this long before returning whatever status it has.
+      // Matches core agent timeout (25 min) — news agent was hitting 360s
+      // for slow vendor sites and silently returning processing state as
+      // "no news items."
+      timeout: 1500,
       pollInterval: 5,
       ...({ model: 'spark-1-pro' } as Record<string, unknown>),
       schema: newsResponseSchema,
@@ -94,11 +98,18 @@ Search the company's blog, newsroom, press releases, and reputable tech news sou
     });
   }
 
-  if (!agentResponse.success || agentResponse.status === 'failed') {
+  if (!agentResponse.success || agentResponse.status !== 'completed') {
+    const isProcessing = agentResponse.status === 'processing';
     logger.warn('Firecrawl news research job did not complete successfully', {
       vendorWebsite,
       status: agentResponse.status,
+      success: agentResponse.success,
       error: agentResponse.error,
+      // Full raw response only on the exceptional path.
+      agentResponseJson: JSON.stringify(agentResponse).slice(0, 4000),
+      note: isProcessing
+        ? 'SDK returned while the news agent job is still running on Firecrawl. Bump timeout, or poll with getAgentStatus.'
+        : undefined,
     });
     return null;
   }
@@ -110,6 +121,12 @@ Search the company's blog, newsroom, press releases, and reputable tech news sou
   if (!Array.isArray(rawNews) || rawNews.length === 0) {
     logger.info('Firecrawl news research returned no news items', {
       vendorWebsite,
+      agentDataKeys: data ? Object.keys(data) : [],
+      rawNewsType: Array.isArray(rawNews)
+        ? 'empty-array'
+        : rawNews === undefined
+          ? 'undefined'
+          : typeof rawNews,
     });
     return null;
   }
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-payload.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-payload.ts
new file mode 100644
index 0000000000..0baf0ed694
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-payload.ts
@@ -0,0 +1,58 @@
+/**
+ * Helpers for extracting the actual structured payload out of a
+ * Firecrawl Agent response. The SDK wraps data under `.data`, but across
+ * versions it has shown up under `.output`, `.result`, or `.response` too.
+ *
+ * Because every field in `vendorRiskAssessmentAgentSchema` is optional,
+ * parsing the outer wrapper object against the schema succeeds as an
+ * empty `{}` — which would silently beat the populated inner `.data`
+ * payload under a `.find(ok)` lookup. Callers must score candidates by
+ * populated-field count and pick the best, not the first.
+ */
+
+export function asRecord(value: unknown): Record<string, unknown> | null {
+  return value && typeof value === 'object'
+    ? (value as Record<string, unknown>)
+    : null;
+}
+
+export function extractAgentPayloadCandidates(
+  agentResponse: unknown,
+): unknown[] {
+  const candidates: unknown[] = [];
+  const seen = new Set<unknown>();
+
+  const visit = (value: unknown) => {
+    if (value === undefined || seen.has(value)) return;
+    seen.add(value);
+    candidates.push(value);
+
+    const record = asRecord(value);
+    if (!record) return;
+
+    for (const key of ['data', 'output', 'result', 'response']) {
+      visit(record[key]);
+    }
+  };
+
+  visit(agentResponse);
+  return candidates;
+}
+
+/** Count fields on a parsed object that are present and non-trivially empty. */
+export function countPopulatedAgentFields(parsed: unknown): number {
+  if (!parsed || typeof parsed !== 'object') return 0;
+  let count = 0;
+  for (const value of Object.values(parsed as Record<string, unknown>)) {
+    if (value === null || value === undefined) continue;
+    if (Array.isArray(value) && value.length === 0) continue;
+    if (
+      typeof value === 'object' &&
+      !Array.isArray(value) &&
+      Object.keys(value as Record<string, unknown>).length === 0
+    )
+      continue;
+    count += 1;
+  }
+  return count;
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-prompt.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-prompt.ts
new file mode 100644
index 0000000000..4cd4059b50
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-prompt.ts
@@ -0,0 +1,54 @@
+/**
+ * Builds the Firecrawl Agent prompt for core vendor risk research.
+ *
+ * Design intent: URL discovery is the primary goal, not certification
+ * extraction. The Agent often encounters JavaScript-only trust portals
+ * (e.g. Ubiquiti) whose markdown is empty until a browser executes it —
+ * if that happens, the Agent should still return the URL so the
+ * downstream `deepScrapeTrustPortal` orchestrator can handle SPA
+ * rendering via scrape actions.
+ */
+export function buildFirecrawlAgentPrompt(params: {
+  vendorName: string;
+  vendorWebsite: string;
+  vendorDomain: string;
+}): string {
+  const { vendorName, vendorWebsite, vendorDomain } = params;
+  const vendorSlug = vendorName.toLowerCase().replace(/[^a-z0-9]+/g, '');
+
+  return `You are researching the security posture of "${vendorName}" (${vendorWebsite}).
+
+# Primary goal
+Return a trust_center_url whenever the vendor has ANY trust, security, or compliance page — even if you cannot extract certification details from it. A downstream system will deep-scrape the URL you return. Your job is to FIND the URL reliably; extracting certifications yourself is a bonus, not a requirement.
+
+# Search method
+
+1. Start at ${vendorWebsite}. Scan the top-nav, footer, and any "Security", "Trust", "Legal", "Compliance", "Resources", or "About" menus.
+
+2. If nothing is surfaced in the nav, DIRECTLY visit these common paths on ${vendorDomain} and confirm they exist:
+   - /trust-center  /trust  /security  /compliance
+   - /security-and-compliance  /trust/overview  /about/security
+   - Also check subdomains: trust.${vendorDomain}, security.${vendorDomain}
+   - Also check third-party portals: ${vendorSlug}.trust.page (SafeBase), ${vendorSlug}.safebase.io, ${vendorSlug}.vanta.com, ${vendorSlug}.drata.com
+
+3. Some vendor trust centers are JavaScript SPAs that render empty HTML without browser execution. If a trust page loads but the markdown looks thin or only contains navigation chrome (no security content at all), that's a SPA — STILL return its URL as trust_center_url. Do not discard it because you can't see the content.
+
+4. Many trust pages hide certifications behind tabs or sidebar sections (e.g. /trust-center#cloud-security on Ubiquiti, /trust-center/compliance). Visit as many sub-sections as you can; return any certifications you can extract from them.
+
+# Extraction rules for certifications
+
+Only return a certification when the page explicitly names a framework as current: SOC 2 Type I/II, ISO 27001/27017/27018/27701, ISO 42001, ISO 9001, FedRAMP, HIPAA, PCI DSS, GDPR, TISAX, CSA STAR, C5, NEN 7510. For each:
+- status: "verified" when the page lists the framework as current (includes badge images, "we are certified", "compliant with X"). "expired" only if the page explicitly says so. "not_certified" only if the page explicitly says the vendor is NOT certified. "unknown" otherwise.
+- Never invent a cert that isn't on the page. Never default to "not_certified".
+- Include issued_at / expires_at dates only when printed on the page.
+
+# Output contract (strict)
+
+- links.trust_center_url — REQUIRED whenever any of these exist on the vendor's domain or a recognised third-party portal: a /trust*, /security*, /compliance* page; a trust. or security. subdomain; or a third-party trust portal. Return the best landing URL. Leave empty ONLY when you have confirmed no such page exists anywhere.
+- links.privacy_policy_url, links.terms_of_service_url, links.security_page_url, links.soc2_report_url — return only when confirmed; otherwise empty.
+- certifications — may be an empty array. Do NOT pad it.
+- security_assessment — one paragraph summarising what you observed. If the trust portal was SPA-only and you could not read content, say so explicitly ("Trust portal at <url> appears to be a JavaScript SPA; deep-scrape will extract content").
+- risk_level — your best estimate among critical/high/medium/low/very_low based on what you found.
+
+Focus on ${vendorWebsite} and its trust/security/compliance paths. Only cite URLs on ${vendorDomain}, its subdomains, or a recognised third-party portal hosting this vendor's trust page.`;
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-schema-json.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-schema-json.ts
new file mode 100644
index 0000000000..310ee19892
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-schema-json.ts
@@ -0,0 +1,95 @@
+/**
+ * JSON Schema passed to the Firecrawl Agent `agent()` call for core vendor
+ * research. Kept as a separate module so `firecrawl-agent-core.ts` stays
+ * under the 300-line project limit.
+ *
+ * This is the Firecrawl-side schema used to shape the LLM output; runtime
+ * validation of the parsed response happens in `agent-schema.ts`
+ * (vendorRiskAssessmentAgentSchema) via Zod.
+ */
+export const firecrawlAgentJsonSchema = {
+  type: 'object',
+  properties: {
+    risk_level: {
+      type: 'string',
+      description:
+        'Overall vendor risk level: critical, high, medium, low, or very_low',
+    },
+    security_assessment: {
+      type: 'string',
+      description:
+        'A detailed paragraph summarizing the vendor security posture, including strengths, weaknesses, and key findings',
+    },
+    last_researched_at: {
+      type: 'string',
+      description: 'ISO 8601 date of when this research was conducted',
+    },
+    certifications: {
+      type: 'array',
+      description:
+        'All security and compliance certifications found on the vendor website',
+      items: {
+        type: 'object',
+        properties: {
+          type: {
+            type: 'string',
+            description:
+              'Certification name, e.g. SOC 2 Type II, ISO 27001, FedRAMP, HIPAA, PCI DSS, GDPR, ISO 42001, ISO 27017, ISO 27018, TISAX, CSA STAR, C5, etc.',
+          },
+          status: {
+            type: 'string',
+            enum: ['verified', 'expired', 'not_certified', 'unknown'],
+            description:
+              'Whether the certification is currently active/verified, expired, not certified, or unknown',
+          },
+          issued_at: {
+            type: 'string',
+            description:
+              'ISO 8601 date when the certification was issued, if mentioned',
+          },
+          expires_at: {
+            type: 'string',
+            description:
+              'ISO 8601 date when the certification expires, if mentioned',
+          },
+          url: {
+            type: 'string',
+            description:
+              'Direct URL to the certification report or trust page on the vendor domain',
+          },
+        },
+        required: ['type'],
+      },
+    },
+    links: {
+      type: 'object',
+      description:
+        'Direct URLs to key legal and security pages on the vendor domain',
+      properties: {
+        privacy_policy_url: {
+          type: 'string',
+          description: 'Direct URL to the privacy policy page',
+        },
+        terms_of_service_url: {
+          type: 'string',
+          description: 'Direct URL to the terms of service page',
+        },
+        trust_center_url: {
+          type: 'string',
+          description:
+            'Direct URL to the trust portal where customers can review security posture and request reports. Prefer the dedicated trust portal (often on trust.page, safebase.io, vanta.com, or a trust. subdomain) over documentation pages.',
+        },
+        security_page_url: {
+          type: 'string',
+          description:
+            'Direct URL to the security overview or security practices page',
+        },
+        soc2_report_url: {
+          type: 'string',
+          description: 'Direct URL to request or download the SOC 2 report',
+        },
+      },
+    },
+  },
+  required: ['security_assessment'],
+} as const;
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.spec.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.spec.ts
new file mode 100644
index 0000000000..377dbda0df
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.spec.ts
@@ -0,0 +1,70 @@
+import { setupFirecrawlClient } from './firecrawl-agent-shared';
+
+jest.mock('@trigger.dev/sdk', () => ({
+  logger: { warn: jest.fn(), info: jest.fn(), debug: jest.fn() },
+}));
+
+jest.mock('@mendable/firecrawl-js', () =>
+  jest.fn().mockImplementation(() => ({})),
+);
+
+describe('setupFirecrawlClient', () => {
+  const originalApiKey = process.env.FIRECRAWL_API_KEY;
+
+  beforeEach(() => {
+    process.env.FIRECRAWL_API_KEY = 'test-key';
+  });
+
+  afterEach(() => {
+    if (originalApiKey === undefined) {
+      delete process.env.FIRECRAWL_API_KEY;
+    } else {
+      process.env.FIRECRAWL_API_KEY = originalApiKey;
+    }
+  });
+
+  it('includes trust-center and compliance seed URLs for stronger portal discovery', () => {
+    const setup = setupFirecrawlClient({
+      vendorName: 'Ubiquiti',
+      vendorWebsite: 'https://www.ui.com',
+    });
+
+    expect(setup).not.toBeNull();
+    expect(setup?.seedUrls).toEqual(
+      expect.arrayContaining([
+        'https://www.ui.com',
+        'https://www.ui.com/trust',
+        'https://www.ui.com/trust-center',
+        'https://www.ui.com/trust-center#cloud-security',
+        'https://www.ui.com/trust-center#corporate-security',
+        'https://www.ui.com/trust-center#ndaa-compliance',
+        'https://www.ui.com/security',
+        'https://www.ui.com/security/trust-center',
+        'https://www.ui.com/security/compliance',
+        'https://www.ui.com/security-and-compliance',
+        'https://www.ui.com/compliance',
+      ]),
+    );
+
+    // Keep seeds deduplicated to avoid wasting crawl credits.
+    expect(new Set(setup?.seedUrls).size).toBe(setup?.seedUrls.length);
+  });
+
+  it('adds www fallback seeds when vendor website is an apex domain', () => {
+    const setup = setupFirecrawlClient({
+      vendorName: 'Ubiquiti',
+      vendorWebsite: 'https://ui.com',
+    });
+
+    expect(setup).not.toBeNull();
+    expect(setup?.seedUrls).toEqual(
+      expect.arrayContaining([
+        'https://ui.com',
+        'https://ui.com/trust-center#cloud-security',
+        'https://www.ui.com',
+        'https://www.ui.com/trust-center',
+        'https://www.ui.com/trust-center#cloud-security',
+      ]),
+    );
+  });
+});
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.ts
index 347df01eaa..c3b9727aa4 100644
--- a/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.ts
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/firecrawl-agent-shared.ts
@@ -69,18 +69,47 @@ export function setupFirecrawlClient(params: {
 
   const firecrawlClient = new Firecrawl({ apiKey });
 
-  const seedUrls = [
-    origin,
-    `${origin}/privacy`,
-    `${origin}/privacy-policy`,
-    `${origin}/terms`,
-    `${origin}/terms-of-service`,
-    `${origin}/security`,
-    `${origin}/trust`,
-    `${origin}/legal`,
-    `${origin}/compliance`,
+  const origins = new Set<string>([origin]);
+  try {
+    const originUrl = new URL(origin);
+    const host = originUrl.hostname.toLowerCase();
+    // Firecrawl can occasionally fail on apex hosts even when the canonical
+    // site is served from www.<domain>. Include a safe fallback origin.
+    if (host === vendorDomain) {
+      origins.add(`${originUrl.protocol}//www.${vendorDomain}`);
+    }
+  } catch {
+    // Keep existing origin-only behavior if URL parsing unexpectedly fails.
+  }
+
+  const seedUrlsFromOrigin = (baseOrigin: string): string[] => [
+    baseOrigin,
+    `${baseOrigin}/trust`,
+    `${baseOrigin}/trust-center`,
+    `${baseOrigin}/trust-center#cloud-security`,
+    `${baseOrigin}/trust-center#corporate-security`,
+    `${baseOrigin}/trust-center#ndaa-compliance`,
+    `${baseOrigin}/security`,
+    `${baseOrigin}/security/trust-center`,
+    `${baseOrigin}/security/compliance`,
+    `${baseOrigin}/security-and-compliance`,
+    `${baseOrigin}/compliance`,
+    `${baseOrigin}/compliance/security`,
+    `${baseOrigin}/privacy`,
+    `${baseOrigin}/privacy-policy`,
+    `${baseOrigin}/terms`,
+    `${baseOrigin}/terms-of-service`,
+    `${baseOrigin}/legal`,
   ];
 
+  const seedUrls = Array.from(
+    new Set([
+      ...Array.from(origins).flatMap((baseOrigin) =>
+        seedUrlsFromOrigin(baseOrigin),
+      ),
+    ]),
+  );
+
   return { firecrawlClient, origin, vendorDomain, seedUrls };
 }
 
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.spec.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.spec.ts
new file mode 100644
index 0000000000..c40dc27c46
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.spec.ts
@@ -0,0 +1,145 @@
+import type { VendorRiskAssessmentCertification } from './agent-types';
+import { mergeCertifications } from './trust-portal-deep-scrape-merge';
+
+const cert = (
+  overrides: Partial<VendorRiskAssessmentCertification> = {},
+): VendorRiskAssessmentCertification => ({
+  type: 'SOC 2 Type II',
+  status: 'verified',
+  issuedAt: null,
+  expiresAt: null,
+  url: null,
+  ...overrides,
+});
+
+describe('mergeCertifications', () => {
+  it('returns core untouched when deep is empty', () => {
+    const core = [cert({ type: 'SOC 2 Type II' })];
+    expect(mergeCertifications(core, [])).toEqual(core);
+  });
+
+  it('returns deep when core is empty', () => {
+    const deep = [cert({ type: 'ISO 27001' })];
+    expect(mergeCertifications([], deep)).toEqual(deep);
+  });
+
+  it('dedupes by canonical slug (SOC 2 variants collapse)', () => {
+    const core = [cert({ type: 'SOC 2 Type II', status: 'verified' })];
+    const deep = [cert({ type: 'SOC2', status: 'unknown' })];
+
+    const result = mergeCertifications(core, deep);
+
+    expect(result).toHaveLength(1);
+    expect(result[0].status).toBe('verified');
+  });
+
+  it('verified wins over unknown regardless of source side', () => {
+    const core = [cert({ type: 'ISO 27001', status: 'unknown' })];
+    const deep = [cert({ type: 'ISO 27001', status: 'verified' })];
+
+    const result = mergeCertifications(core, deep);
+
+    expect(result[0].status).toBe('verified');
+  });
+
+  it('status priority: verified > expired > unknown > not_certified', () => {
+    const cases: Array<{
+      a: VendorRiskAssessmentCertification['status'];
+      b: VendorRiskAssessmentCertification['status'];
+      expected: VendorRiskAssessmentCertification['status'];
+    }> = [
+      { a: 'expired', b: 'unknown', expected: 'expired' },
+      { a: 'unknown', b: 'not_certified', expected: 'unknown' },
+      { a: 'verified', b: 'expired', expected: 'verified' },
+      { a: 'not_certified', b: 'verified', expected: 'verified' },
+    ];
+
+    for (const { a, b, expected } of cases) {
+      const result = mergeCertifications(
+        [cert({ type: 'PCI DSS', status: a })],
+        [cert({ type: 'PCI DSS', status: b })],
+      );
+      expect(result).toHaveLength(1);
+      expect(result[0].status).toBe(expected);
+    }
+  });
+
+  it('preserves url/dates from whichever side provides them', () => {
+    const core = [
+      cert({
+        type: 'ISO 27001',
+        status: 'unknown',
+        url: null,
+        issuedAt: null,
+      }),
+    ];
+    const deep = [
+      cert({
+        type: 'ISO 27001',
+        status: 'verified',
+        url: 'https://acme.com/iso.pdf',
+        issuedAt: '2025-03-01T00:00:00.000Z',
+      }),
+    ];
+
+    const result = mergeCertifications(core, deep);
+
+    expect(result[0]).toMatchObject({
+      type: 'ISO 27001',
+      status: 'verified',
+      url: 'https://acme.com/iso.pdf',
+      issuedAt: '2025-03-01T00:00:00.000Z',
+    });
+  });
+
+  it('prefers core url/dates when both sides have them', () => {
+    const core = [
+      cert({
+        type: 'SOC 2 Type II',
+        status: 'verified',
+        url: 'https://core.example.com/soc2',
+        issuedAt: '2025-01-01T00:00:00.000Z',
+      }),
+    ];
+    const deep = [
+      cert({
+        type: 'SOC 2 Type II',
+        status: 'verified',
+        url: 'https://deep.example.com/soc2',
+        issuedAt: '2024-01-01T00:00:00.000Z',
+      }),
+    ];
+
+    const result = mergeCertifications(core, deep);
+
+    expect(result[0].url).toBe('https://core.example.com/soc2');
+    expect(result[0].issuedAt).toBe('2025-01-01T00:00:00.000Z');
+  });
+
+  it('keeps distinct certifications when slugs differ', () => {
+    const core = [cert({ type: 'SOC 2 Type II' })];
+    const deep = [
+      cert({ type: 'ISO 27001' }),
+      cert({ type: 'PCI DSS' }),
+    ];
+
+    const result = mergeCertifications(core, deep);
+
+    expect(result).toHaveLength(3);
+    expect(result.map((c) => c.type).sort()).toEqual([
+      'ISO 27001',
+      'PCI DSS',
+      'SOC 2 Type II',
+    ]);
+  });
+
+  it('falls back to lowercased type when the slug mapper returns null', () => {
+    const core = [cert({ type: 'FooBar Framework', status: 'unknown' })];
+    const deep = [cert({ type: 'foobar framework', status: 'verified' })];
+
+    const result = mergeCertifications(core, deep);
+
+    expect(result).toHaveLength(1);
+    expect(result[0].status).toBe('verified');
+  });
+});
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.ts
new file mode 100644
index 0000000000..93ec13877f
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-merge.ts
@@ -0,0 +1,86 @@
+import type {
+  VendorRiskAssessmentCertification,
+  VendorRiskAssessmentCertificationStatus,
+} from './agent-types';
+
+// Inline slug mapper — mirrors `mapCertificationToBadgeType` in
+// vendor-risk-assessment-task.ts but lives alongside the merge logic
+// so this file has no upward dependency on the orchestrating task.
+// Keep in sync if new frameworks are added there.
+function canonicalSlug(type: string): string {
+  const normalized = type.toLowerCase().replace(/[^a-z0-9]/g, '');
+  if (normalized.includes('soc2') || normalized.includes('soc 2')) return 'soc2';
+  if (normalized.includes('iso27001') || normalized.includes('27001'))
+    return 'iso27001';
+  if (normalized.includes('iso42001') || normalized.includes('42001'))
+    return 'iso42001';
+  if (normalized.includes('iso9001') || normalized.includes('9001'))
+    return 'iso9001';
+  if (normalized.includes('gdpr')) return 'gdpr';
+  if (normalized.includes('hipaa')) return 'hipaa';
+  if (
+    normalized.includes('pcidss') ||
+    normalized.includes('pci') ||
+    normalized.includes('paymentcard')
+  )
+    return 'pci_dss';
+  if (normalized.includes('nen7510') || normalized.includes('7510'))
+    return 'nen7510';
+  // Fallback: lowercased trimmed type string
+  return type.trim().toLowerCase();
+}
+
+const STATUS_PRIORITY: Record<VendorRiskAssessmentCertificationStatus, number> =
+  {
+    verified: 3,
+    expired: 2,
+    unknown: 1,
+    not_certified: 0,
+  };
+
+function pickHigherStatus(
+  a: VendorRiskAssessmentCertificationStatus,
+  b: VendorRiskAssessmentCertificationStatus,
+): VendorRiskAssessmentCertificationStatus {
+  return STATUS_PRIORITY[a] >= STATUS_PRIORITY[b] ? a : b;
+}
+
+/**
+ * Merge certifications from the core Firecrawl Agent and the trust-portal
+ * deep-scrape, deduping by canonical slug. Status resolves via priority
+ * (verified > expired > unknown > not_certified). URL/dates prefer the
+ * core value when present; otherwise the deep value.
+ */
+export function mergeCertifications(
+  core: VendorRiskAssessmentCertification[],
+  deep: VendorRiskAssessmentCertification[],
+): VendorRiskAssessmentCertification[] {
+  if (core.length === 0) return deep;
+  if (deep.length === 0) return core;
+
+  const bySlug = new Map<string, VendorRiskAssessmentCertification>();
+
+  // Seed with core so its URL/date values win on ties.
+  for (const c of core) {
+    bySlug.set(canonicalSlug(c.type), { ...c });
+  }
+
+  for (const d of deep) {
+    const slug = canonicalSlug(d.type);
+    const existing = bySlug.get(slug);
+    if (!existing) {
+      bySlug.set(slug, { ...d });
+      continue;
+    }
+
+    bySlug.set(slug, {
+      type: existing.type, // keep core's display type
+      status: pickHigherStatus(existing.status, d.status),
+      issuedAt: existing.issuedAt ?? d.issuedAt ?? null,
+      expiresAt: existing.expiresAt ?? d.expiresAt ?? null,
+      url: existing.url ?? d.url ?? null,
+    });
+  }
+
+  return Array.from(bySlug.values());
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-scrape-options.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-scrape-options.ts
new file mode 100644
index 0000000000..33fcd62cc0
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-scrape-options.ts
@@ -0,0 +1,107 @@
+import type { DeepScrapeSection } from './trust-portal-deep-scrape-sections';
+
+/**
+ * Builders for the two kinds of Firecrawl `scrape` requests the trust-portal
+ * deep-scrape issues — the initial full-page pull, and the per-section pull
+ * that may need to click a sidebar item (by href, CSS selector, or text) to
+ * reveal the content.
+ */
+
+const INITIAL_WAIT_MS = 3000;
+const CLICK_WAIT_BEFORE_MS = 1500;
+const CLICK_WAIT_AFTER_MS = 2000;
+const PATH_WAIT_MS = 2000;
+// Firecrawl scrape v2 `timeout` is capped at 300000ms.
+const SCRAPE_TIMEOUT_MS = 120_000;
+
+/** Escape `"` and `\` for use inside a CSS double-quoted attribute value. */
+function cssEscapeAttr(value: string): string {
+  return value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
+}
+
+/**
+ * JS payload that finds the smallest visible DOM element whose exact
+ * textContent matches `tabLabel` and clicks it. Used when a trust portal
+ * sidebar is composed of buttons/divs without href attributes.
+ */
+function buildClickByTextScript(tabLabel: string): string {
+  const safe = JSON.stringify(tabLabel);
+  return `(() => {
+  const label = ${safe};
+  const candidates = Array.from(
+    document.querySelectorAll(
+      'button, a, [role="tab"], [role="button"], [role="menuitem"], li, span, div'
+    )
+  )
+    .filter((el) => {
+      if (!el || typeof el.textContent !== 'string') return false;
+      if (el.textContent.trim() !== label) return false;
+      if (el.children && el.children.length > 2) return false;
+      if (typeof el.getBoundingClientRect === 'function') {
+        const rect = el.getBoundingClientRect();
+        if (rect.width === 0 || rect.height === 0) return false;
+      }
+      return true;
+    })
+    .sort((a, b) => (a.textContent || '').length - (b.textContent || '').length);
+  const target = candidates[0];
+  if (target) {
+    try { target.scrollIntoView({ block: 'center' }); } catch {}
+    target.click();
+  }
+})();`;
+}
+
+export function buildInitialScrapeOptions() {
+  return {
+    formats: ['markdown', 'links'] as const,
+    onlyMainContent: false,
+    timeout: SCRAPE_TIMEOUT_MS,
+    actions: [{ type: 'wait', milliseconds: INITIAL_WAIT_MS }],
+  };
+}
+
+export function buildSectionScrapeOptions(section: DeepScrapeSection) {
+  if (section.tabLabel) {
+    return {
+      formats: ['markdown'] as const,
+      onlyMainContent: true,
+      timeout: SCRAPE_TIMEOUT_MS,
+      actions: [
+        { type: 'wait', milliseconds: CLICK_WAIT_BEFORE_MS },
+        {
+          type: 'executeJavascript',
+          script: buildClickByTextScript(section.tabLabel),
+        },
+        { type: 'wait', milliseconds: CLICK_WAIT_AFTER_MS },
+      ],
+    };
+  }
+
+  if (section.anchor) {
+    const safeAnchor = cssEscapeAttr(section.anchor);
+    const safeLabel = cssEscapeAttr(section.label);
+    const selector = [
+      `a[href="${safeAnchor}"]`,
+      `a[href$="${safeAnchor}"]`,
+      `[data-tab="${safeLabel}"]`,
+    ].join(', ');
+    return {
+      formats: ['markdown'] as const,
+      onlyMainContent: true,
+      timeout: SCRAPE_TIMEOUT_MS,
+      actions: [
+        { type: 'wait', milliseconds: CLICK_WAIT_BEFORE_MS },
+        { type: 'click', selector },
+        { type: 'wait', milliseconds: CLICK_WAIT_AFTER_MS },
+      ],
+    };
+  }
+
+  return {
+    formats: ['markdown'] as const,
+    onlyMainContent: true,
+    timeout: SCRAPE_TIMEOUT_MS,
+    actions: [{ type: 'wait', milliseconds: PATH_WAIT_MS }],
+  };
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.spec.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.spec.ts
new file mode 100644
index 0000000000..8a575f3b3b
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.spec.ts
@@ -0,0 +1,145 @@
+import { discoverSectionUrls } from './trust-portal-deep-scrape-sections';
+
+describe('discoverSectionUrls', () => {
+  const sourceUrl = 'https://ui.com/us/en/trust-center';
+
+  it('extracts intra-page anchors on the same path', () => {
+    const links = [
+      'https://ui.com/us/en/trust-center#philosophy',
+      'https://ui.com/us/en/trust-center#cloud-security',
+      'https://ui.com/us/en/trust-center#corporate-security',
+      'https://ui.com/us/en/trust-center#ndaa-compliance',
+    ];
+
+    const result = discoverSectionUrls({ sourceUrl, links });
+
+    expect(result.map((r) => r.url)).toEqual(
+      expect.arrayContaining([
+        'https://ui.com/us/en/trust-center#philosophy',
+        'https://ui.com/us/en/trust-center#cloud-security',
+        'https://ui.com/us/en/trust-center#corporate-security',
+        'https://ui.com/us/en/trust-center#ndaa-compliance',
+      ]),
+    );
+    expect(result).toHaveLength(4);
+  });
+
+  it('extracts same-path child URLs', () => {
+    const links = [
+      'https://acme.com/trust-center/cloud-security',
+      'https://acme.com/trust-center/data-centers',
+    ];
+
+    const result = discoverSectionUrls({
+      sourceUrl: 'https://acme.com/trust-center',
+      links,
+    });
+
+    expect(result.map((r) => r.url).sort()).toEqual([
+      'https://acme.com/trust-center/cloud-security',
+      'https://acme.com/trust-center/data-centers',
+    ]);
+  });
+
+  it('rejects external-domain links', () => {
+    const links = [
+      'https://ui.com/us/en/trust-center#cloud-security',
+      'https://example.com/trust',
+      'https://malicious.site/trust-center#fake',
+    ];
+
+    const result = discoverSectionUrls({ sourceUrl, links });
+
+    expect(result).toHaveLength(1);
+    expect(result[0].url).toBe(
+      'https://ui.com/us/en/trust-center#cloud-security',
+    );
+  });
+
+  it('rejects the source URL itself', () => {
+    const links = [
+      'https://ui.com/us/en/trust-center',
+      'https://ui.com/us/en/trust-center#cloud-security',
+    ];
+
+    const result = discoverSectionUrls({ sourceUrl, links });
+
+    expect(result.map((r) => r.url)).toEqual([
+      'https://ui.com/us/en/trust-center#cloud-security',
+    ]);
+  });
+
+  it('dedupes identical URLs', () => {
+    const links = [
+      'https://ui.com/us/en/trust-center#cloud-security',
+      'https://ui.com/us/en/trust-center#cloud-security',
+    ];
+
+    const result = discoverSectionUrls({ sourceUrl, links });
+
+    expect(result).toHaveLength(1);
+  });
+
+  it('caps at 25 sections (safety fuse)', () => {
+    const links = Array.from(
+      { length: 40 },
+      (_, i) => `https://ui.com/us/en/trust-center#section-${i}`,
+    );
+
+    const result = discoverSectionUrls({ sourceUrl, links });
+
+    expect(result).toHaveLength(25);
+  });
+
+  it('handles source URLs with trailing slash', () => {
+    const links = ['https://acme.com/trust-center/cloud-security'];
+
+    const result = discoverSectionUrls({
+      sourceUrl: 'https://acme.com/trust-center/',
+      links,
+    });
+
+    expect(result).toHaveLength(1);
+    expect(result[0].url).toBe('https://acme.com/trust-center/cloud-security');
+  });
+
+  it('skips unparseable links silently', () => {
+    const links = [
+      'not-a-url',
+      '',
+      'https://ui.com/us/en/trust-center#cloud-security',
+    ];
+
+    const result = discoverSectionUrls({ sourceUrl, links });
+
+    expect(result).toHaveLength(1);
+  });
+
+  it('derives a section label from the anchor fragment', () => {
+    const links = ['https://ui.com/us/en/trust-center#cloud-security'];
+
+    const result = discoverSectionUrls({ sourceUrl, links });
+
+    expect(result[0].label).toBe('cloud-security');
+    expect(result[0].anchor).toBe('#cloud-security');
+  });
+
+  it('derives a section label from the trailing path segment', () => {
+    const links = ['https://acme.com/trust-center/cloud-security'];
+
+    const result = discoverSectionUrls({
+      sourceUrl: 'https://acme.com/trust-center',
+      links,
+    });
+
+    expect(result[0].label).toBe('cloud-security');
+    expect(result[0].anchor).toBeNull();
+  });
+
+  it('returns an empty array when links is undefined or empty', () => {
+    expect(discoverSectionUrls({ sourceUrl, links: [] })).toEqual([]);
+    expect(
+      discoverSectionUrls({ sourceUrl, links: undefined as unknown as string[] }),
+    ).toEqual([]);
+  });
+});
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.ts
new file mode 100644
index 0000000000..47c9b2afe0
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-sections.ts
@@ -0,0 +1,101 @@
+// Pure helper: convert a Firecrawl scrape's `links` array into an ordered,
+// deduped list of section URLs for the trust-portal deep-scrape pass.
+//
+// A "section URL" is either:
+//   - an intra-page anchor on the same path as the source URL (e.g. `/trust-center#cloud-security`)
+//   - a same-origin URL whose path is nested under the source path (e.g. `/trust-center/cloud-security`)
+//
+// Cross-origin links, the source URL itself, and duplicates are dropped.
+
+export const MAX_SECTION_URLS = 25;
+
+export type DeepScrapeSection = {
+  url: string;
+  /** The anchor fragment including the `#` (e.g. `#cloud-security`), or null for path-based sections. */
+  anchor: string | null;
+  /** A human-friendly label used for logging and markdown section headers. */
+  label: string;
+  /**
+   * When present, the section must be revealed by clicking a DOM element whose
+   * textContent equals this value. Used for SPA trust portals where sidebar
+   * items are buttons/divs without href attributes (e.g. Ubiquiti).
+   */
+  tabLabel?: string | null;
+};
+
+function stripTrailingSlash(path: string): string {
+  return path.length > 1 && path.endsWith('/') ? path.slice(0, -1) : path;
+}
+
+function deriveLabel(sectionUrl: URL, anchor: string | null): string {
+  if (anchor) {
+    return anchor.slice(1); // drop leading `#`
+  }
+  const segments = stripTrailingSlash(sectionUrl.pathname).split('/');
+  return segments[segments.length - 1] || sectionUrl.pathname;
+}
+
+export function discoverSectionUrls(params: {
+  sourceUrl: string;
+  links: string[];
+}): DeepScrapeSection[] {
+  const { sourceUrl, links } = params;
+  if (!links || links.length === 0) return [];
+
+  let source: URL;
+  try {
+    source = new URL(sourceUrl);
+  } catch {
+    return [];
+  }
+
+  const sourceOrigin = source.origin;
+  const sourcePath = stripTrailingSlash(source.pathname);
+  const sourceCanonical = `${sourceOrigin}${sourcePath}`;
+
+  const seen = new Set<string>();
+  const sections: DeepScrapeSection[] = [];
+
+  for (const raw of links) {
+    if (sections.length >= MAX_SECTION_URLS) break;
+    if (!raw || typeof raw !== 'string') continue;
+
+    let parsed: URL;
+    try {
+      parsed = new URL(raw);
+    } catch {
+      continue;
+    }
+
+    if (parsed.origin !== sourceOrigin) continue;
+
+    const parsedPath = stripTrailingSlash(parsed.pathname);
+    const hasFragment = parsed.hash && parsed.hash.length > 1;
+
+    const isIntraPageAnchor = parsedPath === sourcePath && hasFragment;
+    const isSamePathChild =
+      !hasFragment &&
+      parsedPath !== sourcePath &&
+      (parsedPath.startsWith(`${sourcePath}/`) ||
+        (sourcePath === '' && parsedPath.startsWith('/')));
+
+    if (!isIntraPageAnchor && !isSamePathChild) continue;
+
+    const anchor = isIntraPageAnchor ? parsed.hash : null;
+    const canonical = anchor
+      ? `${sourceCanonical}${anchor}`
+      : `${sourceOrigin}${parsedPath}`;
+
+    if (canonical === sourceCanonical) continue;
+    if (seen.has(canonical)) continue;
+    seen.add(canonical);
+
+    sections.push({
+      url: canonical,
+      anchor,
+      label: deriveLabel(new URL(canonical), anchor),
+    });
+  }
+
+  return sections;
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-tabs.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-tabs.ts
new file mode 100644
index 0000000000..934334d1c7
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape-tabs.ts
@@ -0,0 +1,92 @@
+import { logger } from '@trigger.dev/sdk';
+import { anthropic } from '@ai-sdk/anthropic';
+import { generateObject } from 'ai';
+import { z } from 'zod';
+
+/**
+ * Some trust portals are SPAs whose sidebar items are buttons/divs without
+ * href attributes — Firecrawl's `links` format doesn't enumerate them.
+ * When URL-based section discovery yields nothing, ask Claude Sonnet 4.6
+ * to extract sidebar/tab labels from the initial markdown so the orchestrator
+ * can click each by text content.
+ */
+
+const TAB_MODEL = 'claude-sonnet-4-6';
+const MAX_TABS = 15;
+const MARKDOWN_LIMIT = 12_000;
+
+const tabSchema = z.object({
+  tabLabels: z
+    .array(z.string())
+    .describe(
+      'Sidebar/tab labels present on the trust portal landing page. Each label is a short phrase (1-4 words) that, when clicked, reveals additional security/compliance content. Return an empty array if no such items exist.',
+    )
+    .default([]),
+});
+
+function buildPrompt(args: {
+  vendorName: string;
+  initialMarkdown: string;
+}): string {
+  return `You are analyzing the markdown of a vendor's trust portal landing page.
+
+Some trust portals are single-page apps where sidebar/tab items don't have real href URLs — they're buttons that reveal additional security/compliance content when clicked. Your job is to identify those sidebar/tab labels so a downstream scraper can programmatically click each one.
+
+Vendor: ${args.vendorName}
+
+Include labels that:
+- Look like sidebar/tab nav items (typically 1-4 words, e.g. "Cloud Security", "NDAA Compliance", "Corporate Security", "Certifications", "Reports", "Data Centers", "Subprocessors", "Bug Bounty Program", "Advisory Bulletins", "Overview", "Policies").
+- Sit inside or near the trust/security content region of the page.
+
+Exclude:
+- Site-wide navigation labels ("Home", "Products", "Store", "Support", "Contact Us", "Careers", "Blog", "Training", "Investor Relations", "What's New").
+- Footer / legal items ("Privacy Policy", "Terms of Service", "Legal").
+- Product category labels ("Cloud Gateways", "Switching", "WiFi", "Camera Security", "Door Access", "Integrations").
+
+Return at most ${MAX_TABS} labels. Return an empty array if you see no sidebar/tab items.
+
+Markdown:
+
+${args.initialMarkdown.slice(0, MARKDOWN_LIMIT)}`;
+}
+
+export async function identifySidebarTabs(params: {
+  vendorName: string;
+  initialMarkdown: string;
+}): Promise<string[]> {
+  const { vendorName, initialMarkdown } = params;
+
+  if (!initialMarkdown || initialMarkdown.trim().length === 0) {
+    return [];
+  }
+
+  try {
+    const { object } = await generateObject({
+      model: anthropic(TAB_MODEL),
+      schema: tabSchema,
+      prompt: buildPrompt({ vendorName, initialMarkdown }),
+    });
+
+    const deduped = Array.from(
+      new Set(
+        (object.tabLabels ?? [])
+          .map((l) => l.trim())
+          .filter((l) => l.length > 0 && l.length <= 60),
+      ),
+    ).slice(0, MAX_TABS);
+
+    logger.info('Trust portal deep-scrape: tab labels identified', {
+      vendorName,
+      count: deduped.length,
+      tabLabelsJson: JSON.stringify(deduped),
+    });
+
+    return deduped;
+  } catch (error) {
+    logger.warn('Trust portal deep-scrape: tab identification failed', {
+      vendorName,
+      error: error instanceof Error ? error.message : String(error),
+    });
+    return [];
+  }
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.spec.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.spec.ts
new file mode 100644
index 0000000000..c040aab360
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.spec.ts
@@ -0,0 +1,471 @@
+import { deepScrapeTrustPortal } from './trust-portal-deep-scrape';
+
+jest.mock('@trigger.dev/sdk', () => ({
+  logger: {
+    warn: jest.fn(),
+    info: jest.fn(),
+    debug: jest.fn(),
+    error: jest.fn(),
+  },
+}));
+
+jest.mock('@ai-sdk/anthropic', () => ({
+  anthropic: jest.fn(() => 'claude-mock-model'),
+}));
+
+const generateObjectMock = jest.fn();
+jest.mock('ai', () => ({
+  generateObject: (...args: unknown[]) => generateObjectMock(...args),
+}));
+
+type ScrapeMock = jest.Mock<
+  Promise<{ markdown?: string; links?: string[] }>,
+  [string, Record<string, unknown>?]
+>;
+
+function makeFirecrawlMock(scrape: ScrapeMock) {
+  return { scrape } as unknown as import('@mendable/firecrawl-js').default;
+}
+
+describe('deepScrapeTrustPortal — gate', () => {
+  beforeEach(() => {
+    generateObjectMock.mockReset();
+  });
+
+  it('returns null when sourceUrl is null', async () => {
+    const scrape = jest.fn();
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: null,
+      firecrawlClient: makeFirecrawlMock(scrape as ScrapeMock),
+    });
+    expect(result).toBeNull();
+    expect(scrape).not.toHaveBeenCalled();
+  });
+
+  it('returns null when source URL is on a known third-party portal host', async () => {
+    const scrape = jest.fn();
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://acme.trust.page',
+      firecrawlClient: makeFirecrawlMock(scrape as ScrapeMock),
+    });
+    expect(result).toBeNull();
+    expect(scrape).not.toHaveBeenCalled();
+  });
+
+  it('returns null when source URL is not on the vendor domain', async () => {
+    const scrape = jest.fn();
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://some-other-site.com/trust',
+      firecrawlClient: makeFirecrawlMock(scrape as ScrapeMock),
+    });
+    expect(result).toBeNull();
+    expect(scrape).not.toHaveBeenCalled();
+  });
+
+  it('returns null when source URL is unparseable', async () => {
+    const scrape = jest.fn();
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'not a url',
+      firecrawlClient: makeFirecrawlMock(scrape as ScrapeMock),
+    });
+    expect(result).toBeNull();
+    expect(scrape).not.toHaveBeenCalled();
+  });
+});
+
+describe('deepScrapeTrustPortal — extraction', () => {
+  beforeEach(() => {
+    generateObjectMock.mockReset();
+  });
+
+  it('extracts SOC 2, ISO 27001, PCI-DSS from a Ubiquiti-shaped SPA trust portal', async () => {
+    const sourceUrl = 'https://ui.com/us/en/trust-center';
+
+    const scrape: ScrapeMock = jest
+      .fn()
+      // Initial scrape returns the landing page + all sidebar links
+      .mockResolvedValueOnce({
+        markdown: '# Secure by Design\nUbiquiti trust overview.',
+        links: [
+          'https://ui.com/us/en/trust-center',
+          'https://ui.com/us/en/trust-center#philosophy',
+          'https://ui.com/us/en/trust-center#ndaa-compliance',
+          'https://ui.com/us/en/trust-center#cloud-security',
+          'https://ui.com/us/en/trust-center#corporate-security',
+        ],
+      })
+      // Per-section scrapes
+      .mockResolvedValueOnce({ markdown: '# Philosophy\nSecurity first.' })
+      .mockResolvedValueOnce({
+        markdown:
+          '# NDAA Compliance\nUbiquiti products are NDAA Section 889 compliant.',
+      })
+      .mockResolvedValueOnce({
+        markdown:
+          '# Cloud Security\n\nBadges: Soc 2 Type II, ISO/IEC 27001:2013, PCI-DSS. All verified.',
+      })
+      .mockResolvedValueOnce({
+        markdown:
+          '# Corporate Security\nPolicies covering employees and contractors.',
+      });
+
+    generateObjectMock.mockResolvedValueOnce({
+      object: {
+        certifications: [
+          {
+            type: 'SOC 2 Type II',
+            status: 'verified',
+            evidence_snippet: 'Soc 2 Type II',
+          },
+          {
+            type: 'ISO 27001',
+            status: 'verified',
+            evidence_snippet: 'ISO/IEC 27001:2013',
+          },
+          {
+            type: 'PCI DSS',
+            status: 'verified',
+            evidence_snippet: 'PCI-DSS',
+          },
+        ],
+      },
+    });
+
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Ubiquiti',
+      vendorDomain: 'ui.com',
+      sourceUrl,
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    expect(result).not.toBeNull();
+    expect(result).toHaveLength(3);
+    expect(result?.map((c) => c.type).sort()).toEqual([
+      'ISO 27001',
+      'PCI DSS',
+      'SOC 2 Type II',
+    ]);
+    expect(result?.every((c) => c.status === 'verified')).toBe(true);
+
+    // 1 initial + 4 sections = 5 scrape calls
+    expect(scrape).toHaveBeenCalledTimes(5);
+
+    // First call should be the source URL with a wait action.
+    expect(scrape).toHaveBeenNthCalledWith(
+      1,
+      sourceUrl,
+      expect.objectContaining({
+        formats: expect.arrayContaining(['markdown', 'links']),
+        onlyMainContent: false,
+      }),
+    );
+
+    // AI extraction called once with combined markdown.
+    expect(generateObjectMock).toHaveBeenCalledTimes(1);
+    const aiCall = generateObjectMock.mock.calls[0][0];
+    expect(aiCall.prompt).toContain('Cloud Security');
+    expect(aiCall.prompt).toContain('PCI-DSS');
+  });
+
+  it('continues with remaining sections when one scrape fails', async () => {
+    const scrape: ScrapeMock = jest
+      .fn()
+      .mockResolvedValueOnce({
+        markdown: '# Landing',
+        links: [
+          'https://acme.com/trust#one',
+          'https://acme.com/trust#two',
+        ],
+      })
+      .mockRejectedValueOnce(new Error('network timeout'))
+      .mockResolvedValueOnce({
+        markdown: '# Two\nWe are SOC 2 Type II verified.',
+      });
+
+    generateObjectMock.mockResolvedValueOnce({
+      object: {
+        certifications: [
+          {
+            type: 'SOC 2 Type II',
+            status: 'verified',
+            evidence_snippet: 'SOC 2 Type II verified',
+          },
+        ],
+      },
+    });
+
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://acme.com/trust',
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    expect(result).toEqual([
+      expect.objectContaining({ type: 'SOC 2 Type II', status: 'verified' }),
+    ]);
+  });
+
+  it('returns null when the initial scrape fails', async () => {
+    const scrape: ScrapeMock = jest
+      .fn()
+      .mockRejectedValueOnce(new Error('network error'));
+
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://acme.com/trust',
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it('returns null when AI extraction throws', async () => {
+    const scrape: ScrapeMock = jest.fn().mockResolvedValueOnce({
+      markdown: '# Trust center content',
+      links: [],
+    });
+    // First generateObject call is identifySidebarTabs; return no tabs so
+    // the flow proceeds straight to cert extraction.
+    generateObjectMock.mockResolvedValueOnce({ object: { tabLabels: [] } });
+    generateObjectMock.mockRejectedValueOnce(new Error('model error'));
+
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://acme.com/trust',
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it('drops extracted certs whose evidence_snippet is empty', async () => {
+    const scrape: ScrapeMock = jest.fn().mockResolvedValueOnce({
+      markdown: '# Trust',
+      links: [],
+    });
+
+    generateObjectMock.mockResolvedValueOnce({ object: { tabLabels: [] } });
+    generateObjectMock.mockResolvedValueOnce({
+      object: {
+        certifications: [
+          {
+            type: 'SOC 2 Type II',
+            status: 'verified',
+            evidence_snippet: 'SOC 2 Type II report available on request',
+          },
+          { type: 'Totally Made Up Cert', status: 'verified', evidence_snippet: '' },
+        ],
+      },
+    });
+
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://acme.com/trust',
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    expect(result).toHaveLength(1);
+    expect(result?.[0].type).toBe('SOC 2 Type II');
+  });
+
+  it('runs AI extraction on initial markdown when there are no sidebar sections', async () => {
+    const scrape: ScrapeMock = jest.fn().mockResolvedValueOnce({
+      markdown:
+        '# Trust\nWe hold SOC 2 Type II and ISO 27001 certifications.',
+      links: [],
+    });
+
+    generateObjectMock.mockResolvedValueOnce({ object: { tabLabels: [] } });
+    generateObjectMock.mockResolvedValueOnce({
+      object: {
+        certifications: [
+          {
+            type: 'SOC 2 Type II',
+            status: 'verified',
+            evidence_snippet: 'SOC 2 Type II',
+          },
+          {
+            type: 'ISO 27001',
+            status: 'verified',
+            evidence_snippet: 'ISO 27001',
+          },
+        ],
+      },
+    });
+
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://acme.com/trust',
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    expect(scrape).toHaveBeenCalledTimes(1);
+    expect(result?.map((c) => c.type).sort()).toEqual([
+      'ISO 27001',
+      'SOC 2 Type II',
+    ]);
+  });
+
+  it('discovers SPA tab labels via LLM and scrapes each by clicking text', async () => {
+    const scrape: ScrapeMock = jest
+      .fn()
+      .mockResolvedValueOnce({
+        markdown:
+          '# Secure by Design\nPhilosophy\nNDAA Compliance\nCloud Security',
+        links: [], // No sidebar anchors — triggers tab-label discovery
+      })
+      .mockResolvedValueOnce({
+        markdown: '# Philosophy\nWe believe in edge-first security.',
+      })
+      .mockResolvedValueOnce({
+        markdown: '# Cloud Security\nSOC 2 Type II, ISO 27001, PCI-DSS.',
+      });
+
+    // First LLM call: sidebar tabs. Second: cert extraction.
+    generateObjectMock.mockResolvedValueOnce({
+      object: { tabLabels: ['Philosophy', 'Cloud Security'] },
+    });
+    generateObjectMock.mockResolvedValueOnce({
+      object: {
+        certifications: [
+          {
+            type: 'SOC 2 Type II',
+            status: 'verified',
+            evidence_snippet: 'SOC 2 Type II',
+          },
+          {
+            type: 'ISO 27001',
+            status: 'verified',
+            evidence_snippet: 'ISO 27001',
+          },
+        ],
+      },
+    });
+
+    const result = await deepScrapeTrustPortal({
+      vendorName: 'Ubiquiti',
+      vendorDomain: 'ui.com',
+      sourceUrl: 'https://ui.com/trust-center',
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    // 1 initial + 2 tab-label scrapes = 3 scrape calls
+    expect(scrape).toHaveBeenCalledTimes(3);
+
+    // Each tab scrape must use executeJavascript click-by-text actions.
+    const tabCall = scrape.mock.calls[1];
+    const actions =
+      (tabCall[1] as { actions?: Array<{ type: string; script?: string }> })
+        ?.actions ?? [];
+    const jsAction = actions.find((a) => a.type === 'executeJavascript');
+    expect(jsAction?.script).toBeDefined();
+    expect(jsAction?.script).toContain('"Philosophy"');
+
+    expect(result?.map((c) => c.type).sort()).toEqual([
+      'ISO 27001',
+      'SOC 2 Type II',
+    ]);
+  });
+
+  it('escapes CSS special characters in anchor selectors', async () => {
+    // Use a backslash in the anchor: `\` is a CSS special character that must
+    // be escaped as `\\` inside attribute values, and it survives URL parsing
+    // (unlike `"` which browsers percent-encode to `%22` in the fragment).
+    const scrape: ScrapeMock = jest
+      .fn()
+      .mockResolvedValueOnce({
+        markdown: '# Landing',
+        links: ['https://acme.com/trust#weird\\section'],
+      })
+      .mockResolvedValueOnce({ markdown: '# Weird\nWe are ISO 27001 certified.' });
+
+    generateObjectMock.mockResolvedValueOnce({
+      object: {
+        certifications: [
+          {
+            type: 'ISO 27001',
+            status: 'verified',
+            evidence_snippet: 'ISO 27001 certified',
+          },
+        ],
+      },
+    });
+
+    await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl: 'https://acme.com/trust',
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    // The second call is the section scrape. Its selector should contain the
+    // escaped backslash (`\\`) not the raw single backslash.
+    const sectionCall = scrape.mock.calls[1];
+    const actions = (sectionCall[1] as { actions?: Array<{ type: string; selector?: string }> })?.actions ?? [];
+    const clickAction = actions.find((a) => a.type === 'click');
+    expect(clickAction?.selector).toBeDefined();
+    // cssEscapeAttr converts `\` → `\\`, so the selector contains `\\section`
+    expect(clickAction?.selector).toContain('#weird\\\\section');
+    // Raw single backslash should NOT appear unescaped in the selector string
+    expect(clickAction?.selector).not.toMatch(/#weird\\[^\\]/);
+  });
+
+  it('scrapes every section exactly once when section count exceeds concurrency bound', async () => {
+    const anchors = Array.from({ length: 8 }, (_, i) => `#section-${i}`);
+    const sourceUrl = 'https://acme.com/trust';
+
+    const scrape: ScrapeMock = jest.fn(async (url: string) => {
+      if (url === sourceUrl) {
+        return {
+          markdown: '# Landing',
+          links: anchors.map((a) => `${sourceUrl}${a}`),
+        };
+      }
+      return { markdown: `# ${url}\nplaceholder` };
+    }) as ScrapeMock;
+
+    generateObjectMock.mockResolvedValueOnce({
+      object: {
+        certifications: [
+          {
+            type: 'SOC 2 Type II',
+            status: 'verified',
+            evidence_snippet: 'SOC 2 Type II',
+          },
+        ],
+      },
+    });
+
+    await deepScrapeTrustPortal({
+      vendorName: 'Acme',
+      vendorDomain: 'acme.com',
+      sourceUrl,
+      firecrawlClient: makeFirecrawlMock(scrape),
+    });
+
+    // 1 initial + 8 sections = 9 scrape calls
+    expect(scrape).toHaveBeenCalledTimes(9);
+
+    // Each section URL should have been requested exactly once.
+    const sectionCalls = scrape.mock.calls
+      .slice(1)
+      .map((call) => call[0] as string);
+    expect(new Set(sectionCalls).size).toBe(8);
+    for (const anchor of anchors) {
+      expect(sectionCalls).toContain(`${sourceUrl}${anchor}`);
+    }
+  });
+});
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.ts
new file mode 100644
index 0000000000..dfd759b6cb
--- /dev/null
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/trust-portal-deep-scrape.ts
@@ -0,0 +1,293 @@
+import Firecrawl from '@mendable/firecrawl-js';
+import { logger } from '@trigger.dev/sdk';
+import { anthropic } from '@ai-sdk/anthropic';
+import { generateObject } from 'ai';
+import { z } from 'zod';
+import type {
+  VendorRiskAssessmentCertification,
+  VendorRiskAssessmentCertificationStatus,
+} from './agent-types';
+import { isKnownThirdPartyPortalHost } from './url-validation';
+import {
+  discoverSectionUrls,
+  MAX_SECTION_URLS,
+  type DeepScrapeSection,
+} from './trust-portal-deep-scrape-sections';
+import { identifySidebarTabs } from './trust-portal-deep-scrape-tabs';
+import {
+  buildInitialScrapeOptions,
+  buildSectionScrapeOptions,
+} from './trust-portal-deep-scrape-scrape-options';
+
+const EXTRACTION_MODEL = 'claude-sonnet-4-6';
+const SECTION_CONCURRENCY = 5;
+const MARKDOWN_TRUNCATE_LIMIT = 200_000;
+
+const certificationExtractionSchema = z.object({
+  certifications: z.array(z.object({
+    type: z.string().describe(
+      'Canonical certification name, e.g. "SOC 2 Type II", "ISO 27001", "PCI DSS", "ISO 27017", "FedRAMP", "HIPAA", "GDPR", "ISO 42001"',
+    ),
+    status: z.enum(['verified', 'expired', 'not_certified', 'unknown']).describe(
+      'verified when the page lists this framework as current; expired only if explicitly said so; not_certified only if the page explicitly says so; unknown otherwise',
+    ),
+    issued_at: z.string().optional().nullable(),
+    expires_at: z.string().optional().nullable(),
+    evidence_snippet: z.string().describe(
+      'Short quote from the markdown (< 200 chars) that supports this certification. Must be present in the markdown verbatim.',
+    ),
+  })).default([]),
+});
+
+type ScrapeResponse = { markdown?: string; links?: string[] };
+
+function truncateMarkdown(input: string): string {
+  if (input.length <= MARKDOWN_TRUNCATE_LIMIT) return input;
+  logger.warn('Trust portal combined markdown truncated for extraction', {
+    originalLength: input.length,
+    limit: MARKDOWN_TRUNCATE_LIMIT,
+  });
+  return input.slice(0, MARKDOWN_TRUNCATE_LIMIT);
+}
+
+function buildExtractionPrompt(args: {
+  vendorName: string;
+  combinedMarkdown: string;
+}): string {
+  return `You are extracting security and compliance certifications from a vendor's trust center page.
+
+Vendor: ${args.vendorName}
+
+Rules:
+- Only return certifications that are explicitly listed in the markdown below.
+- Never invent certifications. If a certification is not mentioned, do not include it.
+- Mark status as "verified" when the page lists it as a current/active framework (including badge callouts and "we are certified" language).
+- Mark status as "expired" only when the page explicitly says the certification has lapsed.
+- Mark status as "not_certified" only when the page explicitly says the vendor is not certified.
+- Otherwise use "unknown".
+- Normalize the type name to canonical form (e.g. "Soc 2 Type II" → "SOC 2 Type II", "ISO/IEC 27001:2013" → "ISO 27001", "PCI-DSS" → "PCI DSS").
+- Always include evidence_snippet with a verbatim quote from the markdown. Certifications without an evidence_snippet will be discarded.
+
+Markdown from the trust portal and its sections:
+
+${args.combinedMarkdown}`;
+}
+
+
+async function mapWithConcurrency<T, R>(
+  items: T[],
+  concurrency: number,
+  worker: (item: T) => Promise<R>,
+): Promise<Array<PromiseSettledResult<R>>> {
+  const results: Array<PromiseSettledResult<R>> = new Array(items.length);
+  let cursor = 0;
+  const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
+    while (true) {
+      const index = cursor++;
+      if (index >= items.length) return;
+      try {
+        results[index] = { status: 'fulfilled', value: await worker(items[index]) };
+      } catch (reason) {
+        results[index] = { status: 'rejected', reason };
+      }
+    }
+  });
+  await Promise.all(runners);
+  return results;
+}
+
+export type DeepScrapeParams = {
+  vendorName: string;
+  vendorDomain: string;
+  sourceUrl: string | null;
+  firecrawlClient: Firecrawl;
+};
+
+export async function deepScrapeTrustPortal(
+  params: DeepScrapeParams,
+): Promise<VendorRiskAssessmentCertification[] | null> {
+  const { vendorName, vendorDomain, sourceUrl, firecrawlClient } = params;
+
+  if (!sourceUrl) return null;
+
+  let source: URL;
+  try {
+    source = new URL(sourceUrl);
+  } catch {
+    return null;
+  }
+
+  const host = source.hostname.toLowerCase();
+  if (isKnownThirdPartyPortalHost(host)) {
+    logger.info(
+      'Trust portal deep-scrape skipped: third-party portal host already handled by agent',
+      { vendorName, host },
+    );
+    return null;
+  }
+
+  const onVendorDomain =
+    host === vendorDomain || host.endsWith(`.${vendorDomain}`);
+  if (!onVendorDomain) {
+    logger.info(
+      'Trust portal deep-scrape skipped: source URL is not on vendor domain',
+      { vendorName, host, vendorDomain },
+    );
+    return null;
+  }
+
+  logger.info('Trust portal deep-scrape starting', {
+    vendorName,
+    sourceUrl,
+  });
+  // 1. Initial scrape
+  let initial: ScrapeResponse;
+  try {
+    initial = (await firecrawlClient.scrape(
+      sourceUrl,
+      buildInitialScrapeOptions() as unknown as Record<string, unknown>,
+    )) as ScrapeResponse;
+  } catch (error) {
+    logger.warn('Trust portal deep-scrape: initial scrape failed', {
+      vendorName,
+      sourceUrl,
+      error: error instanceof Error ? error.message : String(error),
+    });
+    return null;
+  }
+
+  const initialMarkdown = initial.markdown ?? '';
+  const links = Array.isArray(initial.links) ? initial.links : [];
+  logger.info('Trust portal deep-scrape: initial scrape returned', {
+    vendorName,
+    sourceUrl,
+    markdownLength: initialMarkdown.length,
+    linkCount: links.length,
+  });
+  // 2. Discover sections
+  const urlSections = discoverSectionUrls({ sourceUrl, links });
+
+  // 2a. If URL-based discovery found nothing (SPA sidebar with no hrefs),
+  // ask an LLM to identify tab labels from the initial markdown and
+  // synthesize click-by-text sections.
+  const tabSections: DeepScrapeSection[] =
+    urlSections.length === 0 && initialMarkdown.trim().length > 0
+      ? (await identifySidebarTabs({ vendorName, initialMarkdown })).map(
+          (tabLabel) => ({
+            url: sourceUrl,
+            anchor: null,
+            label: tabLabel,
+            tabLabel,
+          }),
+        )
+      : [];
+
+  const seenLabels = new Set<string>();
+  const sections: DeepScrapeSection[] = [];
+  for (const s of [...urlSections, ...tabSections]) {
+    const key = s.label.trim().toLowerCase();
+    if (!key || seenLabels.has(key)) continue;
+    seenLabels.add(key);
+    sections.push(s);
+    if (sections.length >= MAX_SECTION_URLS) break;
+  }
+
+  logger.info('Trust portal deep-scrape: sections discovered', {
+    vendorName,
+    sectionCount: sections.length,
+    urlSectionCount: urlSections.length,
+    tabSectionCount: tabSections.length,
+    sections: sections.map((s) => s.label),
+  });
+  // 3. Per-section scrapes (bounded concurrency)
+  const sectionResults = await mapWithConcurrency(
+    sections,
+    SECTION_CONCURRENCY,
+    async (section) => {
+      const response = (await firecrawlClient.scrape(
+        section.url,
+        buildSectionScrapeOptions(section) as unknown as Record<
+          string,
+          unknown
+        >,
+      )) as ScrapeResponse;
+      return { section, markdown: response.markdown ?? '' };
+    },
+  );
+
+  const sectionChunks: string[] = [];
+  for (const [index, result] of sectionResults.entries()) {
+    if (result.status === 'fulfilled') {
+      const { section, markdown } = result.value;
+      if (markdown.trim().length > 0) {
+        sectionChunks.push(
+          `\n\n---\n# Section: ${section.label}\n\n${markdown}`,
+        );
+      }
+    } else {
+      logger.warn('Trust portal deep-scrape: section scrape failed', {
+        vendorName,
+        section: sections[index].label,
+        error:
+          result.reason instanceof Error
+            ? result.reason.message
+            : String(result.reason),
+      });
+    }
+  }
+
+  const combinedMarkdown = truncateMarkdown(
+    [initialMarkdown, ...sectionChunks].join(''),
+  );
+
+  if (combinedMarkdown.trim().length === 0) {
+    logger.warn(
+      'Trust portal deep-scrape: combined markdown is empty, skipping extraction',
+      { vendorName, sourceUrl },
+    );
+    return null;
+  }
+  // 4. AI extraction
+  type ExtractedCert = {
+    type: string; status: VendorRiskAssessmentCertificationStatus;
+    issued_at?: string | null; expires_at?: string | null; evidence_snippet: string;
+  };
+  let extracted: { certifications: ExtractedCert[] };
+  try {
+    const { object } = await generateObject({
+      model: anthropic(EXTRACTION_MODEL),
+      schema: certificationExtractionSchema,
+      prompt: buildExtractionPrompt({ vendorName, combinedMarkdown }),
+    });
+    extracted = object;
+  } catch (error) {
+    logger.warn('Trust portal deep-scrape: AI extraction failed', {
+      vendorName,
+      error: error instanceof Error ? error.message : String(error),
+    });
+    return null;
+  }
+
+  const certifications: VendorRiskAssessmentCertification[] =
+    extracted.certifications
+      .filter(
+        (c) => c.evidence_snippet && c.evidence_snippet.trim().length > 0,
+      )
+      .map((c) => ({
+        type: c.type,
+        status: c.status,
+        issuedAt: c.issued_at ?? null,
+        expiresAt: c.expires_at ?? null,
+        url: null,
+      }));
+
+  logger.info('Trust portal deep-scrape: completed', {
+    vendorName,
+    certificationCount: certifications.length,
+    sectionCount: sections.length,
+    initialMarkdownLength: initialMarkdown.length,
+    combinedMarkdownLength: combinedMarkdown.length,
+  });
+
+  return certifications.length > 0 ? certifications : null;
+}
diff --git a/apps/api/src/trigger/vendor/vendor-risk-assessment/url-validation.ts b/apps/api/src/trigger/vendor/vendor-risk-assessment/url-validation.ts
index 454daf6bf4..18bff82c12 100644
--- a/apps/api/src/trigger/vendor/vendor-risk-assessment/url-validation.ts
+++ b/apps/api/src/trigger/vendor/vendor-risk-assessment/url-validation.ts
@@ -2,7 +2,7 @@ import { logger } from '@trigger.dev/sdk';
 import { getDomain } from 'tldts';
 
 // Well-known trust portal domains that vendors use to host their security pages
-const TRUSTED_PORTAL_DOMAINS = [
+export const TRUSTED_PORTAL_DOMAINS = [
   'trust.page', // SafeBase
   'vanta.com', // Vanta trust centers
   'drata.com', // Drata trust centers
@@ -100,3 +100,16 @@ export function validateVendorUrl(
     return null;
   }
 }
+
+/**
+ * Returns true if the given hostname matches (or is a subdomain of)
+ * a known third-party trust portal (SafeBase, Vanta, Drata, etc.).
+ * Used to gate the trust-portal deep-scrape pass: those portals are
+ * already handled well by the Firecrawl Agent, so we skip them.
+ */
+export function isKnownThirdPartyPortalHost(hostname: string): boolean {
+  const lower = hostname.toLowerCase();
+  return TRUSTED_PORTAL_DOMAINS.some(
+    (portal) => lower === portal || lower.endsWith(`.${portal}`),
+  );
+}