diff --git a/.devcontainer/claude-web-import-index.sh b/.devcontainer/claude-web-import-index.sh
new file mode 100755
index 0000000000..cb9b7de783
--- /dev/null
+++ b/.devcontainer/claude-web-import-index.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# Restore the realm index from a CI-built cache instead of indexing live.
+#
+# CI's `cache-index` job (.github/workflows/ci.yaml) indexes every realm and
+# uploads a `pg_dump --data-only` of boxel_index / realm_versions / realm_meta
+# as the `boxel-index-cache` artifact. Importing it turns the multi-minute
+# prerender indexing into a seconds-long SQL restore.
+#
+# This is the gh-free sibling of scripts/import-cached-index.sh: this cloud
+# session cannot reach api.github.com directly (it 403s — only the Claude
+# GitHub MCP integration can read Actions), so this script imports from a
+# LOCAL cache file rather than calling `gh run download`. A Claude session
+# fetches the artifact via the Actions API (MCP) and drops it at the default
+# path below; `gh` is still used as a fallback for devs who have it.
+#
+# Exit 0 = index restored (caller should boot with
+#          REALM_SERVER_FULL_INDEX_ON_STARTUP=false).
+# Exit 1 = nothing imported (DB already warm, no cache, import failed, or the
+#          restore produced no rows); caller should let the realm-server
+#          index live.
+set -uo pipefail
+
+REPO="cardstack/boxel"
+DB_NAME="${PGDATABASE:-boxel}"
+CACHE_FILE="${BOXEL_INDEX_CACHE_FILE:-$HOME/.local/share/boxel/index-cache/boxel-index-cache.sql.gz}"
+# Repo root, derived from this script's location, so the migration below does
+# not depend on the directory the caller happened to invoke us from.
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+# Already warm? The realm-server persists its index in boxel-pg; if the volume
+# survived from a previous session there's nothing to restore.
+ROW_COUNT=$(docker exec boxel-pg psql -U postgres -d "$DB_NAME" -tAc \
+  "SELECT COUNT(*) FROM realm_versions" 2>/dev/null) || ROW_COUNT=""
+if [ -n "$ROW_COUNT" ] && [ "$ROW_COUNT" -gt 0 ] 2>/dev/null; then
+  echo "[index-cache] DB already has index data ($ROW_COUNT realm versions); skipping import."
+  exit 1
+fi
+
+# Fall back to `gh` when a local cache file isn't present and the CLI exists.
+if [ ! -f "$CACHE_FILE" ] && command -v gh >/dev/null 2>&1; then
+  RUN_ID=$(gh run list -w ci.yaml -b main -s success -L 1 \
+    --json databaseId -q '.[0].databaseId' -R "$REPO" 2>/dev/null) || RUN_ID=""
+  if [ -n "$RUN_ID" ]; then
+    echo "[index-cache] Downloading cache from CI run $RUN_ID via gh…"
+    mkdir -p "$(dirname "$CACHE_FILE")"
+    gh run download "$RUN_ID" -n boxel-index-cache \
+      -D "$(dirname "$CACHE_FILE")" -R "$REPO" 2>/dev/null || true
+  fi
+fi
+
+if [ ! -f "$CACHE_FILE" ]; then
+  echo "[index-cache] No cache file at $CACHE_FILE (and no gh download); will index live."
+  echo "[index-cache] To use a cache, fetch the boxel-index-cache artifact from a"
+  echo "[index-cache] successful main CI run into that path (a Claude session can do"
+  echo "[index-cache] this via the GitHub Actions API; raw api.github.com is blocked here)."
+  exit 1
+fi
+
+# The data-only dump needs the schema to exist, so migrate first. Idempotent.
+# Run it from the repo root, in a subshell so our own cwd is untouched: the
+# relative --dir (and mise's project config lookup) resolve against the cwd.
+echo "[index-cache] Migrating schema before restore…"
+if ! (cd "$REPO_ROOT" && mise exec -- pnpm --dir=packages/realm-server migrate) >/dev/null 2>&1; then
+  echo "[index-cache] Migration failed; will index live." >&2
+  exit 1
+fi
+
+echo "[index-cache] Restoring index from $CACHE_FILE …"
+docker exec boxel-pg psql -U postgres -d "$DB_NAME" --quiet --no-psqlrc -c \
+  "TRUNCATE boxel_index, realm_versions, realm_meta" || { echo "[index-cache] truncate failed" >&2; exit 1; }
+
+# The cache stores https://localhost:4201/... URLs, which is exactly the
+# standard-dev runtime origin — no remapping needed (unlike env mode).
+if gunzip -c "$CACHE_FILE" \
+  | docker exec -i boxel-pg psql -U postgres -d "$DB_NAME" --quiet --no-psqlrc -v ON_ERROR_STOP=1; then
+  RESTORED=$(docker exec boxel-pg psql -U postgres -d "$DB_NAME" -tAc \
+    "SELECT COUNT(*) FROM realm_versions" 2>/dev/null) || RESTORED=""
+  # Only report success when rows actually landed: on exit 0 the caller turns
+  # off the startup full index, so an empty restore must fall through instead.
+  if [ -n "$RESTORED" ] && [ "$RESTORED" -gt 0 ] 2>/dev/null; then
+    echo "[index-cache] Restored ($RESTORED realm versions). Realm server will boot without a full index."
+    exit 0
+  fi
+  echo "[index-cache] Restore finished but realm_versions is empty." >&2
+fi
+
+echo "[index-cache] Import failed; truncating partial data and indexing live." >&2
+docker exec boxel-pg psql -U postgres -d "$DB_NAME" --quiet --no-psqlrc -c \
+  "TRUNCATE boxel_index, realm_versions, realm_meta" >/dev/null 2>&1 || true
+exit 1
diff --git a/.devcontainer/claude-web-setup.sh b/.devcontainer/claude-web-setup.sh
new file mode 100755
index 0000000000..ff2cb4beba
--- /dev/null
+++ b/.devcontainer/claude-web-setup.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Provisioning for running the Boxel stack in "Claude Code on the web"
+# (claude.ai/code). Point the cloud environment's *Setup Script* at this file.
+#
+# The cloud VM runs the whole stack on localhost, so this just uses the repo's
+# STANDARD dev tooling (`mise run dev`): the realm is at https://localhost:4201,
+# the migration-seeded permissions already match that localhost default, and
+# the worker/prerender reach it directly. No reverse proxy, TLS shim, or URL
+# rewriting is needed — it's normal local dev, provisioned for a headless
+# root cloud VM (see the synapse root/no-IPv6 handling in
+# packages/matrix/support/synapse/index.ts).
+#
+# This script only PROVISIONS (deps + mkcert + dev cert + CA bundle + source
+# realms). Start the stack PER SESSION (services don't persist in the cached
+# snapshot) with the companion start script, which sets the env vars this
+# environment needs and registers Matrix users on a fresh Synapse:
+#
+#   .devcontainer/claude-web-start.sh
+#
+# It runs `mise run dev-all` (NOT `mise run dev`): the cloud VM is headless, so
+# the host app must run in-process here. `dev` starts only the backend and
+# leaves the host to a second terminal that this environment doesn't have —
+# the prerender then waits forever for https://localhost:4200 and the whole
+# stack fails. `dev-all` brings up the host first, then the same backend.
+#
+# Cloud environment settings to set in the claude.ai/code UI:
+#   - Network access: "Full" (or a custom allowlist) — needed for OpenRouter,
+#     GitHub, Docker Hub, and the icon CDN (boxel-icons.boxel.ai).
+#   - RAM ceiling is ~16 GB, so the catalog realm (by far the heaviest to index,
+#     ~1000+ files) is skipped via SKIP_CATALOG to stay within budget. The
+#     boxel-homepage realm lives in a private repo this VM can't clone, so it's
+#     skipped too (SKIP_BOXEL_HOMEPAGE) — both are set by the start script.
+set -euo pipefail
+
+# Toolchain — mise pins the exact node/pnpm/ts-node from .mise.toml. curl runs
+# with --fail so an HTTP error page is never piped into `sh` as an installer.
+if ! command -v mise >/dev/null 2>&1; then
+  curl -fsSL https://mise.run | MISE_INSTALL_PATH="$HOME/.local/bin/mise" sh
+  export PATH="$HOME/.local/bin:$PATH"
+fi
+eval "$(mise activate bash)"
+mise trust
+mise install
+
+# Dependencies.
+mise exec -- pnpm install --frozen-lockfile
+
+# Build the boxel-icons + boxel-ui addons (in dependency order). The host app's
+# vite build imports per-icon modules from @cardstack/boxel-icons/dist, which
+# `pnpm install` does not produce — without this the host fails to build with
+# "Cannot find module '@cardstack/boxel-icons/...'" and never serves.
+mise run build:ui
+
+# mkcert provisions the local-dev CA + leaf cert; infra:ensure-dev-cert fails
+# hard if it's missing. The base cloud image doesn't ship it, so install it
+# (and libnss3-tools, which mkcert -install needs to write the NSS trust DB).
+if ! command -v mkcert >/dev/null 2>&1; then
+  SUDO=""
+  [ "$(id -u)" -ne 0 ] && command -v sudo >/dev/null 2>&1 && SUDO="sudo"
+  $SUDO apt-get update -y
+  $SUDO apt-get install -y mkcert libnss3-tools
+fi
+
+# Local-dev TLS cert: standard dev serves HTTPS on localhost and env-vars.sh
+# treats the cert as mandatory. Provisioning it here also lets Node (via
+# NODE_EXTRA_CA_CERTS, set by env-vars.sh) and the prerender's headless Chrome
+# trust https://localhost — and because localhost IS an https-loopback,
+# browser-manager.ts auto-adds --ignore-certificate-errors (no extra config).
+mise run infra:ensure-dev-cert
+
+# Combined CA bundle. This cloud environment routes outbound HTTPS through an
+# agent proxy and pre-sets NODE_EXTRA_CA_CERTS to the proxy's CA bundle. Node
+# reads NODE_EXTRA_CA_CERTS as a SINGLE file (not a list), and env-vars.sh
+# only points it at mkcert's rootCA when it's unset — so the proxy value wins
+# and Node never trusts the mkcert leaf. The realm-server's startup fetch of
+# the host (https://localhost:4200) then fails with
+# UNABLE_TO_VERIFY_LEAF_SIGNATURE and it crash-loops. Concatenate the proxy
+# bundle and mkcert's rootCA into one file so Node trusts BOTH the proxy
+# (outbound) and the local leaf (loopback); the start script exports
+# NODE_EXTRA_CA_CERTS at it. No-op when the env doesn't pre-set a proxy CA.
+if [ -n "${NODE_EXTRA_CA_CERTS:-}" ] && [ -f "${NODE_EXTRA_CA_CERTS}" ]; then
+  CAROOT="$(mkcert -CAROOT)"
+  COMBINED="$HOME/.local/share/boxel/dev-certs/combined-ca.pem"
+  cat "${NODE_EXTRA_CA_CERTS}" "${CAROOT}/rootCA.pem" > "$COMBINED"
+  echo "Wrote combined CA bundle (proxy + mkcert) to $COMBINED"
+fi
+
+# Source realms live in separate repos; clone over HTTPS (no SSH key in the VM).
+# Catalog is intentionally NOT cloned here — it's skipped at runtime to fit the
+# memory budget. Add `pnpm --dir=packages/catalog catalog:setup` if you need it.
+git config --global url."https://github.com/".insteadOf "git@github.com:"
+mise exec -- pnpm --dir=packages/skills-realm skills:setup
+
+# Note: the first claude-web-start.sh run pulls the Synapse/Postgres Docker
+# images; the cloud snapshot caches them so later sessions start faster.
+
+echo ""
+echo "Provisioning complete. Start the stack with:"
+echo "  .devcontainer/claude-web-start.sh"
+echo "Realm: https://localhost:4201   Host: https://localhost:4200"
diff --git a/.devcontainer/claude-web-start.sh b/.devcontainer/claude-web-start.sh
new file mode 100755
index 0000000000..2cc4cfa7fd
--- /dev/null
+++ b/.devcontainer/claude-web-start.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# Per-session start for the Boxel stack in "Claude Code on the web". Run after
+# .devcontainer/claude-web-setup.sh has provisioned the snapshot. Services do
+# not persist in the cached snapshot, so this runs every session.
+#
+# What this handles that plain `mise run dev` does not, in this environment:
+#   - dev-all, not dev: the VM is headless, so the host app must run in-process
+#     (see the note in claude-web-setup.sh).
+#   - Docker: the daemon isn't running at session start; bring it up so the
+#     Synapse / Postgres / SMTP containers can launch.
+#   - CA bundle: point Node at the combined proxy+mkcert bundle so the
+#     realm-server can verify the host's mkcert leaf over loopback while still
+#     trusting the agent proxy for outbound HTTPS (see claude-web-setup.sh).
+#   - Matrix users: standard dev assumes the realm/bot users are already
+#     registered (full-reset does it). On this fresh Synapse they are not, so
+#     the realm-server's Matrix login 403s and it runs without broadcasting.
+#     ensure-synapse only auto-registers in environment mode, so do it here —
+#     BEFORE the stack boots, so the realm-server logs in cleanly. The
+#     registration script is idempotent (skips users that already exist).
+#   - Chromium sandbox: the prerender's headless Chrome can't sandbox as root,
+#     so PUPPETEER_DISABLE_SANDBOX makes its standby probe pass.
+#   - SKIP_CATALOG / SKIP_BOXEL_HOMEPAGE: fit the memory budget and skip the
+#     realm whose content repo this VM can't clone.
+set -euo pipefail
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$REPO_ROOT"
+
+export PATH="$HOME/.local/bin:$PATH"
+eval "$(mise activate bash)"
+
+# Docker daemon: start it if the socket isn't responding. Containers and their
+# images are cached in the snapshot, but the daemon process is not.
+if ! docker info >/dev/null 2>&1; then
+  echo "[start] Starting Docker daemon…"
+  (dockerd >/tmp/dockerd.log 2>&1 &)
+  for _ in $(seq 1 30); do
+    docker info >/dev/null 2>&1 && break
+    sleep 1
+  done
+  docker info >/dev/null 2>&1 || { echo "[start] Docker failed to start; see /tmp/dockerd.log" >&2; exit 1; }
+fi
+
+# Trust both the agent proxy CA (outbound) and the mkcert leaf (loopback).
+COMBINED="$HOME/.local/share/boxel/dev-certs/combined-ca.pem"
+if [ -f "$COMBINED" ]; then
+  export NODE_EXTRA_CA_CERTS="$COMBINED"
+fi
+
+# Register Matrix users on a fresh Synapse, once, before the stack boots, so
+# the realm-server logs in cleanly instead of caching a failed session.
+# register-all needs BOTH the Postgres container (it gates on `pg_isready`)
+# and Synapse, so bring both up first; dev-all's own start:pg / start:matrix
+# then see them already running and move on.
+echo "[start] Ensuring Postgres + Synapse are up for Matrix user registration…"
+mise run infra:ensure-pg
+mise run infra:start-synapse
+for _ in $(seq 1 60); do
+  curl -sf -o /dev/null --max-time 5 http://localhost:8008/_matrix/client/versions && break
+  sleep 2
+done
+echo "[start] Registering Matrix users (idempotent)…"
+mise exec -- pnpm --dir=packages/matrix register-all || echo "[start] WARNING: Matrix user registration failed; continuing anyway." >&2
+
+# Restore the realm index from the CI cache if one's available, so the stack
+# comes up without re-rendering every card. On success, tell the realm-server
+# to trust the imported index instead of doing a full index on startup.
+if "$REPO_ROOT/.devcontainer/claude-web-import-index.sh"; then
+  INDEX_ENV="REALM_SERVER_FULL_INDEX_ON_STARTUP=false"
+else
+  INDEX_ENV=""
+fi
+
+echo "[start] Launching the stack (mise run dev-all)…"
+# Pass the flag only when set; an empty value expands to no argument at all.
+exec env \
+  SKIP_CATALOG=true SKIP_BOXEL_HOMEPAGE=true PUPPETEER_DISABLE_SANDBOX=true \
+  ${INDEX_ENV:+"$INDEX_ENV"} \
+  mise run dev-all
diff --git a/packages/matrix/support/synapse/index.ts b/packages/matrix/support/synapse/index.ts
index 8fe96c40d6..4a20af1876 100644
--- a/packages/matrix/support/synapse/index.ts
+++ b/packages/matrix/support/synapse/index.ts
@@ -23,6 +23,24 @@ import {
 export const SYNAPSE_IP_ADDRESS = '172.20.0.5';
 export const SYNAPSE_PORT = 8008;
 
+// Synapse's listeners bind to "::" (IPv6 dual-stack) by default. On a host
+// whose kernel has no IPv6 (some minimal cloud VMs / containers) that bind
+// fails and synapse dies at startup with "Address family not supported by
+// protocol". This probe reports whether any network interface has an IPv6
+// address, so the generated config can fall back to IPv4-only binding. Node
+// has reported `family` as both the string 'IPv6' and the number 6 across
+// versions, so either form counts.
+function hostHasIPv6(): boolean {
+  let isIPv6 = (family: unknown) => family === 'IPv6' || family === 6;
+  let byInterface = os.networkInterfaces();
+  for (let addresses of Object.values(byInterface)) {
+    if ((addresses ?? []).some((info) => isIPv6(info.family))) {
+      return true;
+    }
+  }
+  return false;
+}
+
 const registrationSecretFile = path.resolve(
   path.join(import.meta.dirname, '..', '..', 'registration_secret.txt'),
 );
@@ -242,6 +260,21 @@ export async function synapseStart(
     port: hostPort,
     publicBaseUrl: `http://localhost:${hostPort}`,
   });
+  // On a host without IPv6, rewrite the generated config's listeners to bind
+  // IPv4 only — synapse is reached via localhost:8008 in dev regardless, so
+  // dropping the dual-stack "::" bind is transparent there but lets synapse
+  // start at all. Hosts with IPv6 keep the template's "::" untouched.
+  if (!hostHasIPv6()) {
+    let hsYaml = path.join(synCfg.configDir, 'homeserver.yaml');
+    let contents = await fse.readFile(hsYaml, 'utf8');
+    let patched = contents.replace(
+      /bind_addresses:\s*\[\s*(["'])::\1\s*\]/g, // "::" or '::' — YAML accepts both quote styles
+      'bind_addresses: ["0.0.0.0"]',
+    );
+    if (patched !== contents) {
+      await fse.writeFile(hsYaml, patched);
+    }
+  }
   containerName =
     opts?.containerName ||
     (isEnvironmentMode()
@@ -262,6 +295,13 @@ export async function synapseStart(
     '-e',
     'PYTHONPATH=/custom/modules',
   ];
+  // When the host runs as root (e.g. the Claude-web cloud VM), the synapse
+  // image would otherwise drop privileges to its default uid 991, which
+  // cannot write the root-owned config dir mounted at /data. Telling the
+  // image to stay as root (UID/GID=0) keeps it able to create media_store.
+  if (process.getuid?.() === 0) {
+    dockerParams.push('-e', 'UID=0', '-e', 'GID=0');
+  }
   if (useDynamicHostPort) {
     // In dynamic-host-port mode multiple harnesses may run concurrently, so
     // we must not claim the shared fixed Synapse container IP.