Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions .devcontainer/claude-web-import-index.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
# Restore the realm index from a CI-built cache instead of indexing live.
#
# CI's `cache-index` job (.github/workflows/ci.yaml) indexes every realm and
# uploads a `pg_dump --data-only` of boxel_index / realm_versions / realm_meta
# as the `boxel-index-cache` artifact. Importing it turns the multi-minute
# prerender indexing into a seconds-long SQL restore.
#
# This is the gh-free sibling of scripts/import-cached-index.sh: this cloud
# session cannot reach api.github.com directly (it 403s — only the Claude
# GitHub MCP integration can read Actions), so this script imports from a
# LOCAL cache file rather than calling `gh run download`. A Claude session
# fetches the artifact via the Actions API (MCP) and drops it at the default
# path below; `gh` is still used as a fallback for devs who have it.
#
# Exit 0 = index restored (caller should boot with
# REALM_SERVER_FULL_INDEX_ON_STARTUP=false).
# Exit 1 = nothing imported (DB already warm, no cache, or import failed);
# caller should let the realm-server index live.
# Shell options: `-u` rejects unset variables and `pipefail` makes a pipeline
# fail when any stage fails (the gunzip | psql restore below relies on it).
# `-e` is left off; the script checks the commands it cares about explicitly
# and chooses its own exit code.
set -uo pipefail

# GitHub repository passed to `gh -R` when falling back to a CI download.
REPO="cardstack/boxel"
# Target database; honours PGDATABASE and defaults to "boxel".
DB_NAME="${PGDATABASE:-boxel}"
# Location of the gzipped SQL dump; override with BOXEL_INDEX_CACHE_FILE.
CACHE_FILE="${BOXEL_INDEX_CACHE_FILE:-$HOME/.local/share/boxel/index-cache/boxel-index-cache.sql.gz}"

# Warm-database short circuit: the realm-server keeps its index in the boxel-pg
# container, so when that volume outlived the previous session there is
# nothing to import. A failed query (container down, table missing, …) is
# treated the same as "no rows" and the script carries on.
if ! existing_versions=$(docker exec boxel-pg psql -U postgres -d "$DB_NAME" -tAc \
  'SELECT COUNT(*) FROM realm_versions' 2>/dev/null); then
  existing_versions=""
fi
if [ -n "$existing_versions" ]; then
  # Non-numeric output makes `-gt` error out; that is silenced and counts as false.
  if [ "$existing_versions" -gt 0 ] 2>/dev/null; then
    echo "[index-cache] DB already has index data ($existing_versions realm versions); skipping import."
    exit 1
  fi
fi

# Locate the cache file. When it is not already on disk and the `gh` CLI is
# installed, try to pull the `boxel-index-cache` artifact from the most recent
# successful ci.yaml run on main. Every gh step is best-effort: any failure
# simply leaves the file absent, which is handled right after.
cache_dir="$(dirname "$CACHE_FILE")"
if [ ! -f "$CACHE_FILE" ] && command -v gh >/dev/null 2>&1; then
  if ! run_id=$(gh run list --workflow ci.yaml --branch main --status success --limit 1 \
    --json databaseId --jq '.[0].databaseId' --repo "$REPO" 2>/dev/null); then
    run_id=""
  fi
  if [ -n "$run_id" ]; then
    echo "[index-cache] Downloading cache from CI run $run_id via gh…"
    mkdir -p "$cache_dir"
    gh run download "$run_id" --name boxel-index-cache \
      --dir "$cache_dir" --repo "$REPO" 2>/dev/null || :
  fi
fi

# Still nothing to import: explain how to provide a cache and let the caller
# fall back to live indexing.
if [ ! -f "$CACHE_FILE" ]; then
  cat <<EOF
[index-cache] No cache file at $CACHE_FILE (and no gh download); will index live.
[index-cache] To use a cache, fetch the boxel-index-cache artifact from a
[index-cache] successful main CI run into that path (a Claude session can do
[index-cache] this via the GitHub Actions API; raw api.github.com is blocked here).
EOF
  exit 1
fi

# The data-only dump needs the schema to exist, so migrate first. Idempotent.
#
# `--dir=packages/realm-server` is relative to the repo root, so run the
# migration from there instead of relying on the caller's working directory.
# This script lives in .devcontainer/, directly under the repo root. If that
# location cannot be resolved (e.g. the script is fed to bash on stdin), keep
# the current directory, which matches the previous behaviour.
MIGRATE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")/.." 2>/dev/null && pwd)" || MIGRATE_ROOT=""
if [ -z "$MIGRATE_ROOT" ] || [ ! -d "$MIGRATE_ROOT/packages/realm-server" ]; then
  MIGRATE_ROOT="$PWD"
fi

echo "[index-cache] Migrating schema before restore…"
# Subshell so the `cd` does not leak into the rest of the script.
if ! (cd "$MIGRATE_ROOT" && mise exec -- pnpm --dir=packages/realm-server migrate) >/dev/null 2>&1; then
  echo "[index-cache] Migration failed; will index live." >&2
  exit 1
fi

echo "[index-cache] Restoring index from $CACHE_FILE …"

# Empties the three tables the CI dump repopulates. Output is left untouched so
# each call site decides what to show.
truncate_index_tables() {
  docker exec boxel-pg psql -U postgres -d "$DB_NAME" --quiet --no-psqlrc -c \
    "TRUNCATE boxel_index, realm_versions, realm_meta"
}

# Check the archive BEFORE touching the database: a truncated or non-gzip
# download should fall back to live indexing without wiping anything first.
if ! gzip -t "$CACHE_FILE" 2>/dev/null; then
  echo "[index-cache] $CACHE_FILE is not a readable gzip archive; will index live." >&2
  exit 1
fi

if ! truncate_index_tables; then
  echo "[index-cache] truncate failed" >&2
  exit 1
fi

# The cache stores https://localhost:4201/... URLs, which is exactly the
# standard-dev runtime origin — no remapping needed (unlike env mode).
# `pipefail` (set at the top) makes a gunzip failure fail the whole pipeline;
# ON_ERROR_STOP makes psql stop at the first SQL error.
if gunzip -c "$CACHE_FILE" \
  | docker exec -i boxel-pg psql -U postgres -d "$DB_NAME" --quiet --no-psqlrc -v ON_ERROR_STOP=1; then
  RESTORED=$(docker exec boxel-pg psql -U postgres -d "$DB_NAME" -tAc \
    "SELECT COUNT(*) FROM realm_versions" 2>/dev/null) || RESTORED=""
  # Only report success when rows actually landed. Exit 0 tells the caller to
  # skip the full index, which would leave the realm-server with an empty
  # index if the dump itself was empty.
  if [ -n "$RESTORED" ] && [ "$RESTORED" -gt 0 ] 2>/dev/null; then
    echo "[index-cache] Restored ($RESTORED realm versions). Realm server will boot without a full index."
    exit 0
  fi
  echo "[index-cache] Import finished but no realm versions were restored; indexing live." >&2
else
  echo "[index-cache] Import failed; truncating partial data and indexing live." >&2
fi

# Best-effort cleanup so live indexing starts from empty tables.
truncate_index_tables >/dev/null 2>&1 || true
exit 1
99 changes: 99 additions & 0 deletions .devcontainer/claude-web-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env bash
# Provisioning for running the Boxel stack in "Claude Code on the web"
# (claude.ai/code). Point the cloud environment's *Setup Script* at this file.
#
# The cloud VM runs the whole stack on localhost, so this just uses the repo's
# STANDARD dev tooling (`mise run dev`): the realm is at https://localhost:4201,
# the migration-seeded permissions already match that localhost default, and
# the worker/prerender reach it directly. No reverse proxy, TLS shim, or URL
# rewriting is needed — it's normal local dev, provisioned for a headless
# root cloud VM (see the synapse root/no-IPv6 handling in
# packages/matrix/support/synapse/index.ts).
#
# This script only PROVISIONS (deps + mkcert + dev cert + CA bundle + source
# realms). Start the stack PER SESSION (services don't persist in the cached
# snapshot) with the companion start script, which sets the env vars this
# environment needs and registers Matrix users on a fresh Synapse:
#
# .devcontainer/claude-web-start.sh
#
# It runs `mise run dev-all` (NOT `mise run dev`): the cloud VM is headless, so
# the host app must run in-process here. `dev` starts only the backend and
# leaves the host to a second terminal that this environment doesn't have —
# the prerender then waits forever for https://localhost:4200 and the whole
# stack fails. `dev-all` brings up the host first, then the same backend.
#
# Cloud environment settings to set in the claude.ai/code UI:
# - Network access: "Full" (or a custom allowlist) — needed for OpenRouter,
# GitHub, Docker Hub, and the icon CDN (boxel-icons.boxel.ai).
# - RAM ceiling is ~16 GB, so the catalog realm (by far the heaviest to index,
# ~1000+ files) is skipped via SKIP_CATALOG to stay within budget. The
# boxel-homepage realm lives in a private repo this VM can't clone, so it's
# skipped too (SKIP_BOXEL_HOMEPAGE) — both are set by the start script.
set -euo pipefail

# Toolchain — mise pins the exact node/pnpm/ts-node from .mise.toml.
if ! command -v mise >/dev/null 2>&1; then
  # --fail: an HTTP error must abort (via pipefail + errexit) instead of
  # feeding the error page to `sh`. --location follows redirects;
  # --silent/--show-error keep the log quiet but still print real errors.
  curl --fail --silent --show-error --location https://mise.run \
    | MISE_INSTALL_PATH="$HOME/.local/bin/mise" sh
  export PATH="$HOME/.local/bin:$PATH"
fi
eval "$(mise activate bash)"
mise trust
mise install

# Dependencies.
mise exec -- pnpm install --frozen-lockfile

# Build the boxel-icons + boxel-ui addons (in dependency order). The host app's
# vite build imports per-icon modules from @cardstack/boxel-icons/dist, which
# `pnpm install` does not produce — without this the host fails to build with
# "Cannot find module '@cardstack/boxel-icons/...'" and never serves.
mise run build:ui

# Runs a command with root privileges: through sudo when the current user is
# not root and sudo is installed, directly otherwise.
run_privileged() {
  if [ "$(id -u)" -ne 0 ] && command -v sudo >/dev/null 2>&1; then
    sudo "$@"
  else
    "$@"
  fi
}

# infra:ensure-dev-cert fails hard without mkcert, which provisions the
# local-dev CA and leaf cert. The base cloud image does not ship it, so install
# it together with libnss3-tools (mkcert -install needs it to write the NSS
# trust DB).
if ! command -v mkcert >/dev/null 2>&1; then
  run_privileged apt-get update -y
  run_privileged apt-get install -y mkcert libnss3-tools
fi

# Local-dev TLS cert. Standard dev serves HTTPS on localhost and env-vars.sh
# treats the cert as mandatory. Provisioning it here also lets Node (through
# NODE_EXTRA_CA_CERTS, set by env-vars.sh) and the prerender's headless Chrome
# trust https://localhost; since localhost is an https-loopback,
# browser-manager.ts adds --ignore-certificate-errors on its own, with no
# extra configuration.
mise run infra:ensure-dev-cert

# Combined CA bundle. This cloud environment routes outbound HTTPS through an
# agent proxy and pre-sets NODE_EXTRA_CA_CERTS to the proxy's CA bundle. Node
# reads NODE_EXTRA_CA_CERTS as a SINGLE file (not a list), and env-vars.sh
# only points it at mkcert's rootCA when it's unset — so the proxy value wins
# and Node never trusts the mkcert leaf. The realm-server's startup fetch of
# the host (https://localhost:4200) then fails with
# UNABLE_TO_VERIFY_LEAF_SIGNATURE and it crash-loops. Concatenate the proxy
# bundle and mkcert's rootCA into one file so Node trusts BOTH the proxy
# (outbound) and the local leaf (loopback); the start script exports
# NODE_EXTRA_CA_CERTS at it. No-op when the env doesn't pre-set a proxy CA.
if [ -n "${NODE_EXTRA_CA_CERTS:-}" ] && [ -f "${NODE_EXTRA_CA_CERTS}" ]; then
  CAROOT="$(mkcert -CAROOT)"
  COMBINED="$HOME/.local/share/boxel/dev-certs/combined-ca.pem"
  if [ "${NODE_EXTRA_CA_CERTS}" -ef "$COMBINED" ]; then
    # Re-run with NODE_EXTRA_CA_CERTS already pointing at the combined file:
    # redirecting onto it would truncate it before `cat` could read it and the
    # proxy certificates would be lost. Leave the existing bundle alone.
    echo "NODE_EXTRA_CA_CERTS already points at the combined CA bundle ($COMBINED); leaving it unchanged."
  else
    # Do not assume an earlier step created the target directory.
    mkdir -p "$(dirname "$COMBINED")"
    # Build in a sibling temp file and rename, so a failure never leaves a
    # half-written bundle at the final path.
    COMBINED_TMP="${COMBINED}.tmp.$$"
    # The bare `echo` guarantees a line break between the two inputs in case
    # the proxy bundle has no trailing newline (which would fuse two PEM
    # blocks into one unparseable line).
    if ! { cat "${NODE_EXTRA_CA_CERTS}" && echo && cat "${CAROOT}/rootCA.pem"; } > "$COMBINED_TMP"; then
      rm -f "$COMBINED_TMP"
      echo "Failed to build combined CA bundle from ${NODE_EXTRA_CA_CERTS} and ${CAROOT}/rootCA.pem" >&2
      exit 1
    fi
    mv -f "$COMBINED_TMP" "$COMBINED"
    echo "Wrote combined CA bundle (proxy + mkcert) to $COMBINED"
  fi
fi

# Source realms are kept in their own repositories. The VM has no SSH key, so
# rewrite git@github.com: remotes to HTTPS before cloning. The catalog realm is
# deliberately left out (it is skipped at runtime to fit the memory budget);
# run `pnpm --dir=packages/catalog catalog:setup` as well if you need it.
git config --global 'url.https://github.com/.insteadOf' 'git@github.com:'
mise exec -- pnpm --dir=packages/skills-realm skills:setup

# The Synapse/Postgres Docker images are pulled on the first stack launch and
# then cached by the cloud snapshot, so later sessions start faster.

cat <<'EOF'

Provisioning complete. Start the stack with:
 .devcontainer/claude-web-start.sh
Realm: https://localhost:4201 Host: https://localhost:4200
EOF
79 changes: 79 additions & 0 deletions .devcontainer/claude-web-start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
# Per-session start for the Boxel stack in "Claude Code on the web". Run after
# .devcontainer/claude-web-setup.sh has provisioned the snapshot. Services do
# not persist in the cached snapshot, so this runs every session.
#
# What this handles that plain `mise run dev` does not, in this environment:
# - dev-all, not dev: the VM is headless, so the host app must run in-process
# (see the note in claude-web-setup.sh).
# - Docker: the daemon isn't running at session start; bring it up so the
# Synapse / Postgres / SMTP containers can launch.
# - CA bundle: point Node at the combined proxy+mkcert bundle so the
# realm-server can verify the host's mkcert leaf over loopback while still
# trusting the agent proxy for outbound HTTPS (see claude-web-setup.sh).
# - Matrix users: standard dev assumes the realm/bot users are already
# registered (full-reset does it). On this fresh Synapse they are not, so
# the realm-server's Matrix login 403s and it runs without broadcasting.
# ensure-synapse only auto-registers in environment mode, so do it here —
# BEFORE the stack boots, so the realm-server logs in cleanly. The
# registration script is idempotent (skips users that already exist).
# - Chromium sandbox: the prerender's headless Chrome can't sandbox as root,
# so PUPPETEER_DISABLE_SANDBOX makes its standby probe pass.
# - SKIP_CATALOG / SKIP_BOXEL_HOMEPAGE: fit the memory budget and skip the
# realm whose content repo this VM can't clone.
set -euo pipefail

# This script sits in .devcontainer/, one level below the repo root; work from
# the root so the mise tasks and relative paths below resolve.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
REPO_ROOT="$(cd "$script_dir/.." && pwd)"
cd "$REPO_ROOT"

# Put ~/.local/bin on PATH and load mise's shell integration.
PATH="$HOME/.local/bin:$PATH"
export PATH
eval "$(mise activate bash)"

# Docker daemon. The snapshot caches containers and images but not the daemon
# process, so launch dockerd when `docker info` gets no answer and poll once a
# second (30 waits at most) until it responds.
if ! docker info >/dev/null 2>&1; then
  echo "[start] Starting Docker daemon…"
  (dockerd >/tmp/dockerd.log 2>&1 &)
  waits_left=30
  until docker info >/dev/null 2>&1; do
    if [ "$waits_left" -le 0 ]; then
      echo "[start] Docker failed to start; see /tmp/dockerd.log" >&2
      exit 1
    fi
    sleep 1
    waits_left=$((waits_left - 1))
  done
fi

# Point Node at the combined bundle when it exists, so it trusts the agent
# proxy CA for outbound traffic and the mkcert leaf on loopback at once. When
# the file is absent NODE_EXTRA_CA_CERTS is left exactly as it was.
COMBINED="$HOME/.local/share/boxel/dev-certs/combined-ca.pem"
[ ! -f "$COMBINED" ] || export NODE_EXTRA_CA_CERTS="$COMBINED"

# Register Matrix users on a fresh Synapse, once, before the stack boots, so
# the realm-server logs in cleanly instead of caching a failed session.
# register-all needs BOTH the Postgres container (it gates on `pg_isready`)
# and Synapse, so bring both up first; dev-all's own start:pg / start:matrix
# then see them already running and move on.
echo "[start] Ensuring Postgres + Synapse are up for Matrix user registration…"
mise run infra:ensure-pg
mise run infra:start-synapse

# Poll Synapse's versions endpoint: up to 60 attempts, 2 s apart, each request
# capped at 5 s.
synapse_ready=false
for _ in $(seq 1 60); do
  if curl -sf -o /dev/null --max-time 5 http://localhost:8008/_matrix/client/versions; then
    synapse_ready=true
    break
  fi
  sleep 2
done
# Registration stays best-effort, but say so loudly: otherwise the only
# symptom is the realm-server's Matrix login failing much later.
if [ "$synapse_ready" != true ]; then
  echo "[start] WARNING: Synapse did not answer on http://localhost:8008 in time; Matrix user registration will probably fail." >&2
fi

echo "[start] Registering Matrix users (idempotent)…"
if ! mise exec -- pnpm --dir=packages/matrix register-all; then
  echo "[start] WARNING: Matrix user registration failed; continuing, but the realm-server's Matrix login may be rejected." >&2
fi

# Environment handed to the stack: skip the catalog and boxel-homepage realms,
# and disable the Chromium sandbox for the prerender.
launch_env=(
  SKIP_CATALOG=true
  SKIP_BOXEL_HOMEPAGE=true
  PUPPETEER_DISABLE_SANDBOX=true
)

# Try to restore the realm index from the CI cache so the stack does not have
# to re-render every card. Only when the import script succeeds is the
# realm-server told to trust that index rather than run a full index on
# startup.
if "$REPO_ROOT/.devcontainer/claude-web-import-index.sh"; then
  launch_env+=(REALM_SERVER_FULL_INDEX_ON_STARTUP=false)
fi

echo "[start] Launching the stack (mise run dev-all)…"
exec env "${launch_env[@]}" mise run dev-all
40 changes: 40 additions & 0 deletions packages/matrix/support/synapse/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ import {
// Presumably the fixed container IP given to Synapse on its Docker network
// when the shared (non-dynamic-port) setup is used — TODO confirm against the
// code that consumes it.
export const SYNAPSE_IP_ADDRESS = '172.20.0.5';
// Synapse port; in dev the server is reached at localhost:8008.
export const SYNAPSE_PORT = 8008;

// By default Synapse binds its listeners to "::" (IPv6 dual-stack). On a host
// whose kernel has no IPv6 support (some minimal cloud VMs / containers) that
// bind fails and synapse exits at startup with "Address family not supported
// by protocol". This helper reports whether any network interface on the host
// carries an IPv6 address, so the generated config can fall back to an
// IPv4-only bind when none does.
function hostHasIPv6(): boolean {
  return Object.values(os.networkInterfaces()).some((addresses) =>
    (addresses ?? []).some(
      // Depending on the Node version `family` is either the string 'IPv6'
      // or the number 6, so both spellings are accepted.
      (info) => info.family === 'IPv6' || (info.family as unknown) === 6,
    ),
  );
}

// Absolute path to `registration_secret.txt`, resolved two directories above
// the directory that contains this module.
const registrationSecretFile = path.resolve(
path.join(import.meta.dirname, '..', '..', 'registration_secret.txt'),
);
Expand Down Expand Up @@ -242,6 +260,21 @@ export async function synapseStart(
port: hostPort,
publicBaseUrl: `http://localhost:${hostPort}`,
});
// On a host without IPv6, rewrite the generated config's listeners to bind
// IPv4 only — synapse is reached via localhost:8008 in dev regardless, so
// dropping the dual-stack "::" bind is transparent there but lets synapse
// start at all. Hosts with IPv6 keep the template's "::" untouched.
if (!hostHasIPv6()) {
let hsYaml = path.join(synCfg.configDir, 'homeserver.yaml');
let contents = await fse.readFile(hsYaml, 'utf8');
let patched = contents.replace(
/bind_addresses:\s*\[\s*"::"\s*\]/g,
'bind_addresses: ["0.0.0.0"]',
);
if (patched !== contents) {
await fse.writeFile(hsYaml, patched);
}
}
containerName =
opts?.containerName ||
(isEnvironmentMode()
Expand All @@ -262,6 +295,13 @@ export async function synapseStart(
'-e',
'PYTHONPATH=/custom/modules',
];
// When the host runs as root (e.g. the Claude-web cloud VM), the synapse
// image would otherwise drop privileges to its default uid 991, which
// cannot write the root-owned config dir mounted at /data. Telling the
// image to stay as root (UID/GID=0) keeps it able to create media_store.
if (process.getuid?.() === 0) {
dockerParams.push('-e', 'UID=0', '-e', 'GID=0');
}
if (useDynamicHostPort) {
// In dynamic-host-port mode multiple harnesses may run concurrently, so
// we must not claim the shared fixed Synapse container IP.
Expand Down