From 86e6c88052846426292309255e769920f60f81f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?=
Date: Thu, 21 May 2026 16:26:27 -0700
Subject: [PATCH 1/2] feat: embed spark inside bt

---
 Cargo.lock             |  48 +++++++++--
 Cargo.toml             |   1 +
 build.rs               |  91 +++++++++++++++++++++
 src/main.rs            |   7 ++
 src/spark.rs           | 181 +++++++++++++++++++++++++++++++++++++++++
 src/utils/fs_atomic.rs |   8 ++
 src/utils/mod.rs       |   2 +-
 7 files changed, 329 insertions(+), 9 deletions(-)
 create mode 100644 src/spark.rs

diff --git a/Cargo.lock b/Cargo.lock
index 0b42b699..27fbbbbb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -531,6 +531,7 @@ dependencies = [
  "serde_json 1.0.149",
  "sha2",
  "strip-ansi-escapes",
+ "tar",
  "tempfile",
  "tokio",
  "toml",
@@ -1093,7 +1094,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -1102,6 +1103,16 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
+[[package]]
+name = "filetime"
+version = "0.2.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759"
+dependencies = [
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.9"
@@ -1473,7 +1484,7 @@ dependencies = [
  "libc",
  "percent-encoding",
  "pin-project-lite",
- "socket2 0.5.10",
+ "socket2 0.6.2",
  "tokio",
  "tower-service",
  "tracing",
@@ -2285,7 +2296,7 @@ dependencies = [
  "quinn-udp",
  "rustc-hash",
  "rustls",
- "socket2 0.5.10",
+ "socket2 0.6.2",
  "thiserror 2.0.18",
  "tokio",
  "tracing",
@@ -2322,9 +2333,9 @@ dependencies = [
  "cfg_aliases",
  "libc",
  "once_cell",
- "socket2 0.5.10",
+ "socket2 0.6.2",
  "tracing",
- "windows-sys 0.59.0",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -2592,7 +2603,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys 0.12.1",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3064,6 +3075,17 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "tar"
+version = "0.4.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f6221d9a6003c78398e3b239969f352578258df48c8eb051caadae0015bc840"
+dependencies = [
+ "filetime",
+ "libc",
+ "xattr",
+]
+
 [[package]]
 name = "tempfile"
 version = "3.26.0"
@@ -3074,7 +3096,7 @@ dependencies = [
  "getrandom 0.4.1",
  "once_cell",
  "rustix 1.1.4",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3721,7 +3743,7 @@ version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.48.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -4123,6 +4145,16 @@ version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
 
+[[package]]
+name = "xattr"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156"
+dependencies = [
+ "libc",
+ "rustix 1.1.4",
+]
+
 [[package]]
 name = "yoke"
 version = "0.8.1"
diff --git a/Cargo.toml b/Cargo.toml
index 16c878e1..200e26f6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,6 +48,7 @@ dirs = "5"
 pathdiff = "0.2.3"
 glob = "0.3"
 flate2 = "1.1.2"
+tar = "0.4"
 tempfile = "3"
 uuid = { version = "1.21.0", features = ["v4"] }
 
diff --git a/build.rs b/build.rs
index 06392f8f..7f445349 100644
--- a/build.rs
+++ b/build.rs
@@ -1,4 +1,6 @@
 use std::env;
+use std::fs;
+use std::path::{Path, PathBuf};
 use std::process::Command;
 
 fn non_empty_env(name: &str) -> Option<String> {
@@ -66,4 +68,93 @@ fn main() {
     } else {
         println!("cargo:rerun-if-changed=.git/HEAD");
     }
+
+    stage_spark_assets();
+}
+
+fn stage_spark_assets() {
+    println!("cargo:rerun-if-env-changed=BT_SPARK_DIR");
+    println!("cargo:rerun-if-env-changed=BT_SPARK_EMBED");
+
+    let out_dir = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR is set by cargo"));
+    let spark_out = out_dir.join("spark");
+    fs::create_dir_all(&spark_out).expect("create OUT_DIR/spark");
+
+    let cli_dest = spark_out.join("cli.mjs");
+    let harness_dest = spark_out.join("harness.tgz");
+    let hash_dest = spark_out.join("asset_hash");
+    let embedded_marker = spark_out.join("embedded");
+
+    let embed_enabled = non_empty_env("BT_SPARK_EMBED")
+        .map(|value| !matches!(value.as_str(), "0" | "false" | "no" | "off"))
+        .unwrap_or(true);
+
+    if !embed_enabled {
+        write_placeholder_assets(&cli_dest, &harness_dest, &hash_dest, &embedded_marker);
+        println!("cargo:warning=BT_SPARK_EMBED disabled; `bt spark` will be a stub");
+        return;
+    }
+
+    let manifest_dir = PathBuf::from(
+        env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is set by cargo"),
+    );
+    let spark_dir = non_empty_env("BT_SPARK_DIR")
+        .map(PathBuf::from)
+        .unwrap_or_else(|| manifest_dir.join("..").join("spark"));
+
+    let cli_src = spark_dir.join("packages/spark/dist/cli.mjs");
+    let harness_src = spark_dir.join("dist-sea-build/harness.tgz");
+
+    if !cli_src.exists() || !harness_src.exists() {
+        write_placeholder_assets(&cli_dest, &harness_dest, &hash_dest, &embedded_marker);
+        println!(
+            "cargo:warning=spark assets not found under {}; build spark with `pnpm build:sea` to embed (looked for {} and {})",
+            spark_dir.display(),
+            cli_src.display(),
+            harness_src.display(),
+        );
+        return;
+    }
+
+    println!("cargo:rerun-if-changed={}", cli_src.display());
+    println!("cargo:rerun-if-changed={}", harness_src.display());
+
+    fs::copy(&cli_src, &cli_dest).unwrap_or_else(|err| panic!("copy spark cli.mjs: {err}"));
+    fs::copy(&harness_src, &harness_dest)
+        .unwrap_or_else(|err| panic!("copy spark harness.tgz: {err}"));
+
+    let hash = hash_files(&[cli_dest.as_path(), harness_dest.as_path()]);
+    fs::write(&hash_dest, &hash).expect("write asset_hash");
+    fs::write(&embedded_marker, b"1").expect("write embedded marker");
+}
+
+fn write_placeholder_assets(cli: &Path, harness: &Path, hash: &Path, marker: &Path) {
+    fs::write(cli, b"").expect("write empty cli.mjs placeholder");
+    fs::write(harness, b"").expect("write empty harness.tgz placeholder");
+    fs::write(hash, b"unembedded").expect("write asset_hash placeholder");
+    if marker.exists() {
+        fs::remove_file(marker).expect("remove stale embedded marker");
+    }
+}
+
+fn hash_files(paths: &[&Path]) -> String {
+    use std::io::Read;
+
+    // Tiny FNV-1a 64-bit, ample for cache-key purposes (no crypto needs here).
+    let mut hash: u64 = 0xcbf29ce484222325;
+    for path in paths {
+        let mut file = fs::File::open(path).expect("open asset for hashing");
+        let mut buf = [0u8; 8192];
+        loop {
+            let n = file.read(&mut buf).expect("read asset for hashing");
+            if n == 0 {
+                break;
+            }
+            for byte in &buf[..n] {
+                hash ^= u64::from(*byte);
+                hash = hash.wrapping_mul(0x100000001b3);
+            }
+        }
+    }
+    format!("{hash:016x}")
 }
diff --git a/src/main.rs b/src/main.rs
index 0a34fb4d..b8b89948 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -23,6 +23,7 @@ mod scorers;
 mod self_update;
 mod setup;
 mod source_language;
+mod spark;
 mod sql;
 mod status;
 mod switch;
@@ -79,6 +80,7 @@ Additional
   docs         Manage workflow docs for coding agents
   self         Self-management commands
   setup        Configure Braintrust setup flows
+  spark        Run the embedded spark wizard
   status       Show current org and project context
 
 Flags
@@ -161,6 +163,8 @@ enum Commands {
     Switch(CLIArgs),
     /// Show current org and project context
     Status(CLIArgs),
+    /// Run the embedded spark wizard
+    Spark(CLIArgs<spark::SparkArgs>),
     // /// View and modify config
     // Config(CLIArgs),
 }
@@ -189,6 +193,7 @@ impl Commands {
             Commands::Util(cmd) => &cmd.base,
             Commands::Switch(cmd) => &cmd.base,
             Commands::Status(cmd) => &cmd.base,
+            Commands::Spark(cmd) => &cmd.base,
         }
     }
 
@@ -215,6 +220,7 @@ impl Commands {
             Commands::Util(cmd) => &mut cmd.base,
             Commands::Switch(cmd) => &mut cmd.base,
             Commands::Status(cmd) => &mut cmd.base,
+            Commands::Spark(cmd) => &mut cmd.base,
         }
     }
 
@@ -293,6 +299,7 @@ fn try_main() -> Result<()> {
             Commands::SelfCommand(cmd) => self_update::run(cmd.base, cmd.args).await?,
             Commands::Switch(cmd) => switch::run(cmd.base, cmd.args).await?,
             Commands::Status(cmd) => status::run(cmd.base, cmd.args).await?,
+            Commands::Spark(cmd) => spark::run(cmd.base, cmd.args).await?,
         }
         Ok(())
     });
diff --git a/src/spark.rs b/src/spark.rs
new file mode 100644
index 00000000..2bf746cb
--- /dev/null
+++ b/src/spark.rs
@@ -0,0 +1,181 @@
+use std::ffi::OsString;
+use std::io::Cursor;
+use std::path::{Path, PathBuf};
+
+use anyhow::{bail, Context, Result};
+use clap::Args;
+use flate2::read::GzDecoder;
+use tar::Archive;
+use tokio::process::Command;
+
+use crate::args::BaseArgs;
+use crate::utils::write_bytes_atomic;
+
+const CLI_MJS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/spark/cli.mjs"));
+const HARNESS_TGZ: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/spark/harness.tgz"));
+const ASSET_HASH: &str = include_str!(concat!(env!("OUT_DIR"), "/spark/asset_hash"));
+
+const BT_VERSION: &str = env!("CARGO_PKG_VERSION");
+
+#[derive(Debug, Clone, Args)]
+#[command(
+    about = "Run the embedded spark wizard",
+    after_help = "Arguments after `bt spark` are forwarded verbatim to the spark CLI.\n\
+                  Example: bt spark --help",
+    disable_help_flag = true,
+    trailing_var_arg = true,
+    allow_hyphen_values = true
+)]
+pub struct SparkArgs {
+    /// Arguments forwarded to the embedded spark CLI.
+    #[arg(num_args = 0.., value_name = "SPARK_ARG")]
+    pub forwarded: Vec<OsString>,
+}
+
+pub async fn run(base: BaseArgs, args: SparkArgs) -> Result<()> {
+    if !is_embedded() {
+        bail!(
+            "spark was not embedded in this build. Build spark first (`pnpm build:sea` in ../spark), \
+             or rebuild bt with BT_SPARK_DIR pointing to a built spark checkout."
+        );
+    }
+
+    let cache_dir = resolve_cache_dir()?;
+    let cli_path = cache_dir.join("cli.mjs");
+    let harness_root = cache_dir.join("spark-harness");
+    let harness_bin = harness_root.join("bin").join("spark-harness.mjs");
+
+    materialize_cli(&cli_path)?;
+    materialize_harness(&cache_dir, &harness_root, &harness_bin)?;
+
+    let mut command = Command::new("node");
+    command.arg(&cli_path);
+    for arg in &args.forwarded {
+        command.arg(arg);
+    }
+
+    forward_braintrust_env(&mut command, &base);
+    command.env("BT_WIZARD_HARNESS_BIN", &harness_bin);
+
+    let status = command
+        .status()
+        .await
+        .with_context(|| node_spawn_error_hint())?;
+
+    if !status.success() {
+        let code = status.code().unwrap_or(1);
+        std::process::exit(code);
+    }
+    Ok(())
+}
+
+fn is_embedded() -> bool {
+    !CLI_MJS.is_empty() && !HARNESS_TGZ.is_empty()
+}
+
+fn resolve_cache_dir() -> Result<PathBuf> {
+    let base = dirs::cache_dir()
+        .or_else(|| dirs::home_dir().map(|h| h.join(".cache")))
+        .context("could not resolve a user cache directory for spark")?;
+    let asset_hash = ASSET_HASH.trim();
+    Ok(base
+        .join("bt")
+        .join("spark")
+        .join(format!("{BT_VERSION}-{asset_hash}")))
+}
+
+fn materialize_cli(cli_path: &Path) -> Result<()> {
+    if cli_matches(cli_path) {
+        return Ok(());
+    }
+    if let Some(parent) = cli_path.parent() {
+        std::fs::create_dir_all(parent)
+            .with_context(|| format!("failed to create spark cache dir {}", parent.display()))?;
+    }
+    write_bytes_atomic(cli_path, CLI_MJS)
+        .with_context(|| format!("failed to write spark cli.mjs at {}", cli_path.display()))
+}
+
+/// Reports whether the cached file at `cli_path` is byte-for-byte identical to `CLI_MJS`.
+/// A length-only comparison would accept a same-sized file with corrupted or edited contents.
+fn cli_matches(cli_path: &Path) -> bool {
+    let Ok(existing) = std::fs::read(cli_path) else {
+        return false;
+    };
+    existing == CLI_MJS
+}
+
+/// Extracts the embedded harness tarball into `harness_root` unless `harness_bin` already exists.
+///
+/// The archive is unpacked into a temporary directory inside `cache_dir` and then moved into
+/// place with a single rename; an unusable result is reported as an error rather than ignored.
+fn materialize_harness(cache_dir: &Path, harness_root: &Path, harness_bin: &Path) -> Result<()> {
+    if harness_bin.is_file() {
+        return Ok(());
+    }
+
+    std::fs::create_dir_all(cache_dir)
+        .with_context(|| format!("failed to create spark cache dir {}", cache_dir.display()))?;
+
+    let staging = tempfile::Builder::new()
+        .prefix(".spark-harness-stage-")
+        .tempdir_in(cache_dir)
+        .with_context(|| format!("failed to create staging dir in {}", cache_dir.display()))?;
+
+    let decoder = GzDecoder::new(Cursor::new(HARNESS_TGZ));
+    let mut archive = Archive::new(decoder);
+    archive
+        .unpack(staging.path())
+        .context("failed to unpack embedded spark harness tarball")?;
+
+    let staged_root = staging.path().join("spark-harness");
+    if !staged_root.is_dir() {
+        bail!("embedded spark harness archive did not contain a spark-harness/ root directory");
+    }
+
+    // Another `bt spark` may have completed the extraction while this one was unpacking.
+    // Test for the entry point itself: a bare `harness_root` may be a stale, partial directory.
+    if harness_bin.is_file() {
+        return Ok(());
+    }
+
+    match std::fs::rename(&staged_root, harness_root) {
+        Ok(()) => Ok(()),
+        Err(_) if harness_bin.is_file() => Ok(()),
+        Err(err) => Err(err).with_context(|| {
+            format!(
+                "failed to move staged harness into {}",
+                harness_root.display()
+            )
+        }),
+    }
+}
+
+fn forward_braintrust_env(command: &mut Command, base: &BaseArgs) {
+    if let Some(api_url) = &base.api_url {
+        command.env("BRAINTRUST_API_URL", api_url);
+    }
+    if let Some(app_url) = &base.app_url {
+        command.env("BRAINTRUST_APP_URL", app_url);
+    }
+    if let Some(api_key) = &base.api_key {
+        command.env("BRAINTRUST_API_KEY", api_key);
+    }
+    if let Some(profile) = &base.profile {
+        command.env("BRAINTRUST_PROFILE", profile);
+    }
+    if let Some(org) = &base.org_name {
+        command.env("BRAINTRUST_ORG_NAME", org);
+    }
+    if let Some(project) = &base.project {
+        command.env("BRAINTRUST_DEFAULT_PROJECT", project);
+    }
+    if let Some(ca_cert) = base.ca_cert() {
+        command.env("BRAINTRUST_CA_CERT", ca_cert);
+    }
+}
+
+fn node_spawn_error_hint() -> String {
+    "failed to spawn `node` for the embedded spark CLI; install Node.js (>= 22) and ensure `node` is on PATH"
+        .to_string()
+}
diff --git a/src/utils/fs_atomic.rs b/src/utils/fs_atomic.rs
index b54ecc04..38e38485 100644
--- a/src/utils/fs_atomic.rs
+++ b/src/utils/fs_atomic.rs
@@ -3,6 +3,14 @@ use std::path::Path;
 use anyhow::{Context, Result};
 
 pub fn write_text_atomic(path: &Path, contents: &str) -> Result<()> {
+    write_atomic_inner(path, contents.as_bytes())
+}
+
+pub fn write_bytes_atomic(path: &Path, contents: &[u8]) -> Result<()> {
+    write_atomic_inner(path, contents)
+}
+
+fn write_atomic_inner(path: &Path, contents: &[u8]) -> Result<()> {
     let parent = path.parent().ok_or_else(|| {
         anyhow::anyhow!(
             "cannot atomically write {} because it has no parent directory",
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index fcc4fcfd..9bd41438 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -6,7 +6,7 @@ mod json_object;
 mod plurals;
 
 pub use duration::parse_duration_to_seconds;
-pub use fs_atomic::write_text_atomic;
+pub use fs_atomic::{write_bytes_atomic, write_text_atomic};
 pub use git::GitRepo;
 pub(crate) use ids::new_uuid_id;
 pub(crate) use json_object::lookup_object_path;

From 3c2d8ddb959ace618475328c95d63d7ada748c82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?=
Date: Thu, 21 May 2026 16:30:06 -0700
Subject: [PATCH 2/2] chore: remove clippy warnings

---
 src/spark.rs | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/spark.rs b/src/spark.rs
index 2bf746cb..8d8182d2 100644
--- a/src/spark.rs
+++ b/src/spark.rs
@@ -57,10 +57,7 @@ pub async fn run(base: BaseArgs, args: SparkArgs) -> Result<()> {
     forward_braintrust_env(&mut command, &base);
     command.env("BT_WIZARD_HARNESS_BIN", &harness_bin);
 
-    let status = command
-        .status()
-        .await
-        .with_context(|| node_spawn_error_hint())?;
+    let status = command.status().await.context(NODE_SPAWN_HINT)?;
 
     if !status.success() {
         let code = status.code().unwrap_or(1);
@@ -175,7 +172,5 @@ fn forward_braintrust_env(command: &mut Command, base: &BaseArgs) {
     }
 }
 
-fn node_spawn_error_hint() -> String {
-    "failed to spawn `node` for the embedded spark CLI; install Node.js (>= 22) and ensure `node` is on PATH"
-        .to_string()
-}
+const NODE_SPAWN_HINT: &str =
+    "failed to spawn `node` for the embedded spark CLI; install Node.js (>= 22) and ensure `node` is on PATH";