From 09f05058a07e243b5b6811173427e9faae5e8501 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Wed, 10 Jun 2026 04:31:03 +0000 Subject: [PATCH] fix(python): restore deterministic teardown for async-callback machinery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PRs #2007/#2008 fixed the TM-PY-030 deadlocks and exit crash by making teardown hands-off (shutdown_background, no loop close), trading deterministic cleanup for forward progress. This restores determinism while the interpreter is alive and keeps hands-off behavior only where CPython makes determinism impossible (interpreter finalization). Protocol: - An atexit handler registered at module import sets INTERPRETER_AT_EXIT. atexit runs at the very start of Py_FinalizeEx, strictly before the phase in which native threads may no longer attach, so the flag cleanly separates 'interpreter alive' from 'process exiting'. - Each private-loop callback runs as a published asyncio.Task. Teardown cancels it via call_soon_threadsafe(task.cancel) and a closing flag rejects queued-but-unstarted items, so joins are bounded by cooperative cancellation instead of full callback duration. - PyPrivateAsyncLoop::shutdown (Drop) joins its worker thread, which closes its asyncio loop before exiting — fds freed before drop returns. - PyRuntime::drop joins the tokio blocking pool again (the pre-#2007 semantics) instead of shutdown_background. - Every join runs through join_without_gil: detach first when PyGILState_Check reports the dropping thread attached. This removes the GIL deadlock rather than avoiding the join. - Pyclass Drop impls (ScriptedTool/Bash/BashTool) cancel in-flight callbacks through an engine registry of live per-session loops before the rt field drop joins the pool. - At exit: threads skip Python entirely (flag check, no Python::attach), runtime falls back to shutdown_background, OS reclaims resources. Verification: - New tests/test_teardown_determinism.py: exact native-thread-count and fd-count stability across tool churn (joins are synchronous in drop), bounded cancellation of abandoned callbacks, and 10x subprocess interpreter-exit checks for both clean and abandoned-callback exits. - Race-sensitive suites looped 20x, concurrent stress (8 threads x 40 mixed iterations incl. timeout+drop churn) looped 10x, langgraph example 40x: zero hangs, zero aborts. - Full bashkit-python suite: 705 passed, 1 skipped. just pre-pr green. --- crates/bashkit-python/src/lib.rs | 329 +++++++++++++++--- .../tests/test_async_callbacks.py | 25 +- .../tests/test_teardown_determinism.py | 159 +++++++++ specs/python-package.md | 14 + specs/threat-model.md | 53 +-- 5 files changed, 506 insertions(+), 74 deletions(-) create mode 100644 crates/bashkit-python/tests/test_teardown_determinism.py diff --git a/crates/bashkit-python/src/lib.rs b/crates/bashkit-python/src/lib.rs index 75e70564..5c838c43 100644 --- a/crates/bashkit-python/src/lib.rs +++ b/crates/bashkit-python/src/lib.rs @@ -59,7 +59,7 @@ use std::collections::{HashMap, HashSet}; use std::future::Future; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::{Arc, Mutex as StdMutex, RwLock}; +use std::sync::{Arc, Mutex as StdMutex, OnceLock, RwLock, Weak}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use tokio::runtime::Runtime; use tokio::sync::{Mutex, oneshot}; @@ -179,17 +179,66 @@ enum PyFileMount { const PY_FILE_PROVIDER_TYPE_ERROR_PREFIX: &str = "__bashkit_py_file_provider_type_error__:"; +// ============================================================================ +// Interpreter-exit boundary and deterministic teardown (TM-PY-030) +// +// Decision: teardown is fully deterministic while the interpreter is alive +// (workers joined, asyncio loops closed, tokio blocking pool joined before +// drop returns) and hands-off once the interpreter begins exiting. The +// boundary is an `atexit` handler registered at module import: CPython runs +// atexit callbacks at the very start of `Py_FinalizeEx`, strictly before the +// finalization phase in which native threads may no longer attach (attaching +// then aborts the process on CPython < 3.13 — see TM-PY-030 variant 3). +// After the flag flips, threads skip Python entirely and the OS reclaims +// resources at process exit; before it flips, the interpreter is fully alive +// and every attach/join below is safe. +// ============================================================================ + +static INTERPRETER_AT_EXIT: AtomicBool = AtomicBool::new(false); + +fn interpreter_at_exit() -> bool { + INTERPRETER_AT_EXIT.load(Ordering::Acquire) +} + +/// Registered with `atexit` at module import; not part of the public API. +#[pyfunction] +fn _mark_interpreter_at_exit() { + INTERPRETER_AT_EXIT.store(true, Ordering::Release); +} + +/// Whether the current thread is attached to the Python interpreter (holds +/// the GIL). Used by teardown paths to decide whether a blocking join must +/// be wrapped in a detach. +fn thread_attached_to_python() -> bool { + // SAFETY: PyGILState_Check only reads thread-local interpreter state and + // is documented as callable from any thread at any time. + unsafe { pyo3::ffi::PyGILState_Check() == 1 } +} + +/// Run `f` (a blocking join) without holding the GIL, so threads that must +/// attach to finish can make progress. Detaches first when the calling +/// thread is attached (the pyclass-dealloc case); runs `f` directly when it +/// is not. Callers must check `interpreter_at_exit()` first: once the +/// interpreter is exiting, joining threads that may touch Python is unsafe. +fn join_without_gil(f: F) { + if thread_attached_to_python() { + Python::attach(|py| py.detach(f)); + } else { + f(); + } +} + /// Pyclass-held handle to the shared per-instance tokio runtime. /// -/// THREAT[TM-PY-030]: dropping the last handle shuts the runtime down with -/// `shutdown_background()` instead of tokio's default blocking shutdown. -/// Pyclass dealloc runs while attached to the Python interpreter (GIL -/// held), and the default `Runtime::drop` joins in-flight blocking tasks. -/// An abandoned (timed-out) Python callback task must re-attach — acquire -/// the GIL — to finish, so joining it from dealloc deadlocks the whole -/// process (seen as a 6 h CI hang in -/// `test_async_callback_execute_sync_honors_timeout`). Background shutdown -/// lets such tasks finish on their own once the GIL is released. +/// THREAT[TM-PY-030]: while the interpreter is alive, dropping the last +/// handle joins the runtime's blocking pool deterministically with the GIL +/// released (`join_without_gil`), so in-flight callback tasks that must +/// re-attach to finish can do so — restoring deterministic cleanup without +/// the GIL deadlock that a blocking join while attached produced (a 6 h CI +/// hang in `test_async_callback_execute_sync_honors_timeout`). Once the +/// interpreter is exiting, joining is unsafe (threads attaching during +/// finalization abort the process on CPython < 3.13), so the drop falls +/// back to `shutdown_background()` and the OS reclaims resources. struct PyRuntime(Option>); impl Clone for PyRuntime { @@ -207,8 +256,13 @@ impl std::ops::Deref for PyRuntime { impl Drop for PyRuntime { fn drop(&mut self) { - if let Some(rt) = self.0.take().and_then(Arc::into_inner) { + let Some(rt) = self.0.take().and_then(Arc::into_inner) else { + return; + }; + if interpreter_at_exit() { rt.shutdown_background(); + } else { + join_without_gil(move || drop(rt)); } } } @@ -2187,14 +2241,31 @@ struct PrivateLoopWorkItem { result_tx: std::sync::mpsc::SyncSender>>, } +// The lazily spawned worker behind a PyPrivateAsyncLoop: channel sender plus +// the join handle used for deterministic teardown. +struct PrivateLoopWorker { + tx: std::sync::mpsc::SyncSender, + handle: std::thread::JoinHandle<()>, +} + struct PyPrivateAsyncLoop { - // Sender to the dedicated worker thread that owns the asyncio event loop. - // Lazily initialised on first use; `None` before the first call. + // Dedicated worker thread that owns the asyncio event loop. Lazily + // started on first use; `None` before the first call and after shutdown. // Decision: single dedicated thread per PyPrivateAsyncLoop instance so that // all run_until_complete calls happen on the thread that created the loop, // satisfying asyncio's thread-affinity requirement (review comment on // spawn_blocking scheduling successive callbacks on different OS threads). - worker_tx: StdMutex>>, + worker: StdMutex>, + // The worker's asyncio loop, set once by the worker thread after it + // creates the loop. Read by cancel_inflight() to schedule a threadsafe + // task cancellation; never run from any other thread. + event_loop: Arc>>, + // The asyncio.Task currently driven by run_until_complete, if any. + // Published by the worker around each item so teardown can cancel it. + current_task: Arc>>>, + // Set by shutdown(); the worker rejects queued-but-unstarted items so a + // teardown join is bounded by the (cancelled) in-flight item only. + closing: Arc, // Cached Python helper for background-thread fallback (Jupyter/IPython compatibility). bg_thread_runner: StdMutex>>, } @@ -2202,7 +2273,10 @@ struct PyPrivateAsyncLoop { impl PyPrivateAsyncLoop { fn new() -> Arc { Arc::new(Self { - worker_tx: StdMutex::new(None), + worker: StdMutex::new(None), + event_loop: Arc::new(OnceLock::new()), + current_task: Arc::new(StdMutex::new(None)), + closing: Arc::new(AtomicBool::new(false)), bg_thread_runner: StdMutex::new(None), }) } @@ -2213,58 +2287,139 @@ impl PyPrivateAsyncLoop { // run_until_complete calls to a single thread, matching asyncio's thread- // affinity contract. fn ensure_worker_tx(&self) -> PyResult> { - let mut guard = self.worker_tx.lock().expect("private loop worker tx lock"); - if let Some(ref tx) = *guard { - return Ok(tx.clone()); + let mut guard = self.worker.lock().expect("private loop worker lock"); + if let Some(ref worker) = *guard { + return Ok(worker.tx.clone()); + } + if self.closing.load(Ordering::Acquire) { + return Err(PyRuntimeError::new_err("private loop is shutting down")); } let (tx, rx) = std::sync::mpsc::sync_channel::(0); - std::thread::Builder::new() + let event_loop_slot = self.event_loop.clone(); + let current_task = self.current_task.clone(); + let closing = self.closing.clone(); + let handle = std::thread::Builder::new() .name("bashkit-py-loop".into()) .spawn(move || { // Create the event loop on this thread and keep it here for its // entire lifetime; this is the only thread that calls run_until_complete. let event_loop: Py = Python::attach(|py| { - py.import("asyncio") + let event_loop = py + .import("asyncio") .and_then(|a| a.call_method0("new_event_loop")) - .map(|l| l.unbind()) - .expect("asyncio.new_event_loop()") + .expect("asyncio.new_event_loop()"); + // Publish the loop so cancel_inflight() can schedule a + // threadsafe cancellation from other threads. + let _ = event_loop_slot.set(event_loop.clone().unbind()); + event_loop.unbind() }); while let Ok(item) = rx.recv() { + if closing.load(Ordering::Acquire) { + // Teardown started: reject without touching the + // awaitable so the join is not extended by queued + // work. Dropping the item's Py refs unattached is + // safe (pyo3 defers the decrefs). + let _ = item.result_tx.send(Err(PyRuntimeError::new_err( + "private loop is shutting down", + ))); + continue; + } let result = Python::attach(|py| { - // context.run() propagates ContextVars captured at execute_sync() - // call time into the coroutine (background threads start empty). - item.context + // Create the task under the captured context so + // ContextVars propagate (asyncio.Task snapshots the + // current context at creation), then publish it so + // teardown can cancel a long-running callback. + let task = item.context.bind(py).call_method1( + "run", + ( + event_loop.bind(py).getattr("create_task")?, + item.awaitable.bind(py), + ), + )?; + *current_task.lock().expect("private loop task slot") = + Some(task.clone().unbind()); + let result = event_loop .bind(py) - .call_method1( - "run", - ( - event_loop.bind(py).getattr("run_until_complete")?, - item.awaitable.bind(py), - ), - ) - .map(|v| v.unbind()) + .call_method1("run_until_complete", (task,)); + *current_task.lock().expect("private loop task slot") = None; + result.map(|v| v.unbind()) }); // Ignore send errors: the caller timed out and moved on. let _ = item.result_tx.send(result); } - // THREAT[TM-PY-030]: do NOT touch Python on the exit path. - // The worker wakes here because the engine was gc'd, and that - // gc commonly runs inside Py_Finalize — attaching then crashes - // CPython (PyGILState_Release fatal: SIGABRT at interpreter - // exit). Even Python::try_attach cannot detect finalization - // before 3.13. Dropping `event_loop` without attaching is safe - // (pyo3 defers the decref); the loop is closed by asyncio's - // BaseEventLoop.__del__ when the deferred decref runs, or - // reclaimed by the OS at process exit. + // THREAT[TM-PY-030]: while the interpreter is alive, close the + // loop deterministically (releases its epoll/self-pipe fds + // before the teardown join returns). Once the interpreter is + // exiting, do NOT touch Python: the worker often wakes here + // because the engine was gc'd inside Py_Finalize, and + // attaching then crashes CPython (PyGILState_Release fatal; + // Python::try_attach cannot detect finalization before 3.13). + // The atexit-set flag flips strictly before that phase. + if !interpreter_at_exit() { + Python::attach(|py| { + let _ = event_loop.bind(py).call_method0("close"); + }); + } + // Unattached drop is safe either way (deferred decref). drop(event_loop); }) .map_err(|e| { PyRuntimeError::new_err(format!("failed to spawn private loop thread: {e}")) })?; - *guard = Some(tx.clone()); - Ok(tx) + let tx_clone = tx.clone(); + *guard = Some(PrivateLoopWorker { tx, handle }); + Ok(tx_clone) + } + + /// Cancel the asyncio task currently running on the worker, if any. + /// Cooperative: the callback observes `asyncio.CancelledError` at its + /// next await point. Callers must ensure the interpreter is alive. + fn cancel_inflight(&self) { + let Some(event_loop) = self.event_loop.get() else { + return; + }; + Python::attach(|py| { + let task = self + .current_task + .lock() + .expect("private loop task slot") + .as_ref() + .map(|t| t.clone_ref(py)); + let Some(task) = task else { return }; + // call_soon_threadsafe is the only thread-safe entry point into a + // loop running on another thread. Errors (loop already closed, + // task already done) mean there is nothing left to cancel. + if let Ok(cancel) = task.bind(py).getattr("cancel") { + let _ = event_loop + .bind(py) + .call_method1("call_soon_threadsafe", (cancel,)); + } + }); + } + + /// Deterministic teardown: stop accepting work, cancel the in-flight + /// callback, and join the worker (which closes its loop) before + /// returning. Idempotent. At interpreter exit this degrades to dropping + /// the channel — joining is unsafe then (the worker may no longer attach) + /// and the OS reclaims everything at process exit. + fn shutdown(&self) { + self.closing.store(true, Ordering::Release); + let worker = self.worker.lock().expect("private loop worker lock").take(); + let Some(PrivateLoopWorker { tx, handle }) = worker else { + return; + }; + drop(tx); + if interpreter_at_exit() { + return; + } + self.cancel_inflight(); + // THREAT[TM-PY-030]: the worker needs the GIL to finish the cancelled + // item and close its loop, so the join must not hold it. + join_without_gil(move || { + let _ = handle.join(); + }); } fn bg_thread_runner(&self, py: Python<'_>) -> PyResult> { @@ -2355,8 +2510,19 @@ def _run(coro, ctx): } } +impl Drop for PyPrivateAsyncLoop { + fn drop(&mut self) { + self.shutdown(); + } +} + struct PyCallbackEngine { shared_private_async_loop: Arc, + // Live per-session private loops (PySyncLoopMode::PerSession), tracked so + // pyclass Drop can cancel in-flight callbacks it cannot reach through the + // session Arcs (an abandoned timed-out callback task holds its own + // session Arc, so the loop's Drop alone would wait out the callback). + session_private_loops: StdMutex>>, caller: Py, } @@ -2364,10 +2530,38 @@ impl PyCallbackEngine { fn new(py: Python<'_>) -> PyResult> { Ok(Arc::new(Self { shared_private_async_loop: PyPrivateAsyncLoop::new(), + session_private_loops: StdMutex::new(Vec::new()), caller: create_context_callback_caller(py)?, })) } + fn register_session_loop(&self, private_loop: &Arc) { + let mut loops = self + .session_private_loops + .lock() + .expect("session private loops lock"); + loops.retain(|w| w.strong_count() > 0); + loops.push(Arc::downgrade(private_loop)); + } + + /// Cancel every in-flight private-loop callback owned by this engine. + /// Called from pyclass Drop (interpreter alive) BEFORE the tokio runtime + /// join, so teardown is bounded by cooperative cancellation instead of + /// full callback duration. + fn cancel_inflight_callbacks(&self) { + self.shared_private_async_loop.cancel_inflight(); + let loops: Vec> = { + let guard = self + .session_private_loops + .lock() + .expect("session private loops lock"); + guard.iter().filter_map(Weak::upgrade).collect() + }; + for private_loop in loops { + private_loop.cancel_inflight(); + } + } + fn invoke( &self, py: Python<'_>, @@ -2397,7 +2591,13 @@ impl PyCallbackSession { ) -> PyResult> { let private_async_loop = match sync_loop_mode { PySyncLoopMode::SharedAcrossSessions => engine.shared_private_async_loop.clone(), - PySyncLoopMode::PerSession => PyPrivateAsyncLoop::new(), + PySyncLoopMode::PerSession => { + let private_loop = PyPrivateAsyncLoop::new(); + // Track it so pyclass Drop can cancel in-flight callbacks + // even when an abandoned task still holds the session Arc. + engine.register_session_loop(&private_loop); + private_loop + } }; Ok(Arc::new(Self { state: StdMutex::new(capture_callback_state( @@ -3388,6 +3588,15 @@ pub struct PyBash { network: Option, } +// THREAT[TM-PY-030]: see the equivalent Drop on ScriptedTool. +impl Drop for PyBash { + fn drop(&mut self) { + if !interpreter_at_exit() { + self.builtin_engine.cancel_inflight_callbacks(); + } + } +} + impl PyBash { fn reject_external_handler_reentry(&self) -> PyResult<()> { reject_external_handler_reentry_depth(Some(&self.external_handler_reentry_depth)) @@ -4239,6 +4448,15 @@ pub struct BashTool { network: Option, } +// THREAT[TM-PY-030]: see the equivalent Drop on ScriptedTool. +impl Drop for BashTool { + fn drop(&mut self) { + if !interpreter_at_exit() { + self.builtin_engine.cancel_inflight_callbacks(); + } + } +} + impl BashTool { fn build_live_builder(&self, py: Python<'_>) -> PyResult { let mut builder = Bash::builder(); @@ -5006,6 +5224,19 @@ pub struct ScriptedTool { timeout_seconds: Option, } +// THREAT[TM-PY-030]: cancel in-flight (possibly abandoned timed-out) +// callbacks BEFORE the `rt` field drop joins the tokio blocking pool, so +// teardown is bounded by cooperative cancellation instead of full callback +// duration. At interpreter exit the runtime drop degrades to background +// shutdown on its own, so there is nothing to cancel. +impl Drop for ScriptedTool { + fn drop(&mut self) { + if !interpreter_at_exit() { + self.callback_engine.cancel_inflight_callbacks(); + } + } +} + impl ScriptedTool { /// Build a Rust ScriptedTool from stored Python config. /// @@ -5417,6 +5648,14 @@ fn _bashkit(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("BashError", m.py().get_type::())?; m.add_function(wrap_pyfunction!(create_langchain_tool_spec, m)?)?; m.add_function(wrap_pyfunction!(get_version, m)?)?; + m.add_function(wrap_pyfunction!(_mark_interpreter_at_exit, m)?)?; + // THREAT[TM-PY-030]: atexit runs at the very start of Py_FinalizeEx, + // strictly before the finalization phase in which native threads may no + // longer attach. The flag it sets is the boundary between deterministic + // teardown (interpreter alive) and hands-off teardown (process exiting). + m.py() + .import("atexit")? + .call_method1("register", (m.getattr("_mark_interpreter_at_exit")?,))?; Ok(()) } diff --git a/crates/bashkit-python/tests/test_async_callbacks.py b/crates/bashkit-python/tests/test_async_callbacks.py index 430d3067..0df23f93 100644 --- a/crates/bashkit-python/tests/test_async_callbacks.py +++ b/crates/bashkit-python/tests/test_async_callbacks.py @@ -69,16 +69,21 @@ def test_dealloc_during_inflight_callback_does_not_deadlock(): """Dropping the tool while a timed-out callback still runs must not hang. Regression for TM-PY-030 (2): pyclass dealloc runs with the GIL held and - drops the tool's tokio runtime. The default runtime drop joins in-flight - blocking tasks, and the abandoned callback task needs the GIL to finish — - deadlocking the whole interpreter. Runtime shutdown must not block. + joins the tool's tokio runtime; the abandoned callback task needs the GIL + to finish, so a join while attached deadlocked the whole interpreter. + Teardown must release the GIL around the join and bound it by cancelling + the in-flight callback, so dealloc returns promptly either way. """ - callback_done = threading.Event() + settled = threading.Event() async def slow(params, stdin=None): - await asyncio.sleep(0.25) - callback_done.set() + try: + await asyncio.sleep(0.25) + finally: + # Runs on completion AND on cancellation (deterministic teardown + # cancels abandoned callbacks rather than awaiting them). + settled.set() return "late\n" tool = ScriptedTool("api", timeout_seconds=0.05) @@ -88,12 +93,14 @@ async def slow(params, stdin=None): assert r.exit_code == 1 # Dealloc the tool (and its runtime) while the abandoned callback is - # still sleeping on the private-loop worker thread. + # still sleeping on the private-loop worker thread. This must neither + # deadlock nor wait out the sleep. + begin = time.monotonic() del tool gc.collect() + assert time.monotonic() - begin < 5.0, "teardown blocked on abandoned callback" - # The orphaned callback finishes on its own once the GIL is free. - assert callback_done.wait(timeout=2.0), "slow callback did not finish within 2 s" + assert settled.wait(timeout=2.0), "slow callback neither finished nor cancelled" def test_async_callback_sync_execute(): diff --git a/crates/bashkit-python/tests/test_teardown_determinism.py b/crates/bashkit-python/tests/test_teardown_determinism.py new file mode 100644 index 00000000..d405f864 --- /dev/null +++ b/crates/bashkit-python/tests/test_teardown_determinism.py @@ -0,0 +1,159 @@ +"""Teardown determinism tests (TM-PY-030). + +While the interpreter is alive, dropping a tool must deterministically +release everything it owns BEFORE the drop returns: the private-loop worker +thread is joined, its asyncio loop is closed (fds freed), and the tokio +blocking pool is joined. Abandoned timed-out callbacks are cancelled +cooperatively rather than awaited to completion. At interpreter exit the +machinery goes hands-off instead (covered by the subprocess tests). +""" + +import asyncio +import gc +import os +import subprocess +import sys +import threading +import time + +import pytest + +from bashkit import ScriptedTool + +PROC_AVAILABLE = os.path.exists("/proc/self/task") + + +def _native_thread_count() -> int: + return len(os.listdir("/proc/self/task")) + + +def _fd_count() -> int: + return len(os.listdir("/proc/self/fd")) + + +async def _ok(params, stdin=None): + await asyncio.sleep(0) + return "ok\n" + + +def _make_tool() -> ScriptedTool: + t = ScriptedTool("api") + t.add_tool("hit", "Hit", callback=_ok) + return t + + +@pytest.mark.skipif(not PROC_AVAILABLE, reason="requires /proc") +def test_threads_joined_deterministically_after_drop(): + """del tool returns only after worker + runtime threads are joined.""" + # Warm up imports/allocators so the baseline is stable. + t = _make_tool() + assert t.execute_sync("hit").exit_code == 0 + del t + gc.collect() + + baseline = _native_thread_count() + for _ in range(5): + t = _make_tool() + assert t.execute_sync("hit").exit_code == 0 + del t + gc.collect() + # Exact equality, immediately: joins are synchronous in drop. + assert _native_thread_count() == baseline + + +@pytest.mark.skipif(not PROC_AVAILABLE, reason="requires /proc") +def test_fds_stable_across_tool_churn(): + """Asyncio loop fds are released by drop, not by a later gc pass.""" + t = _make_tool() + assert t.execute_sync("hit").exit_code == 0 + del t + gc.collect() + + baseline = _fd_count() + for _ in range(20): + t = _make_tool() + assert t.execute_sync("hit").exit_code == 0 + del t + gc.collect() + assert _fd_count() <= baseline + + +def test_dropped_tool_cancels_abandoned_callback(): + """Teardown is bounded by cancellation, not callback duration.""" + started = threading.Event() + cancelled = threading.Event() + + async def slow(params, stdin=None): + started.set() + try: + await asyncio.sleep(30) + except asyncio.CancelledError: + cancelled.set() + raise + return "late\n" + + t = ScriptedTool("api", timeout_seconds=0.05) + t.add_tool("slow", "Slow", callback=slow) + r = t.execute_sync("slow") + assert r.exit_code == 1 + assert started.wait(timeout=2.0), "callback never started" + + begin = time.monotonic() + del t + gc.collect() + elapsed = time.monotonic() - begin + + # Without cancellation this would take the full 30 s sleep. + assert elapsed < 5.0, f"teardown took {elapsed:.1f}s — callback not cancelled" + assert cancelled.wait(timeout=2.0), "callback did not observe CancelledError" + + +_EXIT_SCRIPT_CLEAN = """ +import asyncio +from bashkit import ScriptedTool + +async def cb(params, stdin=None): + await asyncio.sleep(0) + return "ok\\n" + +# Module-level: never deleted, torn down by interpreter finalization. +t = ScriptedTool("api") +t.add_tool("hit", "Hit", callback=cb) +assert t.execute_sync("hit").exit_code == 0 +""" + +_EXIT_SCRIPT_ABANDONED = """ +import asyncio +from bashkit import ScriptedTool + +async def slow(params, stdin=None): + await asyncio.sleep(30) + return "late\\n" + +# Exit immediately with the abandoned callback still in flight. +t = ScriptedTool("api", timeout_seconds=0.05) +t.add_tool("slow", "Slow", callback=slow) +assert t.execute_sync("slow").exit_code == 1 +""" + + +@pytest.mark.parametrize("script", [_EXIT_SCRIPT_CLEAN, _EXIT_SCRIPT_ABANDONED], ids=["clean", "abandoned"]) +def test_interpreter_exit_does_not_crash(script): + """Module-level tools torn down at Py_Finalize must not abort (SIGABRT).""" + # Replicate this (parent) interpreter's exact import surface in the child. + # The parent imports `bashkit` (and its compiled `_bashkit`) successfully — + # every test in this process proves it — so whatever path entry makes that + # work is on the parent's `sys.path`. A bare `python -c` child starts from + # a clean slate and, under CI's in-place source-tree build, finds + # `__init__.py` without `_bashkit` (ModuleNotFoundError). Prepending the + # parent's `sys.path` to the child guarantees identical resolution, + # independent of where `__init__.py` vs `_bashkit` happen to live. + preamble = "import sys; sys.path[:0] = %r\n" % [p for p in sys.path if p] + child = preamble + script + for _ in range(10): + proc = subprocess.run( + [sys.executable, "-c", child], + capture_output=True, + timeout=60, + ) + assert proc.returncode == 0, proc.stderr.decode() diff --git a/specs/python-package.md b/specs/python-package.md index 9a0f89b1..ef24012a 100644 --- a/specs/python-package.md +++ b/specs/python-package.md @@ -312,6 +312,20 @@ to a daemon thread whose `run_until_complete` call is wrapped in `context.run()` so ContextVars propagate correctly despite the thread switch. The helper is cached on the `PyPrivateAsyncLoop` to avoid repeated module compilation. +**Teardown determinism** (TM-PY-030): while the interpreter is alive, dropping +the last reference to a `Bash`/`BashTool`/`ScriptedTool` deterministically +releases everything it owns *before* the drop returns — in-flight private-loop +callbacks are cancelled cooperatively (each runs as an `asyncio.Task`; +cancellation raises `asyncio.CancelledError` at the next await point), the +private-loop worker thread is joined and closes its event loop (freeing its +fds), and the tokio runtime's blocking pool is joined. All joins release the +GIL first, so teardown cannot deadlock against callbacks that need to attach. +Callbacks that block without awaiting (e.g. `time.sleep` inside `async def`) +cannot be cancelled mid-section; teardown then waits for the current section +to reach an await point or return. At interpreter exit (boundary: an `atexit` +handler registered at module import), teardown goes hands-off — native threads +must not touch a finalizing CPython — and the OS reclaims resources. + **ContextVar propagation**: ContextVars set before `execute()` or `execute_sync()` are captured at call time and replayed inside each callback invocation regardless of which mechanism is used. diff --git a/specs/threat-model.md b/specs/threat-model.md index 4ac136ea..d7cc0f08 100644 --- a/specs/threat-model.md +++ b/specs/threat-model.md @@ -1745,7 +1745,7 @@ caller's GIL hold. | TM-PY-026 | reset() discards security config | `BashTool.reset()` creates new `Bash` with bare builder, dropping all configured limits | `PyBash::reset` and `BashTool::reset` rebuild via `replace_live_bash_with_builder` + `build_live_builder`, which preserves the original limits, env, and registered builtins | **MITIGATED** | | TM-PY-027 | Unbounded recursion in JSON conversion | `py_to_json`/`json_to_py` recurse without depth limit on nested dicts/lists | `json_to_py_inner`, `py_to_json_inner`, and the MontyObject converters all carry a `depth` arg; depth > `MAX_NESTING_DEPTH = 64` raises `ValueError("… nesting depth exceeds maximum of 64")` | **MITIGATED** | -| TM-PY-030 | GIL deadlock / exit crash via async-callback private loop | Private-loop dispatch blocked on a rendezvous channel while attached (GIL held); pyclass dealloc joined in-flight blocking tasks that must re-attach to finish (froze the whole process, observed as a 6 h CI hang); worker thread attached during interpreter finalization to close its loop (SIGABRT at process exit) | Dispatch detaches around both the send and the receive; `PyRuntime` drop shuts the tokio runtime down with `shutdown_background()` instead of a blocking join; worker exit path never touches Python (loop closed via `BaseEventLoop.__del__`) | **MITIGATED** | +| TM-PY-030 | GIL deadlock / exit crash via async-callback private loop | Private-loop dispatch blocked on a rendezvous channel while attached (GIL held); pyclass dealloc joined in-flight blocking tasks that must re-attach to finish (froze the whole process, observed as a 6 h CI hang); worker thread attached during interpreter finalization to close its loop (SIGABRT at process exit) | Deterministic teardown protocol: dispatch detaches around send/receive; in-flight callbacks are cancelled and workers/runtime joined with the GIL released while the interpreter is alive; once the atexit-set exit flag flips, threads skip Python and the OS reclaims resources | **MITIGATED** | **TM-PY-026** (mitigated): `PyBash::reset` and `BashTool::reset` (`crates/bashkit-python/src/lib.rs`) rebuild the inner `Bash` via `replace_live_bash_with_builder` + `build_live_builder`, which @@ -1758,25 +1758,38 @@ MontyObject converters in `crates/bashkit-python/src/lib.rs` carry a `depth: usi At `depth > MAX_NESTING_DEPTH = 64`, conversion raises a Python `ValueError` instead of recursing. Coverage: `tests/_security_advanced.py::JsonConversionBoundariesTests`. -**TM-PY-030** (mitigated): two deadlock variants in the async-callback private-loop -machinery (`crates/bashkit-python/src/lib.rs`). (1) `PyPrivateAsyncLoop::run_awaitable` -sent work to the dedicated worker thread over a `sync_channel(0)` rendezvous while -attached; on first use the worker must attach (acquire the GIL) to create its asyncio -loop before it can `recv()`, so dispatcher and worker waited on each other. The send -and receive now both run inside `py.detach(...)`. (2) Pyclass dealloc runs attached -and dropped the last `Arc`; tokio's default `Runtime::drop` joins in-flight -blocking tasks, and an abandoned (timed-out) callback task must re-attach to finish — -freezing the entire interpreter. The `PyRuntime` handle now shuts the runtime down -with `shutdown_background()` on last drop. (3) The private-loop worker thread called -`Python::attach` on its exit path to close its asyncio loop; the worker usually wakes -because the engine was gc'd, and that gc commonly runs inside `Py_Finalize` — -attaching during finalization fatals CPython (`PyGILState_Release`, SIGABRT at -interpreter exit; `Python::try_attach` cannot detect finalization before 3.13). The -worker exit path no longer touches Python: the loop's `Py` ref is dropped unattached -(deferred decref) and the loop is closed by `BaseEventLoop.__del__`. Regression tests: -`tests/test_async_callbacks.py::test_async_callback_execute_sync_honors_timeout`, -`…::test_dealloc_during_inflight_callback_does_not_deadlock`; variant (3) is covered -by the `langgraph_async_tool.py` example run in the Python CI Examples job. +**TM-PY-030** (mitigated): three crash/deadlock variants in the async-callback +private-loop machinery (`crates/bashkit-python/src/lib.rs`), resolved by a +deterministic teardown protocol. (1) `PyPrivateAsyncLoop::run_awaitable` sent work to +the dedicated worker thread over a `sync_channel(0)` rendezvous while attached; on +first use the worker must attach (acquire the GIL) to create its asyncio loop before +it can `recv()`, so dispatcher and worker waited on each other. The send and receive +both run inside `py.detach(...)`. (2) Pyclass dealloc runs attached and dropped the +last `Arc`; tokio's default `Runtime::drop` joins in-flight blocking tasks, +and an abandoned (timed-out) callback task must re-attach to finish — freezing the +entire interpreter. (3) The private-loop worker attached on its exit path while the +interpreter was finalizing — `Py_Finalize` gc is what usually wakes it — which fatals +CPython (`PyGILState_Release`, SIGABRT; `Python::try_attach` cannot detect +finalization before 3.13). + +Teardown protocol: an `atexit` handler registered at module import sets +`INTERPRETER_AT_EXIT`; atexit runs at the very start of `Py_FinalizeEx`, strictly +before the phase in which native threads may no longer attach, so the flag cleanly +splits two regimes. While the interpreter is alive, teardown is fully deterministic: +pyclass `Drop` cancels in-flight callbacks (each callback runs as a published +`asyncio.Task`; cancellation goes through `call_soon_threadsafe(task.cancel)`, and a +`closing` flag rejects queued-but-unstarted items), `PyPrivateAsyncLoop::shutdown` +joins the worker — which closes its loop before exiting, freeing its fds — and +`PyRuntime::drop` joins the tokio blocking pool; every join runs via +`join_without_gil` (detach first when `PyGILState_Check` says the dropping thread is +attached), eliminating the GIL deadlock. Once the flag is set, threads skip Python +entirely and the OS reclaims resources at process exit — the only regime in which +deterministic cleanup is impossible by CPython's own rules. Regression tests: +`tests/test_teardown_determinism.py` (exact thread-count and fd-count determinism, +bounded cancellation, interpreter-exit subprocess stress) plus +`tests/test_async_callbacks.py::test_async_callback_execute_sync_honors_timeout` and +`…::test_dealloc_during_inflight_callback_does_not_deadlock`; variant (3) is also +covered by the `langgraph_async_tool.py` example run in the Python CI Examples job. | TM-PY-029 | Host clock information disclosure | `datetime.date.today()` / `datetime.datetime.now()` expose host system time and timezone | Intentional — required for correct datetime semantics | **ACCEPTED** |