From 63d6f93634cf8581cec59544e4eb54c9b3bbf031 Mon Sep 17 00:00:00 2001 From: Gavin Aguiar Date: Tue, 16 Jun 2026 15:23:07 -0500 Subject: [PATCH 1/5] Make vendored google.protobuf a conditional fallback --- eng/scripts/vendor_deps.py | 110 +++++- workers/azure_functions_worker/__init__.py | 145 +++++++- .../_vendored/README.md | 74 ++-- .../bindings/nullable_converters.py | 13 +- workers/azure_functions_worker/loader.py | 13 +- workers/tests/test_setup.py | 24 +- .../unittests/test_nullable_converters.py | 2 +- .../tests/unittests/test_vendored_protobuf.py | 318 +++++++++++++----- 8 files changed, 550 insertions(+), 149 deletions(-) diff --git a/eng/scripts/vendor_deps.py b/eng/scripts/vendor_deps.py index be17f67f..adfa2cb4 100644 --- a/eng/scripts/vendor_deps.py +++ b/eng/scripts/vendor_deps.py @@ -73,28 +73,121 @@ def _resolve_package_root(dotted_name: str) -> Path: return origin.parent +def _safe_makedirs(path: Path) -> None: + """Create ``path`` and any missing parents, tolerating bind-mount races. + + Docker-on-Windows bind mounts can leave stale metadata after a previous + ``rmtree`` so that ``Path.exists()``, ``os.mkdir`` and even kernel + ``mkdir`` disagree. We work around it by: + + 1. Trying ``os.makedirs(exist_ok=True)`` (the common case). + 2. If that fails, walking parents and creating each segment individually + via ``os.mkdir`` while swallowing ``FileExistsError``. + 3. As a last resort (POSIX only), shelling out to ``mkdir -p``. + """ + import os + import subprocess + + str_path = str(path) + try: + os.makedirs(str_path, exist_ok=True) + if path.is_dir(): + return + except OSError: + pass + + # Per-segment fallback. 
+ segments: list[Path] = [] + cur = path + while cur.parent != cur: + segments.append(cur) + cur = cur.parent + for seg in reversed(segments): + try: + os.mkdir(str(seg)) + except FileExistsError: + continue + except FileNotFoundError: + break + if path.is_dir(): + return + + # Final fallback: kernel mkdir -p (POSIX only; on Windows the prior + # steps are sufficient because there is no bind-mount weirdness). + if os.name == "posix": + rc = subprocess.run( + ["mkdir", "-p", str_path], capture_output=True, text=True + ) + if rc.returncode == 0 and path.is_dir(): + return + raise RuntimeError( + f"Could not create directory {str_path!r}: {rc.stderr.strip()}" + ) + raise RuntimeError(f"Could not create directory {str_path!r}") + + +def _safe_rmtree(path: Path) -> None: + """Remove ``path`` recursively, falling back to ``rm -rf`` on POSIX. + + Docker Desktop on Windows occasionally leaves stale cache entries after + ``shutil.rmtree`` so that subsequent ``mkdir`` calls fail with confusing + errors. Doing the removal through the kernel as a final step appears to + flush the cache reliably. + + Raises ``OSError`` if the directory still exists after both attempts. + Silent failure here would let stale files from an older vendoring + survive the re-copy (the import-rewriter only touches files it copies) + and ship in the artifact. + """ + import os + import subprocess + + last_err: Exception | None = None + if path.exists(): + try: + shutil.rmtree(path) + except OSError as e: + last_err = e + if os.name == "posix": + # Kernel-level ``rm -rf`` even if Python thought the path was already + # gone; this nudges Docker Desktop into invalidating its cache. + subprocess.run( + ["rm", "-rf", str(path)], + capture_output=True, text=True, check=False, + ) + if path.exists(): + # Both attempts failed to actually remove the tree. Surface the + # original error so callers don't silently merge new files on + # top of a stale tree. 
+ raise OSError( + f"Failed to remove {path!s}; stale files would be shipped. " + f"Original error: {last_err!r}" + ) + + + def _copy_package(src: Path, dst: Path) -> int: """Copy ``src`` to ``dst`` recursively, skipping native extensions. Returns the number of Python source files copied. """ if dst.exists(): - shutil.rmtree(dst) - dst.mkdir(parents=True) + _safe_rmtree(dst) + _safe_makedirs(dst) copied = 0 for path in src.rglob("*"): rel = path.relative_to(src) target = dst / rel if path.is_dir(): - target.mkdir(parents=True, exist_ok=True) + _safe_makedirs(target) continue if path.suffix in NATIVE_EXTENSION_SUFFIXES: continue # Drop bytecode caches; they will be regenerated. if "__pycache__" in path.parts: continue - target.parent.mkdir(parents=True, exist_ok=True) + _safe_makedirs(target.parent) shutil.copy2(path, target) if path.suffix == ".py": copied += 1 @@ -263,6 +356,15 @@ def vendor_package(dotted_name: str, target_root: Path) -> dict: parts = dotted_name.split(".") dst = target_root.joinpath(*parts) + # Wipe the top-level (e.g. ``_vendored/google``) entirely before + # re-vendoring. Removing only the leaf (``_vendored/google/protobuf``) + # leaves the parent in a half-state that confuses ``Path.mkdir`` on + # Docker-on-Windows bind mounts (``parent.is_dir()`` can return + # ``False`` for a parent that exists, causing the recursive + # ``parents=True`` path to raise ``FileExistsError``). + top_level = target_root / parts[0] + _safe_rmtree(top_level) + copied = _copy_package(src, dst) _ensure_namespace_packages(target_root, dotted_name) files_rewritten, rewrites = _rewrite_tree( diff --git a/workers/azure_functions_worker/__init__.py b/workers/azure_functions_worker/__init__.py index 9f1693e4..a2b449a3 100644 --- a/workers/azure_functions_worker/__init__.py +++ b/workers/azure_functions_worker/__init__.py @@ -1,13 +1,140 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
import os +import sys -# Runtime requirement, not a build-time/pipeline setting: protobuf selects its -# implementation when google.protobuf modules are imported in this worker -# process. Our vendored azure_functions_worker._vendored.google.protobuf tree -# deliberately contains only pure-Python files (native extensions are not -# copied), so force protobuf to use its pure-Python implementation at import -# time. This setdefault runs before any vendored protobuf import because -# azure_functions_worker/__init__.py is the package top-level module and -# executes before submodules, including _vendored.*. -os.environ.setdefault("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python") +# --------------------------------------------------------------------------- +# Protobuf runtime selection +# --------------------------------------------------------------------------- +# +# The worker's generated ``*_pb2.py`` stubs import ``google.protobuf`` +# at the top level. Two scenarios: +# +# 1. The function app does NOT ship its own ``google.protobuf``. The +# top-level lookup resolves to the protobuf install that ships with +# the worker runtime (under ``worker_deps_path`` on Azure Functions), +# which is guaranteed compatible with the worker's pb2 stubs and +# includes the fast ``upb`` C extension. Nothing to do here. +# +# 2. The function app DOES ship ``google.protobuf`` in +# ``.python_packages``. On Azure Functions the customer's path +# precedes the worker's on ``sys.path``, so a top-level +# ``import google.protobuf`` resolves to the customer's copy. If the +# customer pinned an older protobuf (the common case is 4.x) the +# worker's pb2 stubs fail to load — for example ``from +# google.protobuf import runtime_version`` does not exist before +# protobuf 5.27. To insulate the worker from the customer's pin we: +# a. 
Pre-import the vendored ``google.protobuf`` modules and +# register them in ``sys.modules`` under their top-level names +# so subsequent ``from google.protobuf import X`` resolves to +# the vendored copy. +# b. Force the vendored copy onto its pure-Python implementation +# via ``PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`` so the +# vendored ``api_implementation`` does not try to load the +# customer's ``google._upb._message`` C extension (which would +# be incompatible with vendored protobuf and unsafe to load +# alongside another ``_upb`` instance). +# +# Side effect of scenario 2: customer code that does ``import +# google.protobuf`` later in the process will resolve to the vendored +# copy rather than the customer's pinned copy. This trade-off is +# necessary because protobuf's runtime assumes a single coherent +# ``google.protobuf`` package per process. +# +# Detection cost: a single ``os.path.isdir`` call at worker startup +# (there is deliberately no ``importlib.util.find_spec`` fallback). Zero +# per invocation. + + +def _customer_ships_protobuf() -> bool: + """Return True if the function app ships its own google.protobuf + (directly or as a transitive dependency). + + We only check the canonical Azure Functions deployment layout — + ``{AzureWebJobsScriptRoot}/.python_packages/lib/site-packages/google/ + protobuf``. We deliberately do *not* fall back to a generic + ``importlib.util.find_spec`` lookup because that would also match + the worker's own protobuf install (which is always on ``sys.path`` + and is not "customer protobuf"). A false positive there would + activate the pure-Python vendored fallback for every function app, + erasing the perf benefit of running the worker on ``upb``. 
+ """ + script_root = os.environ.get("AzureWebJobsScriptRoot") + if not script_root: + return False + candidate = os.path.join( + script_root, + ".python_packages", + "lib", + "site-packages", + "google", + "protobuf", + ) + return os.path.isdir(candidate) + + +def _activate_vendored_protobuf() -> None: + """Pre-import the vendored protobuf modules and alias them under + the top-level ``google.protobuf`` names so the worker's pb2 stubs + resolve to the vendored copy instead of the customer's pinned one. + """ + try: + import importlib + + # Alias only the protobuf-specific names. Do NOT alias the + # top-level ``google`` package: the vendored ``google`` is a + # regular package whose ``__path__`` covers only our vendored + # tree, so aliasing it would shadow every other ``google.*`` + # the customer ships (``google.cloud.*``, ``google.auth``, + # ``google.api_core``, etc.). Those packages are the most + # common reason a customer ends up with protobuf in their + # dependencies in the first place, so breaking them would + # defeat the purpose of the fallback. ``from google.protobuf + # import X`` short-circuits on ``sys.modules["google.protobuf"]`` + # without consulting ``sys.modules["google"]``, so aliasing + # only the leaves is sufficient. + modules_to_alias = ( + "google.protobuf", + "google.protobuf.internal", + ) + for top_name in modules_to_alias: + vendored_name = "azure_functions_worker._vendored." + top_name + mod = importlib.import_module(vendored_name) + # Force the alias even if something already populated + # ``sys.modules`` for the top-level name. The whole point + # of activation is "the customer's protobuf must not be + # what the worker's pb2 stubs see"; ``setdefault`` would + # let an early customer import keep the slot. + sys.modules[top_name] = mod + except ImportError: + # Vendored tree may be absent in some dev workflows (before + # ``vendor_deps.py`` has been run). Stay quiet here; the next + # worker import will surface a clearer error. 
+ return + + +if _customer_ships_protobuf(): + # Force the vendored copy onto pure-Python BEFORE pre-importing + # any of its modules, so that vendored ``api_implementation`` + # doesn't try to load a (potentially incompatible) ``_upb``. + os.environ.setdefault( + "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python" + ) + _activate_vendored_protobuf() + if os.environ.get("AZURE_FUNCTIONS_PYTHON_PROTOBUF_DEBUG"): + # Opt-in diagnostic. Off by default to avoid polluting + # customer logs / log scrapers that treat stderr as warning. + print( + "[azure_functions_worker] Customer ships google.protobuf; " + "activated vendored pure-Python fallback.", + file=sys.stderr, + ) +# else: nothing to do. Worker's pb2 stubs will resolve top-level +# google.protobuf to the worker's own protobuf install and use upb +# naturally. We deliberately do NOT log on the no-op path: it would +# run on every worker startup for the entire fleet and provides no +# actionable signal to customers. + + +del _customer_ships_protobuf +del _activate_vendored_protobuf diff --git a/workers/azure_functions_worker/_vendored/README.md b/workers/azure_functions_worker/_vendored/README.md index 5eda4f9a..cbdd71e6 100644 --- a/workers/azure_functions_worker/_vendored/README.md +++ b/workers/azure_functions_worker/_vendored/README.md @@ -1,26 +1,48 @@ # `_vendored` -This directory holds third-party Python packages that are vendored into the -Azure Functions Python worker so the worker is isolated from versions a -customer ships in their own `requirements.txt`. +This directory holds a private copy of `google.protobuf` that the worker +falls back to when the function app ships its own `google.protobuf`. ## Why we vendor The worker uses generated `*_pb2.py` protobuf stubs that, starting with `protobuf >= 5.27`, import symbols such as `runtime_version` from -`google.protobuf`. 
If a customer pins an older `protobuf` (for example, 4.x), -the customer's copy of `google.protobuf` shadows the worker's expected -version and the worker fails to start with `ImportError`. +`google.protobuf`. If a customer pins an older `protobuf` (for example, +4.x) in their `.python_packages`, the customer's copy of +`google.protobuf` shadows the worker's expected version on `sys.path` +and the worker fails to start with `ImportError`. -To prevent that, the worker imports protobuf from -`azure_functions_worker._vendored.google.protobuf` instead of the top-level -`google.protobuf`. The customer's `protobuf` package is still installed -alongside the worker but never used by worker code. +To prevent that, when the worker detects the customer ships +`google.protobuf`, it pre-imports this vendored copy and registers it +in `sys.modules` under the top-level `google.protobuf` names. The +worker's pb2 stubs (which use ordinary `from google.protobuf import X` +imports) then resolve to the vendored copy, and the customer's pinned +version doesn't interfere with the worker. + +When the customer does **not** ship `google.protobuf`, this vendored +tree is not used at all — the worker's pb2 stubs resolve to the +protobuf install that ships with the worker runtime, which uses the +fast `upb` C extension natively. Most function apps fall in this +branch and pay no runtime cost for vendoring. + +The selection happens once at worker startup in +`azure_functions_worker/__init__.py`. See `_customer_ships_protobuf()` +there. + +## Pure-Python only + +The vendored copy is shipped as pure Python — no native extensions are +copied. When it is activated, the worker also sets +`PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` so the vendored +`api_implementation` doesn't try to load a `_upb._message` C extension +(loading one alongside the customer's own `_upb` instance is unsafe). 
+This pure-Python mode is slower than `upb` on the gRPC hot path, but +the cost is only paid by function apps that actually ship protobuf. ## How it is populated This directory is **empty in source control** (only this `README.md` and -`.gitignore` are committed). The actual vendored packages are produced at +`.gitignore` are committed). The actual vendored package is produced at build time by: ``` @@ -29,28 +51,22 @@ python eng/scripts/vendor_deps.py --target workers/azure_functions_worker/_vendo That script: -1. Copies the source of each vendored package (currently `google.protobuf`) - from the active Python environment into this directory. +1. Copies the source of `google.protobuf` from the active Python + environment into this directory. Native extensions are skipped. 2. Rewrites every absolute import inside the copied files from - `google.protobuf...` to `azure_functions_worker._vendored.google.protobuf...` - so the vendored copy is fully self-contained. + `google.protobuf...` to + `azure_functions_worker._vendored.google.protobuf...` so the + vendored copy is self-contained. 3. Writes a top-level `google/__init__.py` so the `google` segment is a regular package (rather than a namespace package) under `_vendored`. -`invoke build-protos` calls this script automatically before regenerating the -`*_pb2.py` stubs, and the pack pipeline (`eng/pack/scripts/*`) copies the -result into the published artifact under -`deps/azure_functions_worker/_vendored/`. +`invoke build-protos` calls this script automatically before +regenerating the `*_pb2.py` stubs, and the pack pipeline +(`eng/pack/scripts/*`) copies the result into the published artifact +under `deps/azure_functions_worker/_vendored/`. ## Do not edit by hand -Any file under `google/` is regenerated on every build. Edits made directly -to those files will be overwritten. Changes that affect vendoring belong in -`eng/scripts/vendor_deps.py`. 
- -## Pure-Python protobuf - -The worker forces the pure-Python protobuf implementation by setting -`PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` in -`azure_functions_worker/__init__.py`. This avoids the need to vendor the -protobuf C extension across the full Python × OS × architecture matrix. +Any file under `google/` is regenerated on every build. Edits made +directly to those files will be overwritten. Changes that affect +vendoring belong in `eng/scripts/vendor_deps.py`. diff --git a/workers/azure_functions_worker/bindings/nullable_converters.py b/workers/azure_functions_worker/bindings/nullable_converters.py index 67207f4d..822d9801 100644 --- a/workers/azure_functions_worker/bindings/nullable_converters.py +++ b/workers/azure_functions_worker/bindings/nullable_converters.py @@ -1,11 +1,14 @@ from datetime import datetime from typing import Optional, Union -# Use the vendored Timestamp so the value can be assigned to a vendored -# protobuf field (``NullableTimestamp.value``). A ``Timestamp`` from the -# customer-facing ``google.protobuf`` namespace lives in a different -# descriptor pool and would be rejected at assignment time. -from azure_functions_worker._vendored.google.protobuf.timestamp_pb2 import ( +# Import from top-level ``google.protobuf``. In the no-customer-protobuf +# branch this resolves to the worker's own protobuf (with upb); in the +# customer-ships-protobuf branch the bootstrap in +# ``azure_functions_worker/__init__.py`` aliases it to the vendored +# pure-Python copy. Either way the worker uses a single descriptor pool +# end-to-end (matching the pb2 stubs), avoiding cross-pool serialization +# on the hot path that runs for every invocation. 
+from google.protobuf.timestamp_pb2 import ( Timestamp, ) diff --git a/workers/azure_functions_worker/loader.py b/workers/azure_functions_worker/loader.py index 229d74d5..c392e24d 100644 --- a/workers/azure_functions_worker/loader.py +++ b/workers/azure_functions_worker/loader.py @@ -12,11 +12,14 @@ from os import PathLike, fspath from typing import Dict, Optional -# Worker code references _vendored.google.protobuf instead of -# importing directly from google.protobuf. -from azure_functions_worker._vendored.google.protobuf.duration_pb2 import ( - Duration, -) +# Import from top-level ``google.protobuf``. In the no-customer-protobuf +# branch this resolves to the worker's own protobuf (with upb); in the +# customer-ships-protobuf branch the bootstrap in +# ``azure_functions_worker/__init__.py`` aliases it to the vendored +# pure-Python copy. Either way the worker uses a single descriptor pool +# end-to-end (matching the pb2 stubs), avoiding cross-pool serialization +# on the hot path. +from google.protobuf.duration_pb2 import Duration from . import bindings, functions, protos from .bindings.retrycontext import RetryPolicy diff --git a/workers/tests/test_setup.py b/workers/tests/test_setup.py index f305b5c1..59f6fb7c 100644 --- a/workers/tests/test_setup.py +++ b/workers/tests/test_setup.py @@ -38,12 +38,13 @@ WEBHOST_GIT_REPO = "https://github.com/Azure/azure-functions-host/archive" WEBHOST_TAG_PREFIX = "v4." WORKER_DIR = "azure_functions_worker" if sys.version_info.minor < 13 else "proxy_worker" -# The worker's generated protobuf stubs must resolve `google.protobuf` -# to the worker's vendored copy, not whatever the customer ships in -# their `.python_packages`. The proxy worker (Python 3.13+) is a -# separate worker with its own dependency isolation and is unaffected, -# so we only rewrite for azure_functions_worker. 
-REWRITE_PROTOBUF = WORKER_DIR == "azure_functions_worker" +# The worker's generated protobuf stubs continue to import top-level +# ``google.protobuf``. ``azure_functions_worker/__init__.py`` decides at +# package-import time whether to redirect those imports to the vendored +# copy (via ``sys.modules`` aliases) based on whether the customer ships +# their own protobuf. Build-time rewriting of the stubs is no longer +# needed, so this flag stays False. +REWRITE_PROTOBUF = False def get_webhost_version() -> str: @@ -321,9 +322,14 @@ def vendor_deps(c, target=None): """Vendor third-party deps into azure_functions_worker._vendored. Copies the currently-installed ``google.protobuf`` package into - ``azure_functions_worker/_vendored/google/protobuf/`` and rewrites its - internal imports so the worker resolves protobuf from the vendored copy - regardless of any version the customer ships in ``.python_packages``. + ``azure_functions_worker/_vendored/google/protobuf/`` (pure-Python + only — native extensions are skipped) and rewrites its internal + imports so the vendored copy is fully self-contained. The worker + only uses the vendored copy when the customer ships their own + ``google.protobuf``; otherwise the worker uses the protobuf install + on its own ``sys.path``. The decision is made at runtime in + ``azure_functions_worker/__init__.py``. + Safe to re-run; the script is idempotent. Skipped for the proxy worker (Python >= 3.13) which has its own diff --git a/workers/tests/unittests/test_nullable_converters.py b/workers/tests/unittests/test_nullable_converters.py index 9d959f00..f08c7ce7 100644 --- a/workers/tests/unittests/test_nullable_converters.py +++ b/workers/tests/unittests/test_nullable_converters.py @@ -7,7 +7,7 @@ # a NullableTimestamp whose ``value`` is built from the vendored Timestamp, # so equality comparison only succeeds when the expected value is also # constructed from the same descriptor pool. 
-from azure_functions_worker._vendored.google.protobuf.timestamp_pb2 import ( +from google.protobuf.timestamp_pb2 import ( Timestamp, ) diff --git a/workers/tests/unittests/test_vendored_protobuf.py b/workers/tests/unittests/test_vendored_protobuf.py index 14f3ea9d..12e1894a 100644 --- a/workers/tests/unittests/test_vendored_protobuf.py +++ b/workers/tests/unittests/test_vendored_protobuf.py @@ -1,22 +1,24 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -"""End-to-end test for the protobuf vendoring fix. - -The bug this test guards against: on Linux Dedicated, the customer's -``.python_packages`` directory is placed before the worker's dependency -directory on ``sys.path``. If the customer pins an older ``protobuf`` -that lacks ``runtime_version`` (or any other newer symbol the worker's -generated stubs reference), the worker's protos fail to import and the -process exits with code 1. - -The fix: ``azure_functions_worker.protos`` imports protobuf via the -vendored copy at ``azure_functions_worker._vendored.google.protobuf``, -which is unaffected by whatever the customer ships. - -This test reproduces the failure by spawning a subprocess with a stub -``google.protobuf`` on ``sys.path`` ahead of everything else, then -importing the worker's protos. With the vendoring fix in place the -import must succeed. +"""End-to-end tests for the conditional vendored-protobuf activation. + +The worker keeps a private pure-Python copy of ``google.protobuf`` +under ``azure_functions_worker._vendored.google.protobuf`` that it +activates only when the customer ships their own ``google.protobuf``. +The selection is performed in ``azure_functions_worker/__init__.py``: + +* No customer protobuf -> do nothing. Worker's pb2 stubs resolve + top-level ``google.protobuf`` to the worker's own protobuf install + on ``sys.path``. No env var is set. Vendored tree is unused. 
+ +* Customer protobuf present -> set + ``PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`` and register the + vendored modules under their top-level ``google.protobuf`` names so + the worker's pb2 stubs (which import top-level ``google.protobuf``) + receive the vendored copy. + +These tests cover both branches in subprocesses since the selection is +irreversible per process. """ import os @@ -38,24 +40,30 @@ def _vendored_protobuf_present() -> bool: return VENDORED_PROTOBUF.is_file() -class TestVendoredProtobuf(unittest.TestCase): - """Regression tests for the protobuf vendoring isolation.""" +class TestVendoredProtobufActivation(unittest.TestCase): + """Regression tests for the conditional vendored-protobuf bootstrap.""" def setUp(self): self.tmp_dir = tempfile.mkdtemp(prefix="vendored_pb_test_") self.addCleanup(shutil.rmtree, self.tmp_dir, ignore_errors=True) def _write_hostile_protobuf(self, root: Path) -> Path: - """Create a fake ``google.protobuf`` that mimics the customer's - broken pin. + """Create a fake ``google.protobuf`` under a simulated Azure + Functions ``.python_packages`` tree. + + Returns the script-root path (the parent of ``.python_packages``) + that should be passed via ``AzureWebJobsScriptRoot``. - Importing any submodule (or even the package itself, indirectly) - will raise ``ImportError`` for ``runtime_version`` — the same - symptom customers see in production. + The stub is missing every modern symbol the worker's pb2 stubs + reference (``runtime_version``, etc.), so any worker pb2 import + that resolved to this stub would crash. 
""" - pkg = root / "google" / "protobuf" + site_packages = ( + root / ".python_packages" / "lib" / "site-packages" + ) + pkg = site_packages / "google" / "protobuf" pkg.mkdir(parents=True) - (root / "google" / "__init__.py").write_text( + (site_packages / "google" / "__init__.py").write_text( "# Stub namespace package that mimics a customer's pinned\n" "# google.protobuf<5.27 install.\n", encoding="utf-8", @@ -66,7 +74,8 @@ def _write_hostile_protobuf(self, root: Path) -> Path: # Hostile stub: looks like a real google.protobuf # package but is missing every symbol the new protoc # output expects. Importing anything from this module - # would crash the worker prior to the vendoring fix. + # would crash the worker without the vendored-fallback + # bootstrap. __version__ = "4.25.3" """ ).lstrip(), @@ -74,24 +83,32 @@ def _write_hostile_protobuf(self, root: Path) -> Path: ) return root - def _run_in_subprocess( - self, code: str, extra_path: Path + def _run_subprocess( + self, + code: str, + *, + extra_path: Path = None, + script_root: str = None, ) -> subprocess.CompletedProcess: - """Run ``code`` in a fresh interpreter with ``extra_path`` first - on ``PYTHONPATH``.""" + """Run ``code`` in a fresh interpreter with the worker on + PYTHONPATH plus an optional ``extra_path`` prepended. + """ env = os.environ.copy() - # Place the hostile google.protobuf first, then the worker's - # source directory. This mirrors the Linux Dedicated sys.path - # order where customer .python_packages precedes worker deps. - path_parts = [str(extra_path), str(WORKER_ROOT)] + path_parts = [] + if extra_path is not None: + path_parts.append(str(extra_path)) + path_parts.append(str(WORKER_ROOT)) existing = env.get("PYTHONPATH", "") if existing: path_parts.append(existing) env["PYTHONPATH"] = os.pathsep.join(path_parts) - # Belt-and-braces: also force the pure-Python implementation in - # case the test environment has the C extension cached from the - # parent process. 
- env["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" + # Don't carry parent's protobuf impl forcing — the worker's + # __init__ sets it based on detection. + env.pop("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", None) + if script_root is not None: + env["AzureWebJobsScriptRoot"] = script_root + else: + env.pop("AzureWebJobsScriptRoot", None) return subprocess.run( [sys.executable, "-c", code], env=env, @@ -106,93 +123,220 @@ def _run_in_subprocess( "`python eng/scripts/vendor_deps.py --target " "workers/azure_functions_worker/_vendored` first.", ) - def test_worker_protos_import_with_hostile_customer_protobuf(self): - """The worker's protos must import even when a broken - ``google.protobuf`` shadows the worker's own protobuf.""" + def test_hostile_customer_protobuf_activates_vendored_fallback(self): + """When the customer ships an incompatible ``google.protobuf``, + the worker's ``__init__`` must detect that, set + ``PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python``, and alias + the vendored modules under the top-level ``google.protobuf`` + names so the worker's pb2 stubs (which import top-level + ``google.protobuf``) resolve to the vendored copy. + """ hostile_root = self._write_hostile_protobuf(Path(self.tmp_dir)) + hostile_site = ( + hostile_root / ".python_packages" / "lib" / "site-packages" + ) - # The subprocess imports the worker's protos AND constructs a - # message that exercises the descriptor pool. If the vendoring - # isn't in place the import line itself raises. code = textwrap.dedent( """ import sys - # Sanity check: the hostile google.protobuf must be the one - # that resolves at the top level. + # Sanity: the hostile google.protobuf must be discoverable + # at the top level *before* the worker's __init__ runs. 
import google.protobuf as _customer_pb assert getattr(_customer_pb, "__version__", None) == "4.25.3", ( - "test setup error: the hostile google.protobuf is not " - "first on sys.path" + "test setup error: hostile google.protobuf not on path" + ) + + # Importing the worker must detect the customer protobuf + # and activate the vendored fallback. + import os + import azure_functions_worker # noqa: F401 triggers detection + assert ( + os.environ.get("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") + == "python" + ), ( + "worker did not force pure-Python despite customer " + "protobuf being present" + ) + + # After bootstrap, top-level google.protobuf must resolve + # to the vendored copy — that's what redirects the pb2 + # stubs away from the hostile customer pin. + from azure_functions_worker._vendored.google import ( + protobuf as vendored_pb, + ) + assert sys.modules["google.protobuf"] is vendored_pb, ( + "bootstrap did not alias vendored google.protobuf" ) - # This is the operation that crashes today. + # Worker pb2 stubs must load successfully now that + # google.protobuf resolves to the vendored copy. from azure_functions_worker import protos msg = protos.StreamingMessage(request_id="abc") assert msg.request_id == "abc" + data = msg.SerializeToString() + roundtrip = protos.StreamingMessage() + roundtrip.ParseFromString(data) + assert roundtrip.request_id == "abc" + print("OK") + """ + ) - # The worker's protobuf must be a *different* module instance - # than the customer's. Otherwise the isolation is leaky. 
- from azure_functions_worker._vendored.google import ( - protobuf as worker_pb, - ) - assert worker_pb is not _customer_pb, ( - "worker and customer share the same google.protobuf " - "instance; vendoring did not isolate them" + result = self._run_subprocess( + code, + extra_path=hostile_site, + script_root=str(hostile_root), + ) + self.assertEqual( + result.returncode, + 0, + msg=( + "Worker failed under hostile customer protobuf.\n" + f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + ), + ) + self.assertIn("OK", result.stdout) + + def test_no_customer_protobuf_does_not_touch_environment(self): + """When the function app does not ship ``google.protobuf``, + the worker's ``__init__`` must not touch the protobuf + implementation env var and must not pre-import the vendored + copy. The worker's own protobuf install handles everything. + """ + empty_script_root = Path(self.tmp_dir) / "empty_app" + empty_script_root.mkdir() + # Create an empty .python_packages tree to make the detection + # fast-path check explicitly miss (rather than relying on + # AzureWebJobsScriptRoot being unset). + ( + empty_script_root / ".python_packages" / "lib" / "site-packages" + ).mkdir(parents=True) + + code = textwrap.dedent( + """ + import os + import sys + + # AzureWebJobsScriptRoot points at an app whose + # .python_packages does not contain google/protobuf. The + # detection's only check is os.path.isdir on that path, so + # it must return False. + import azure_functions_worker # noqa: F401 + + # The detection must not have set the env var. + assert ( + os.environ.get("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") + is None + ), ( + "worker forced pure-Python despite no customer " + "protobuf being present" ) + + # The vendored modules must not have been pre-imported + # under their top-level names. ``google.protobuf`` may + # still be in sys.modules if some unrelated worker import + # touched it, but if so it must NOT be the vendored copy. 
+ top_pb = sys.modules.get("google.protobuf") + if top_pb is not None: + assert "_vendored" not in (top_pb.__file__ or ""), ( + "vendored protobuf was activated despite no " + "customer protobuf being present" + ) print("OK") """ ) - result = self._run_in_subprocess(code, hostile_root) + result = self._run_subprocess( + code, script_root=str(empty_script_root) + ) self.assertEqual( result.returncode, 0, msg=( - "Worker failed to import protos with a hostile customer " - f"protobuf on sys.path.\nSTDOUT:\n{result.stdout}\n" - f"STDERR:\n{result.stderr}" + "Worker failed under the no-customer-protobuf branch.\n" + f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" ), ) self.assertIn("OK", result.stdout) @unittest.skipUnless( _vendored_protobuf_present(), - "Vendored protobuf is not populated.", + "Vendored protobuf is not populated. Run " + "`python eng/scripts/vendor_deps.py --target " + "workers/azure_functions_worker/_vendored` first.", ) - def test_vendored_init_sets_pure_python_implementation(self): - """Importing anything under the vendored namespace must guarantee - that ``PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`` is set, - because the vendored protobuf is shipped without C extensions. - The env var is set by ``azure_functions_worker/__init__.py``, - which runs before any submodule (including ``_vendored``).""" + def test_activation_does_not_shadow_other_google_packages(self): + """Regression test for the ``google.cloud`` / ``google.auth`` + shadowing bug: aliasing ``sys.modules["google"]`` to the + vendored package broke every other ``google.*`` namespace + package the customer ships. The bootstrap must alias only the + protobuf-specific names so that, for example, + ``import google.cloud.foo`` still resolves to the customer's + ``google`` namespace package. 
+ + This is the most important regression to guard against because + the most common reason customers end up with ``google.protobuf`` + in their dependency tree is precisely because they depend on + ``google-cloud-*``, ``google-auth``, or ``google-api-core``. + """ + # Build a fake customer site that includes both + # google/protobuf (to trigger activation) AND google/cloud/foo + # (to verify it stays importable after activation). + site = ( + Path(self.tmp_dir) + / ".python_packages" + / "lib" + / "site-packages" + ) + (site / "google" / "protobuf").mkdir(parents=True) + (site / "google" / "cloud" / "foo").mkdir(parents=True) + (site / "google" / "__init__.py").write_text( + "# Customer's google namespace package.\n", encoding="utf-8" + ) + (site / "google" / "protobuf" / "__init__.py").write_text( + '__version__ = "4.25.3"\n', encoding="utf-8" + ) + (site / "google" / "cloud" / "__init__.py").write_text( + "# Customer's google.cloud sub-package.\n", encoding="utf-8" + ) + (site / "google" / "cloud" / "foo" / "__init__.py").write_text( + 'CUSTOMER_VALUE = "preserved"\n', encoding="utf-8" + ) + code = textwrap.dedent( """ - import os - os.environ.pop("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", None) - import azure_functions_worker._vendored # noqa: F401 - assert os.environ.get( - "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION" - ) == "python" + import sys + import azure_functions_worker # noqa: F401 triggers bootstrap + + # protobuf must come from the vendored copy. + from google.protobuf import timestamp_pb2 + assert "_vendored" in (timestamp_pb2.__file__ or ""), ( + "vendored protobuf was not activated" + ) + + # google.cloud.foo (customer-only) must still be reachable. 
+ import google.cloud.foo as cust + assert cust.CUSTOMER_VALUE == "preserved", ( + "customer google.cloud.foo was shadowed by aliasing" + ) print("OK") """ ) - env = os.environ.copy() - env["PYTHONPATH"] = str(WORKER_ROOT) - # Avoid inheriting the parent's setting; we want to verify the - # azure_functions_worker package __init__ installs it. - env.pop("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", None) - result = subprocess.run( - [sys.executable, "-c", code], - env=env, - capture_output=True, - text=True, - timeout=30, + + result = self._run_subprocess( + code, + extra_path=site, + script_root=str(Path(self.tmp_dir)), ) self.assertEqual( result.returncode, 0, - msg=f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}", + msg=( + "Worker bootstrap shadowed customer's other google.* " + "packages after activating vendored protobuf.\n" + f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + ), ) + self.assertIn("OK", result.stdout) if __name__ == "__main__": From 8d576f6379166ec273da2ee706c9c7839e9a63e7 Mon Sep 17 00:00:00 2001 From: Gavin Aguiar Date: Fri, 19 Jun 2026 11:14:40 -0500 Subject: [PATCH 2/5] Fix for local env --- workers/azure_functions_worker/__init__.py | 58 +++++++--- workers/python/prodV4/worker.py | 5 + workers/python/test/worker.py | 5 + .../tests/unittests/test_vendored_protobuf.py | 105 ++++++++++++++++++ 4 files changed, 155 insertions(+), 18 deletions(-) diff --git a/workers/azure_functions_worker/__init__.py b/workers/azure_functions_worker/__init__.py index a2b449a3..79f3caeb 100644 --- a/workers/azure_functions_worker/__init__.py +++ b/workers/azure_functions_worker/__init__.py @@ -41,24 +41,46 @@ # necessary because protobuf's runtime assumes a single coherent # ``google.protobuf`` package per process. # -# Detection cost: a single ``os.path.isdir`` call (and at most one -# ``importlib.util.find_spec`` fallback) at worker startup. Zero per -# invocation. 
+# Detection cost: a single env-var lookup plus at most one +# ``os.path.isdir`` call at worker startup. Zero per invocation. +# +# Policy override via ``_AZFUNC_USE_VENDORED_PROTOBUF``: +# ``"1"`` — force activation. The launcher (``worker.py``) sets this +# in local-dev mode so we always isolate the worker from +# whatever protobuf version sits in the customer's venv. +# ``"0"`` — force no activation. Escape hatch for users who need to +# debug protobuf-version-specific behavior against the +# worker's bundled protobuf. +# unset — autodetect via the canonical Azure Functions layout +# (``.python_packages``). This is the production path; the +# override env var is not set in cloud launches. + +_USE_VENDORED_PROTOBUF_ENV = "_AZFUNC_USE_VENDORED_PROTOBUF" -def _customer_ships_protobuf() -> bool: - """Return True if the function app ships its own google.protobuf - (directly or as a transitive dependency). +def _should_use_vendored_protobuf() -> bool: + """Return True if the worker should activate its private pure-Python + ``google.protobuf`` fallback for this process. - We only check the canonical Azure Functions deployment layout — - ``/.python_packages/lib/site-packages/google/ - protobuf``. We deliberately do *not* fall back to a generic - ``importlib.util.find_spec`` lookup because that would also match - the worker's own protobuf install (which is always on ``sys.path`` - and is not "customer protobuf"). A false positive there would - activate the pure-Python vendored fallback for every function app, - erasing the perf benefit of running the worker on ``upb``. + The launcher (``worker.py``) is the policy layer: it knows whether + we are running in Azure or locally and sets + ``_AZFUNC_USE_VENDORED_PROTOBUF`` accordingly. If the env var is + unset (e.g. the worker was imported directly by a test or a + third-party host) we fall back to checking the canonical Azure + Functions deployment layout. 
+ + We deliberately do *not* use a generic ``importlib.util.find_spec`` + lookup as a fallback because that would also match the worker's + own protobuf install (which is always on ``sys.path`` and is not + "customer protobuf"). A false positive there would activate the + pure-Python vendored fallback for every function app and erase + the perf benefit of running the worker on ``upb``. """ + override = os.environ.get(_USE_VENDORED_PROTOBUF_ENV) + if override == "1": + return True + if override == "0": + return False script_root = os.environ.get("AzureWebJobsScriptRoot") if not script_root: return False @@ -113,7 +135,7 @@ def _activate_vendored_protobuf() -> None: return -if _customer_ships_protobuf(): +if _should_use_vendored_protobuf(): # Force the vendored copy onto pure-Python BEFORE pre-importing # any of its modules, so that vendored ``api_implementation`` # doesn't try to load a (potentially incompatible) ``_upb``. @@ -125,8 +147,8 @@ def _activate_vendored_protobuf() -> None: # Opt-in diagnostic. Off by default to avoid polluting # customer logs / log scrapers that treat stderr as warning. print( - "[azure_functions_worker] Customer ships google.protobuf; " - "activated vendored pure-Python fallback.", + "[azure_functions_worker] Activated vendored pure-Python " + "protobuf fallback.", file=sys.stderr, ) # else: nothing to do. Worker's pb2 stubs will resolve top-level @@ -136,5 +158,5 @@ def _activate_vendored_protobuf() -> None: # actionable signal to customers. 
-del _customer_ships_protobuf +del _should_use_vendored_protobuf del _activate_vendored_protobuf diff --git a/workers/python/prodV4/worker.py b/workers/python/prodV4/worker.py index 512f7f68..fe0d26e4 100644 --- a/workers/python/prodV4/worker.py +++ b/workers/python/prodV4/worker.py @@ -66,6 +66,11 @@ def determine_user_pkg_paths(): # third-party user packages (in .venv) sys.path.insert(1, func_worker_dir) add_script_root_to_sys_path() + # In local dev we always activate the worker's private + # pure-Python ``google.protobuf`` fallback so the worker is + # isolated from whatever protobuf version sits in the + # user's environment + os.environ.setdefault("_AZFUNC_USE_VENDORED_PROTOBUF", "1") from azure_functions_worker import main main.main() diff --git a/workers/python/test/worker.py b/workers/python/test/worker.py index 95790083..33a987a0 100644 --- a/workers/python/test/worker.py +++ b/workers/python/test/worker.py @@ -17,6 +17,11 @@ def add_script_root_to_sys_path(): add_script_root_to_sys_path() minor_version = sys.version_info[1] if minor_version < 13: + # Local/test launch of the azure_functions_worker. Activate + # the worker's private pure-Python google.protobuf fallback + # so the worker is isolated from whatever protobuf version + # sits in the local environment. + os.environ.setdefault("_AZFUNC_USE_VENDORED_PROTOBUF", "1") from azure_functions_worker import main main.main() else: diff --git a/workers/tests/unittests/test_vendored_protobuf.py b/workers/tests/unittests/test_vendored_protobuf.py index 12e1894a..09ebaf9b 100644 --- a/workers/tests/unittests/test_vendored_protobuf.py +++ b/workers/tests/unittests/test_vendored_protobuf.py @@ -89,9 +89,15 @@ def _run_subprocess( *, extra_path: Path = None, script_root: str = None, + use_vendored_override: str = None, ) -> subprocess.CompletedProcess: """Run ``code`` in a fresh interpreter with the worker on PYTHONPATH plus an optional ``extra_path`` prepended. 
+ + ``use_vendored_override`` sets ``_AZFUNC_USE_VENDORED_PROTOBUF`` + in the child env when provided (``"1"`` to force activation, + ``"0"`` to force no activation). When ``None`` (default), the + env var is unset and the child uses the autodetect path. """ env = os.environ.copy() path_parts = [] @@ -105,6 +111,11 @@ def _run_subprocess( # Don't carry parent's protobuf impl forcing — the worker's # __init__ sets it based on detection. env.pop("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", None) + # Don't carry the launcher's vendored-protobuf override — + # each test sets it (or leaves it unset) deliberately. + env.pop("_AZFUNC_USE_VENDORED_PROTOBUF", None) + if use_vendored_override is not None: + env["_AZFUNC_USE_VENDORED_PROTOBUF"] = use_vendored_override if script_root is not None: env["AzureWebJobsScriptRoot"] = script_root else: @@ -338,6 +349,100 @@ def test_activation_does_not_shadow_other_google_packages(self): ) self.assertIn("OK", result.stdout) + @unittest.skipUnless( + _vendored_protobuf_present(), + "Vendored protobuf is not populated. Run " + "`python eng/scripts/vendor_deps.py --target " + "workers/azure_functions_worker/_vendored` first.", + ) + def test_launcher_override_forces_activation(self): + """The local-dev launcher sets ``_AZFUNC_USE_VENDORED_PROTOBUF=1`` + before importing the worker so that the worker is isolated from + whatever protobuf version sits in the customer's venv. This test + simulates that path: env var set to ``"1"``, no ``.python_packages`` + layout, no customer protobuf on ``sys.path``. The worker must + still activate the vendored fallback. 
+ """ + code = textwrap.dedent( + """ + import os + import sys + import azure_functions_worker # noqa: F401 triggers bootstrap + + assert ( + os.environ.get("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") + == "python" + ), "launcher override did not force pure-Python" + + from azure_functions_worker._vendored.google import ( + protobuf as vendored_pb, + ) + assert sys.modules["google.protobuf"] is vendored_pb, ( + "launcher override did not alias vendored google.protobuf" + ) + print("OK") + """ + ) + result = self._run_subprocess(code, use_vendored_override="1") + self.assertEqual( + result.returncode, + 0, + msg=( + "Worker did not honor _AZFUNC_USE_VENDORED_PROTOBUF=1.\n" + f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + ), + ) + self.assertIn("OK", result.stdout) + + def test_launcher_override_can_force_no_activation(self): + """``_AZFUNC_USE_VENDORED_PROTOBUF=0`` is the escape hatch for + users debugging protobuf-version-specific behavior against the + worker's bundled protobuf. It must skip activation even when + the canonical ``.python_packages`` layout would normally trigger + autodetect. + """ + hostile_root = self._write_hostile_protobuf(Path(self.tmp_dir)) + hostile_site = ( + hostile_root / ".python_packages" / "lib" / "site-packages" + ) + + code = textwrap.dedent( + """ + import os + import sys + import azure_functions_worker # noqa: F401 triggers bootstrap + + # Opt-out must beat autodetect: env var must remain unset + # and the vendored modules must not be aliased. 
+ assert ( + os.environ.get("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") + is None + ), "opt-out did not prevent forcing pure-Python" + + top_pb = sys.modules.get("google.protobuf") + if top_pb is not None: + assert "_vendored" not in (top_pb.__file__ or ""), ( + "opt-out did not prevent vendored activation" + ) + print("OK") + """ + ) + result = self._run_subprocess( + code, + extra_path=hostile_site, + script_root=str(hostile_root), + use_vendored_override="0", + ) + self.assertEqual( + result.returncode, + 0, + msg=( + "Worker did not honor _AZFUNC_USE_VENDORED_PROTOBUF=0.\n" + f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + ), + ) + self.assertIn("OK", result.stdout) + if __name__ == "__main__": unittest.main() From 334190b4136c0c972ce356ecec748b94841fce5d Mon Sep 17 00:00:00 2001 From: Gavin Aguiar Date: Fri, 19 Jun 2026 11:58:32 -0500 Subject: [PATCH 3/5] Removed debug settings --- workers/azure_functions_worker/__init__.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/workers/azure_functions_worker/__init__.py b/workers/azure_functions_worker/__init__.py index 79f3caeb..4f7aba94 100644 --- a/workers/azure_functions_worker/__init__.py +++ b/workers/azure_functions_worker/__init__.py @@ -143,14 +143,6 @@ def _activate_vendored_protobuf() -> None: "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python" ) _activate_vendored_protobuf() - if os.environ.get("AZURE_FUNCTIONS_PYTHON_PROTOBUF_DEBUG"): - # Opt-in diagnostic. Off by default to avoid polluting - # customer logs / log scrapers that treat stderr as warning. - print( - "[azure_functions_worker] Activated vendored pure-Python " - "protobuf fallback.", - file=sys.stderr, - ) # else: nothing to do. Worker's pb2 stubs will resolve top-level # google.protobuf to the worker's own protobuf install and use upb # naturally. 
We deliberately do NOT log on the no-op path: it would From 3ff7d1cbf35151c2a70d5438ab62070cee6dec31 Mon Sep 17 00:00:00 2001 From: Gavin Aguiar Date: Fri, 19 Jun 2026 12:26:51 -0500 Subject: [PATCH 4/5] Updated readme --- workers/azure_functions_worker/_vendored/README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/workers/azure_functions_worker/_vendored/README.md b/workers/azure_functions_worker/_vendored/README.md index cbdd71e6..90b2a2ba 100644 --- a/workers/azure_functions_worker/_vendored/README.md +++ b/workers/azure_functions_worker/_vendored/README.md @@ -26,9 +26,21 @@ fast `upb` C extension natively. Most function apps fall in this branch and pay no runtime cost for vendoring. The selection happens once at worker startup in -`azure_functions_worker/__init__.py`. See `_customer_ships_protobuf()` +`azure_functions_worker/__init__.py`. See `_should_use_vendored_protobuf()` there. +## Local development uses the vendored copy by default + +When the worker runs locally (not in an Azure environment), the +launchers (`workers/python/prodV4/worker.py`, +`workers/python/test/worker.py`) default +`_AZFUNC_USE_VENDORED_PROTOBUF` to `1` (via `os.environ.setdefault`) before importing +`azure_functions_worker`, which activates the vendored fallback without autodetection +unless the developer has already set the variable to `0`. This keeps the local dev experience aligned with the +customer-ships-protobuf production path and isolates the worker from +whatever `protobuf` happens to be installed in the developer's venv. +The pure-Python perf cost only matters in production. 
+ ## Pure-Python only The vendored copy is shipped as pure Python — no native extensions are From b73d56711f4e589aa8b800361c4d3f2e5791fe0f Mon Sep 17 00:00:00 2001 From: Gavin Aguiar Date: Fri, 19 Jun 2026 14:42:49 -0500 Subject: [PATCH 5/5] Test fix --- .../test_dependency_isolation_functions.py | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/workers/tests/endtoend/test_dependency_isolation_functions.py b/workers/tests/endtoend/test_dependency_isolation_functions.py index 7ada0ae6..7df6604f 100644 --- a/workers/tests/endtoend/test_dependency_isolation_functions.py +++ b/workers/tests/endtoend/test_dependency_isolation_functions.py @@ -124,16 +124,45 @@ def test_paths_resolution(self): @skipIf(is_envvar_true('skipTest'), 'Running tests using an editable azure-functions package.') def test_loading_libraries_from_customers_package(self): - """Since the Python now loaded the customer's dependencies, the - libraries version should match the ones in - .python_packages_grpc_protobuf/ folder + """The worker vendors a private copy of ``google.protobuf`` and + activates it whenever the customer ships their own protobuf (or + when running locally). Activation aliases ``google.protobuf`` in + ``sys.modules`` to the vendored copy so the worker's pb2 stubs + never see a customer-pinned (potentially incompatible) + protobuf. Because protobuf's runtime requires a single coherent + ``google.protobuf`` package per process, customer code's + ``import google.protobuf`` also resolves to the vendored copy + — the customer's pinned version is deliberately shadowed. + + ``grpc`` is not vendored and is still loaded from the customer's + package, so its version assertion is preserved. 
""" r: Response = self.webhost.request('GET', 'report_dependencies') libraries = r.json()['libraries'] - self.assertEqual( - libraries['proto.expected.version'], libraries['proto.version'] + + # protobuf: functionapp pin (3.9.0) must NOT be what was loaded; + # the vendored fallback should have shadowed it. We verify + # both the version doesn't match the pin and the module path + # points into the worker's _vendored tree. + self.assertNotEqual( + libraries['proto.version'], + libraries['proto.expected.version'], + msg=( + "Customer's pinned protobuf was loaded; the vendored " + "fallback should have shadowed it for worker safety." + ), + ) + self.assertIn( + '_vendored', + libraries['proto.file'], + msg=( + "google.protobuf did not resolve to the worker's " + "vendored copy. proto.file=" + f"{libraries['proto.file']!r}" + ), ) + # grpc is not vendored self.assertEqual( libraries['grpc.expected.version'], libraries['grpc.version'] )