Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 106 additions & 4 deletions eng/scripts/vendor_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,28 +73,121 @@ def _resolve_package_root(dotted_name: str) -> Path:
return origin.parent


def _safe_makedirs(path: Path) -> None:
    """Create ``path`` and any missing parents, tolerating bind-mount races.

    Docker-on-Windows bind mounts can leave stale metadata after a previous
    ``rmtree`` so that ``Path.exists()``, ``os.mkdir`` and even kernel
    ``mkdir`` disagree. We work around it by:

    1. Trying ``os.makedirs(exist_ok=True)`` (the common case).
    2. If that fails, walking parents and creating each segment individually
       via ``os.mkdir`` while swallowing ``FileExistsError``. Any other
       ``OSError`` ends the walk but does not abort the function, so the
       next strategy still gets a chance.
    3. As a last resort (POSIX only), shelling out to ``mkdir -p``.

    Raises:
        RuntimeError: if ``path`` is still not a directory after every
            applicable strategy. The last ``OSError`` observed along the
            way (if any) is attached as ``__cause__``.
    """
    import os
    import subprocess

    str_path = str(path)
    # Remember the most recent failure so the final error can be chained to
    # it instead of losing the root cause.
    last_err: OSError | None = None
    try:
        os.makedirs(str_path, exist_ok=True)
        # Do not trust a clean return: verify the directory is really there.
        if path.is_dir():
            return
    except OSError as err:
        last_err = err

    # Per-segment fallback. Collect the path and its ancestors up to (but
    # not including) the filesystem root / anchor.
    segments: list[Path] = []
    cur = path
    while cur.parent != cur:
        segments.append(cur)
        cur = cur.parent
    # Create from the outermost ancestor down to the leaf.
    for seg in reversed(segments):
        try:
            os.mkdir(str(seg))
        except FileExistsError:
            continue
        except OSError as err:
            # FileNotFoundError, NotADirectoryError, PermissionError, ...:
            # deeper segments cannot succeed either, so stop here and fall
            # through to the remaining strategy rather than propagating.
            last_err = err
            break
    if path.is_dir():
        return

    # Final fallback: kernel mkdir -p (POSIX only; on Windows the prior
    # steps are sufficient because there is no bind-mount weirdness).
    if os.name == "posix":
        rc = subprocess.run(
            ["mkdir", "-p", str_path], capture_output=True, text=True
        )
        if rc.returncode == 0 and path.is_dir():
            return
        raise RuntimeError(
            f"Could not create directory {str_path!r}: {rc.stderr.strip()}"
        ) from last_err
    raise RuntimeError(f"Could not create directory {str_path!r}") from last_err


def _safe_rmtree(path: Path) -> None:
    """Delete the tree at ``path``, with an ``rm -rf`` second pass on POSIX.

    Docker Desktop on Windows occasionally leaves stale cache entries after
    ``shutil.rmtree`` so that subsequent ``mkdir`` calls fail with confusing
    errors. Repeating the removal through the kernel appears to flush that
    cache reliably, so the ``rm -rf`` pass runs on POSIX even when Python
    already considers the path gone.

    Raises ``OSError`` if ``path`` still exists once every attempt has run.
    Failing loudly is deliberate: a silently surviving tree would let stale
    files from an older vendoring be merged under the re-copy (the
    import-rewriter only touches files it copies) and ship in the artifact.
    """
    import os
    import subprocess

    python_error: Exception | None = None

    # First pass: plain Python removal. A failure is remembered, not raised,
    # because the kernel-level pass below may still succeed.
    if path.exists():
        try:
            shutil.rmtree(path)
        except OSError as exc:
            python_error = exc

    # Second pass: unconditional on POSIX; its exit status is ignored since
    # the existence check below is the real verdict.
    if os.name == "posix":
        subprocess.run(
            ["rm", "-rf", str(path)],
            check=False,
            text=True,
            capture_output=True,
        )

    if not path.exists():
        return

    # Neither pass removed the tree; report the Python-side error so callers
    # do not silently copy new files on top of a stale tree.
    raise OSError(
        f"Failed to remove {path!s}; stale files would be shipped. "
        f"Original error: {python_error!r}"
    )



def _copy_package(src: Path, dst: Path) -> int:
"""Copy ``src`` to ``dst`` recursively, skipping native extensions.

Returns the number of Python source files copied.
"""
if dst.exists():
shutil.rmtree(dst)
dst.mkdir(parents=True)
_safe_rmtree(dst)
_safe_makedirs(dst)

copied = 0
for path in src.rglob("*"):
rel = path.relative_to(src)
target = dst / rel
if path.is_dir():
target.mkdir(parents=True, exist_ok=True)
_safe_makedirs(target)
continue
if path.suffix in NATIVE_EXTENSION_SUFFIXES:
continue
# Drop bytecode caches; they will be regenerated.
if "__pycache__" in path.parts:
continue
target.parent.mkdir(parents=True, exist_ok=True)
_safe_makedirs(target.parent)
shutil.copy2(path, target)
if path.suffix == ".py":
copied += 1
Expand Down Expand Up @@ -263,6 +356,15 @@ def vendor_package(dotted_name: str, target_root: Path) -> dict:
parts = dotted_name.split(".")
dst = target_root.joinpath(*parts)

# Wipe the top-level (e.g. ``_vendored/google``) entirely before
# re-vendoring. Removing only the leaf (``_vendored/google/protobuf``)
# leaves the parent in a half-state that confuses ``Path.mkdir`` on
# Docker-on-Windows bind mounts (``parent.is_dir()`` can return
# ``False`` for a parent that exists, causing the recursive
# ``parents=True`` path to raise ``FileExistsError``).
top_level = target_root / parts[0]
_safe_rmtree(top_level)

copied = _copy_package(src, dst)
_ensure_namespace_packages(target_root, dotted_name)
files_rewritten, rewrites = _rewrite_tree(
Expand Down
159 changes: 150 additions & 9 deletions workers/azure_functions_worker/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,154 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import os
import sys

# Runtime requirement, not a build-time/pipeline setting: protobuf selects its
# implementation when google.protobuf modules are imported in this worker
# process. Our vendored azure_functions_worker._vendored.google.protobuf tree
# deliberately contains only pure-Python files (native extensions are not
# copied), so force protobuf to use its pure-Python implementation at import
# time. This setdefault runs before any vendored protobuf import because
# azure_functions_worker/__init__.py is the package top-level module and
# executes before submodules, including _vendored.*.
os.environ.setdefault("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python")
# ---------------------------------------------------------------------------
# Protobuf runtime selection
# ---------------------------------------------------------------------------
#
# The worker's generated ``*_pb2.py`` stubs import ``google.protobuf``
# at the top level. Two scenarios:
#
# 1. The function app does NOT ship its own ``google.protobuf``. The
# top-level lookup resolves to the protobuf install that ships with
# the worker runtime (under ``worker_deps_path`` on Azure Functions),
# which is guaranteed compatible with the worker's pb2 stubs and
# includes the fast ``upb`` C extension. Nothing to do here.
#
# 2. The function app DOES ship ``google.protobuf`` in
# ``.python_packages``. On Azure Functions the customer's path
# precedes the worker's on ``sys.path``, so a top-level
# ``import google.protobuf`` resolves to the customer's copy. If the
# customer pinned an older protobuf (the common case is 4.x) the
# worker's pb2 stubs fail to load — for example ``from
# google.protobuf import runtime_version`` does not exist before
# protobuf 5.27. To insulate the worker from the customer's pin we:
# a. Pre-import the vendored ``google.protobuf`` modules and
# register them in ``sys.modules`` under their top-level names
# so subsequent ``from google.protobuf import X`` resolves to
# the vendored copy.
# b. Force the vendored copy onto its pure-Python implementation
# via ``PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`` so the
# vendored ``api_implementation`` does not try to load the
# customer's ``google._upb._message`` C extension (which would
# be incompatible with vendored protobuf and unsafe to load
# alongside another ``_upb`` instance).
#
# Side effect of scenario 2: customer code that does ``import
# google.protobuf`` later in the process will resolve to the vendored
# copy rather than the customer's pinned copy. This trade-off is
# necessary because protobuf's runtime assumes a single coherent
# ``google.protobuf`` package per process.
#
# Detection cost: at most two env-var lookups plus at most one
# ``os.path.isdir`` call at worker startup. Zero per invocation.
#
# Policy override via ``_AZFUNC_USE_VENDORED_PROTOBUF``:
# ``"1"`` — force activation. The launcher (``worker.py``) sets this
# in local-dev mode so we always isolate the worker from
# whatever protobuf version sits in the customer's venv.
# ``"0"`` — force no activation. Escape hatch for users who need to
# debug protobuf-version-specific behavior against the
# worker's bundled protobuf.
# unset — autodetect via the canonical Azure Functions layout
# (``.python_packages``). This is the production path; the
# override env var is not set in cloud launches.

_USE_VENDORED_PROTOBUF_ENV = "_AZFUNC_USE_VENDORED_PROTOBUF"


def _should_use_vendored_protobuf() -> bool:
    """Decide whether this process must run on the vendored, pure-Python
    ``google.protobuf`` instead of whatever the top-level import finds.

    Resolution order:

    1. ``_AZFUNC_USE_VENDORED_PROTOBUF`` set to ``"1"`` forces ``True`` and
       ``"0"`` forces ``False``. The launcher (``worker.py``) is the policy
       layer that sets it, since it knows whether we run in Azure or locally.
    2. Any other value, or no value (e.g. the worker was imported directly
       by a test or a third-party host), falls back to the canonical Azure
       Functions layout: return ``True`` only when
       ``$AzureWebJobsScriptRoot/.python_packages/lib/site-packages/google/protobuf``
       is a directory.

    A generic ``importlib.util.find_spec`` probe is deliberately *not* used
    as a fallback: it would also match the worker's own protobuf install
    (always on ``sys.path``, and not "customer protobuf"). That false
    positive would switch every function app onto the pure-Python fallback
    and erase the perf benefit of running the worker on ``upb``.
    """
    # Explicit override wins; unrecognised values fall through to autodetect.
    forced = {"1": True, "0": False}.get(
        os.environ.get(_USE_VENDORED_PROTOBUF_ENV)
    )
    if forced is not None:
        return forced

    script_root = os.environ.get("AzureWebJobsScriptRoot")
    if not script_root:
        return False

    customer_protobuf_parts = (
        ".python_packages",
        "lib",
        "site-packages",
        "google",
        "protobuf",
    )
    return os.path.isdir(os.path.join(script_root, *customer_protobuf_parts))


def _activate_vendored_protobuf() -> None:
    """Import the vendored protobuf packages and publish them in
    ``sys.modules`` under their top-level ``google.protobuf`` names, so the
    worker's pb2 stubs bind to the vendored copy rather than the customer's
    pinned one.

    Returns silently as soon as a vendored module cannot be imported.
    """
    import importlib

    vendored_prefix = "azure_functions_worker._vendored."

    # Only the protobuf-specific names are aliased, never the top-level
    # ``google`` package. The vendored ``google`` is a regular package whose
    # ``__path__`` covers only our vendored tree, so aliasing it would shadow
    # every other ``google.*`` distribution the customer ships
    # (``google.cloud.*``, ``google.auth``, ``google.api_core``, etc.), which
    # are the most common reason a customer has protobuf among their
    # dependencies at all. ``from google.protobuf import X`` short-circuits
    # on ``sys.modules["google.protobuf"]`` without consulting
    # ``sys.modules["google"]``, so aliasing the leaves is sufficient.
    for public_name in ("google.protobuf", "google.protobuf.internal"):
        try:
            vendored_module = importlib.import_module(
                vendored_prefix + public_name
            )
        except ImportError:
            # The vendored tree may be absent in some dev workflows (before
            # ``vendor_deps.py`` has been run). Stay quiet here; the next
            # worker import will surface a clearer error.
            return
        # Plain assignment, not ``setdefault``: the alias must win even if
        # something already populated the top-level slot, otherwise an early
        # customer import would keep it and the pb2 stubs would see the
        # customer's protobuf.
        sys.modules[public_name] = vendored_module


# One-time activation, executed when this package is imported.
if _should_use_vendored_protobuf():
    # The implementation selector has to be in the environment BEFORE any
    # vendored module is pre-imported; otherwise the vendored
    # ``api_implementation`` could try to load a (potentially incompatible)
    # ``_upb``. ``setdefault`` keeps any value that is already set.
    os.environ.setdefault("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python")
    _activate_vendored_protobuf()
# No ``else`` branch on purpose: the worker's pb2 stubs then resolve the
# top-level google.protobuf to the worker's own protobuf install and use upb
# naturally. We deliberately do NOT log on that no-op path: it would run on
# every worker startup for the entire fleet and provides no actionable
# signal to customers.


# Both helpers have done their job; remove them from the package namespace.
del _should_use_vendored_protobuf, _activate_vendored_protobuf
Loading
Loading