From c74b482d334dec4e944636df3635d57655844e1c Mon Sep 17 00:00:00 2001 From: Keele Date: Wed, 27 May 2026 13:09:00 -0500 Subject: [PATCH] hrw4u: Fix u4wrh emitting quoted header names in HRW4U output Header qualifiers that contain only characters legal in the HRW4U IDENT rule were being wrapped in double quotes at emission time, which the grammar rejects on re-parse. Quote a qualifier only when it actually contains characters that require quoting. Adds a parametrized unit test for Validator.unquote_if_ident covering IDENT-safe stripping, space-containing qualifiers, empty strings, and leading-digit rejection, symmetric with the existing needs_quotes and quote_if_needed tests. --- tools/hrw4u/src/common.py | 3 +++ tools/hrw4u/src/hrw_symbols.py | 3 ++- tools/hrw4u/src/validation.py | 12 ++++++++++++ tools/hrw4u/tests/data/vars/exceptions.txt | 3 +++ .../hrw4u/tests/data/vars/hyphen_header.input.txt | 5 +++++ .../hrw4u/tests/data/vars/hyphen_header.output.txt | 2 ++ tools/hrw4u/tests/test_errors.py | 14 ++++++++++++++ 7 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 tools/hrw4u/tests/data/vars/hyphen_header.input.txt create mode 100644 tools/hrw4u/tests/data/vars/hyphen_header.output.txt diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index 15f1885f4bd..47f5a7eff2b 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -50,6 +50,9 @@ class RegexPatterns: re.VERBOSE | re.DOTALL, ) + # Grammar IDENT rule from hrw4u.g4 (header names, qualifiers) + GRAMMAR_IDENT: Final = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_@.-]*$') + # Additional performance patterns IDENTIFIER: Final = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') WHITESPACE: Final = re.compile(r'\s+') diff --git a/tools/hrw4u/src/hrw_symbols.py b/tools/hrw4u/src/hrw_symbols.py index 0a0339b2d0d..503d65f5a50 100644 --- a/tools/hrw4u/src/hrw_symbols.py +++ b/tools/hrw4u/src/hrw_symbols.py @@ -233,6 +233,7 @@ def repl(match: re.Match) -> str: def _handle_set_rm_operation( 
self, cmd: str, toks: list[str], prefix: str, qualifier: str, section: SectionType | None = None) -> str: + qualifier = Validator.unquote_if_ident(qualifier) if cmd.startswith("rm-"): return f'{prefix}{qualifier} = ""' if len(toks) < 3: @@ -289,7 +290,7 @@ def _handle_statement_function(self, name: str, args: list[str], section: Sectio qargs = [status_code, self._rewrite_inline_percents(f'"{url_arg}"', section)] elif name == "add-header" and args: # Convert add-header command to += syntax for reverse mapping - header_name = args[0] + header_name = Validator.unquote_if_ident(args[0]) prefix = self.get_prefix_for_context("header_ops", section) prefixed_header = f"{prefix}{header_name}" diff --git a/tools/hrw4u/src/validation.py b/tools/hrw4u/src/validation.py index 1b6d35e4410..91ad29bdc64 100644 --- a/tools/hrw4u/src/validation.py +++ b/tools/hrw4u/src/validation.py @@ -228,6 +228,18 @@ def needs_quotes(value: str) -> bool: def quote_if_needed(value: str) -> str: return f'"{value}"' if Validator.needs_quotes(value) else value + @staticmethod + def unquote_if_ident(value: str) -> str: + """Strip surrounding quotes when the unquoted form is a valid grammar IDENT. + + See the IDENT rule in grammar/hrw4u.g4; RegexPatterns.GRAMMAR_IDENT (common.py) must stay in sync with it. 
+ """ + if len(value) >= 2 and value.startswith('"') and value.endswith('"'): + inner = value[1:-1] + if RegexPatterns.GRAMMAR_IDENT.fullmatch(inner): + return inner + return value + @staticmethod def percent_block() -> Callable[[str], None]: diff --git a/tools/hrw4u/tests/data/vars/exceptions.txt b/tools/hrw4u/tests/data/vars/exceptions.txt index 64e57bf9bb0..8f7d194a85b 100644 --- a/tools/hrw4u/tests/data/vars/exceptions.txt +++ b/tools/hrw4u/tests/data/vars/exceptions.txt @@ -3,3 +3,6 @@ # # Explicit slot assignment syntax cannot be reversed explicit_slots.input: hrw4u + +# HRW accepts quoted header names but hrw4u emits them bare when IDENT-safe +hyphen_header.input: u4wrh diff --git a/tools/hrw4u/tests/data/vars/hyphen_header.input.txt b/tools/hrw4u/tests/data/vars/hyphen_header.input.txt new file mode 100644 index 00000000000..d98bfa302ee --- /dev/null +++ b/tools/hrw4u/tests/data/vars/hyphen_header.input.txt @@ -0,0 +1,5 @@ +REMAP { + if inbound.method == HEAD { + inbound.req.X-Blobstore-Authproxy-Head-Request = "true"; + } +} diff --git a/tools/hrw4u/tests/data/vars/hyphen_header.output.txt b/tools/hrw4u/tests/data/vars/hyphen_header.output.txt new file mode 100644 index 00000000000..29f271664d4 --- /dev/null +++ b/tools/hrw4u/tests/data/vars/hyphen_header.output.txt @@ -0,0 +1,2 @@ +cond %{METHOD} =HEAD [AND] + set-header "X-Blobstore-Authproxy-Head-Request" "true" diff --git a/tools/hrw4u/tests/test_errors.py b/tools/hrw4u/tests/test_errors.py index d3c096078cc..f4efe6d0b64 100644 --- a/tools/hrw4u/tests/test_errors.py +++ b/tools/hrw4u/tests/test_errors.py @@ -324,6 +324,20 @@ def test_quote_if_needed(self): assert Validator.quote_if_needed("simple") == "simple" assert Validator.quote_if_needed("has space") == '"has space"' + @pytest.mark.parametrize( + "value,expected", + [ + ('"X-Foo"', "X-Foo"), + ('"X Foo"', '"X Foo"'), + ("X-Foo", "X-Foo"), + ('""', '""'), + ('"@internal"', '"@internal"'), + ('"1foo"', '"1foo"'), + ], + ) + def 
test_unquote_if_ident(self, value, expected): + assert Validator.unquote_if_ident(value) == expected + class TestPlainTextFormatterParity: """The plain formatter must preserve current CLI output byte-for-byte."""