Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion tools/hrw4u/src/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,15 @@ def generate_complete_reverse_resolution_map(self) -> dict[str, Any]:
reverse_map[name] = mapping

# Add context type mappings
from hrw4u.tables import CONTEXT_TYPE_MAP, FALLBACK_TAG_MAP
from hrw4u.tables import BARE_TAG_MAP, CONTEXT_TYPE_MAP, FALLBACK_TAG_MAP
reverse_map["CONTEXT_TYPE_MAP"] = CONTEXT_TYPE_MAP.copy()

# Add fallback tag mappings
reverse_map["FALLBACK_TAG_MAP"] = FALLBACK_TAG_MAP.copy()

# Add bare tag mappings
reverse_map["BARE_TAG_MAP"] = BARE_TAG_MAP.copy()

return reverse_map


Expand Down
2 changes: 2 additions & 0 deletions tools/hrw4u/src/hrw_symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,8 @@ def percent_to_ident_or_func(self, percent: str, section: SectionType | None) ->
special = self._resolve_ambiguous_exact(tag, section)
if special is not None:
return special, False
if (bare_map := tables.REVERSE_RESOLUTION_MAP.get("BARE_TAG_MAP")) and (bare_ident := bare_map.get(tag)) is not None:
return bare_ident, False
raise SymbolResolutionError(percent, "Missing payload for prefix condition")

result = self._resolve_fallback_tag(tag, payload, section)
Expand Down
9 changes: 3 additions & 6 deletions tools/hrw4u/src/hrw_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,10 @@ def _cached_percent_parsing(self, pct_text: str) -> tuple[str, str | None]:
return self.symbol_resolver.parse_percent_block(pct_text)

@lru_cache(maxsize=256)
def _cached_symbol_to_ident(self, pct_text: str, section_name: str) -> tuple[str, str]:
def _cached_symbol_to_ident(self, pct_text: str, section_name: str) -> tuple[str, bool]:
"""Cache expensive symbol resolution operations."""
try:
section = SectionType(section_name)
return self.symbol_resolver.percent_to_ident_or_func(pct_text, section)
except (ValueError, SymbolResolutionError):
return pct_text, ""
section = SectionType(section_name)
return self.symbol_resolver.percent_to_ident_or_func(pct_text, section)

#
# Helpers
Expand Down
9 changes: 9 additions & 0 deletions tools/hrw4u/src/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@
}

FALLBACK_TAG_MAP: dict[str, tuple[str, bool]] = {
"CLIENT-CERT": ("inbound.conn.client-cert.", False),
"CLIENT-CERT:SAN": ("inbound.conn.client-cert.SAN.", False),
"CLIENT-HEADER": ("inbound.req.", False),
"CLIENT-URL:QUERY": ("inbound.url.query.", False),
"COOKIE": ("inbound.cookie.", False),
Expand All @@ -151,6 +153,13 @@
"TO-URL:QUERY": ("to.url.query.", False)
}

# Reverse mapping for bare %{TAG} references (no payload) found in raw HRW, used when
# the tag is not an exact condition and must still resolve to an HRW4U identifier.
BARE_TAG_MAP: dict[str, str] = {
"CLIENT-URL": "inbound.url.url",
"SERVER-URL": "outbound.url.url",
}

# Context type to mapping name associations
CONTEXT_TYPE_MAP: dict[str, str | tuple[str, str]] = {
"header_condition": ("HEADER_CONTEXT_MAP", "inbound.resp."),
Expand Down
2 changes: 1 addition & 1 deletion tools/hrw4u/src/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class SuffixGroup(Enum):
CERT_FIELDS = frozenset(
{
"PEM", "pem", "SIG", "sig", "SUBJECT", "subject", "ISSUER", "issuer", "SERIAL", "serial", "NOT_BEFORE", "not_before",
"NOT_AFTER", "not_after", "VERSION", "version"
"NOT_AFTER", "not_after", "VERSION", "version", "APRN-RAW", "aprn-raw"
})
SAN_FIELDS = frozenset({"DNS", "dns", "IP", "ip", "EMAIL", "email", "URI", "uri"})
PLUGIN_CNTL_MAPPING = {"TIMEZONE": frozenset({"GMT", "LOCAL"}), "INBOUND_IP_SOURCE": frozenset({"PEER", "PROXY"})}
Expand Down
5 changes: 5 additions & 0 deletions tools/hrw4u/tests/data/conds/bare-client-cert-aprn.input.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
REMAP {
if inbound.conn.client-cert.APRN-RAW {
inbound.req.X-Cert = "{inbound.conn.client-cert.APRN-RAW}";
}
}
3 changes: 3 additions & 0 deletions tools/hrw4u/tests/data/conds/bare-client-cert-aprn.output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
cond %{REMAP_PSEUDO_HOOK}
cond %{CLIENT-CERT:APRN-RAW}
set-header X-Cert "%{CLIENT-CERT:APRN-RAW}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
REMAP {
if inbound.conn.client-cert.SAN.DNS {
inbound.req.X-Cert = "{inbound.conn.client-cert.SAN.DNS}";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
cond %{REMAP_PSEUDO_HOOK}
cond %{CLIENT-CERT:SAN:DNS}
set-header X-Cert "%{CLIENT-CERT:SAN:DNS}"
5 changes: 5 additions & 0 deletions tools/hrw4u/tests/data/conds/bare-client-url.input.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
REMAP {
if inbound.url.url == "https://download.swift.org/" {
inbound.status = 301;
}
}
3 changes: 3 additions & 0 deletions tools/hrw4u/tests/data/conds/bare-client-url.output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
cond %{REMAP_PSEUDO_HOOK}
cond %{CLIENT-URL} ="https://download.swift.org/"
set-status 301
6 changes: 6 additions & 0 deletions tools/hrw4u/tests/data/conds/exceptions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ implicit-cmp.input: u4wrh
double-negation.input: hrw4u
# Old format without explicit hook defaults to REMAP; hrw4u always emits explicit sections
no-hook.input: u4wrh
# Legacy raw HRW uses bare %{CLIENT-URL}; forward emits the canonical %{CLIENT-URL:URL} form
bare-client-url.input: u4wrh
# Legacy raw HRW uses bare %{CLIENT-CERT:...}; forward emits the canonical %{INBOUND:CLIENT-CERT:...} form
bare-client-cert-aprn.input: u4wrh
# Multi-colon bare %{CLIENT-CERT:SAN:DNS} exercises longest-match FALLBACK_TAG_MAP path
bare-client-cert-san-dns.input: u4wrh
86 changes: 86 additions & 0 deletions tools/hrw4u/tests/test_u4wrh_round_trip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import re
from pathlib import Path

import pytest
from antlr4 import InputStream, CommonTokenStream

from hrw4u.errors import ErrorCollector
from hrw4u.hrw4uLexer import hrw4uLexer
from hrw4u.hrw4uParser import hrw4uParser
from hrw4u.visitor import HRW4UVisitor
from u4wrh.hrw_visitor import HRWInverseVisitor
from u4wrh.u4wrhLexer import u4wrhLexer
from u4wrh.u4wrhParser import u4wrhParser

_RAW_PERCENT_RE = re.compile(r"%\{[^}]*\}")


def _hrw_to_hrw4u(hrw_text: str, filename: str) -> tuple[str, ErrorCollector]:
    """Run raw HRW text through the u4wrh lexer/parser and the inverse visitor.

    Args:
        hrw_text: Raw HRW source to convert.
        filename: Name handed to the visitor (passed through as ``filename``).

    Returns:
        A pair of the visitor's output lines joined with newlines, and the
        ErrorCollector that was given to the visitor.
    """
    token_stream = CommonTokenStream(u4wrhLexer(InputStream(hrw_text)))
    parse_tree = u4wrhParser(token_stream).program()

    # The collector is created after parsing and is passed only to the visitor.
    errors = ErrorCollector()
    inverse_visitor = HRWInverseVisitor(filename=filename, error_collector=errors)
    output_lines = inverse_visitor.visit(parse_tree)
    return "\n".join(output_lines), errors


def _hrw4u_to_hrw(hrw4u_text: str, filename: str) -> tuple[str, ErrorCollector]:
    """Run HRW4U text through the hrw4u lexer/parser and the forward visitor.

    Args:
        hrw4u_text: HRW4U source to convert.
        filename: Name handed to the visitor (passed through as ``filename``).

    Returns:
        A pair of the visitor's output lines joined with newlines (an empty
        string when the visitor returns a falsy result), and the ErrorCollector
        that was given to the visitor.
    """
    token_stream = CommonTokenStream(hrw4uLexer(InputStream(hrw4u_text)))
    parse_tree = hrw4uParser(token_stream).program()

    # The collector is created after parsing and is passed only to the visitor.
    errors = ErrorCollector()
    forward_visitor = HRW4UVisitor(filename=filename, error_collector=errors)
    output_lines = forward_visitor.visit(parse_tree) or []
    return "\n".join(output_lines), errors


@pytest.mark.reverse
@pytest.mark.parametrize(
    "fixture_name",
    [
        "bare-client-url",
        "bare-client-cert-aprn",
        "bare-client-cert-san-dns",
    ],
)
def test_u4wrh_round_trip_no_raw_percent(fixture_name: str) -> None:
    """u4wrh must never emit raw %{...} in HRW4U output for legacy bare HRW tags.

    Verifies both HRW -> HRW4U produces valid, error-free HRW4U AND that
    HRW4U re-parses through hrw4u without errors (round-trip).

    Args:
        fixture_name: Base name of a fixture under ``data/conds`` next to this
            test module; its ``.output.txt`` file supplies the raw HRW input.
    """
    # Locate fixtures relative to this module so the test does not depend on
    # the directory pytest happens to be launched from.
    fixture_dir = Path(__file__).resolve().parent / "data" / "conds"
    hrw_path = fixture_dir / f"{fixture_name}.output.txt"
    hrw_text = hrw_path.read_text(encoding="utf-8")

    # Step 1: raw HRW -> HRW4U must complete without collected errors.
    hrw4u_text, u4wrh_errors = _hrw_to_hrw4u(hrw_text, str(hrw_path))
    assert not u4wrh_errors.has_errors(), (f"u4wrh produced errors for {fixture_name}:\n{u4wrh_errors.get_error_summary()}")

    # Step 2: no %{...} block may survive into the generated HRW4U text.
    assert not _RAW_PERCENT_RE.search(hrw4u_text), (
        f"u4wrh emitted raw %{{...}} in HRW4U output for {fixture_name}:\n{hrw4u_text}")

    # Step 3: the generated HRW4U must be accepted by the forward compiler.
    _, hrw4u_errors = _hrw4u_to_hrw(hrw4u_text, str(hrw_path))
    assert not hrw4u_errors.has_errors(), (
        f"hrw4u failed to re-parse u4wrh HRW4U output for {fixture_name}:\n"
        f"HRW4U:\n{hrw4u_text}\nErrors:\n{hrw4u_errors.get_error_summary()}")