Skip to content

Commit 33fc4e9

Browse files
D3 gen_mcp_schema.py: fix two struct-parsing bugs
Bug 1 — comment brace stops struct regex prematurely: _STRUCT_RE uses [^}]+ which stops at the first '}' character. A struct body comment like '* Defaults: {0.01, 0.05}' contains a '}' that terminates the capture before the actual struct closing brace, causing the struct to be silently dropped from the parsed output. Fix: strip comments from the full header_text before running _STRUCT_RE (not just from the body after the fact). This recovers structs like finbench_monte_carlo_request_t which had exactly this pattern. Bug 2 — camelCase operation names not matched to snake_case struct names: find_request_struct tried '{prefix}{op_name}_request_t' literally, so 'portfolioVariance' never matched 'finbench_portfolio_variance_request_t'. Fix: add _camel_to_snake() helper (regex: insert '_' before each uppercase that follows a lowercase/digit, then lowercase) and extend the candidate list to include the snake_case variants at every prefix level. After both fixes, running the tool against financial_benchmark_service.h correctly parses 8 structs (was 7) and matches portfolioVariance and monteCarlo to their request structs. scenarioAnalysis remains a SKIP because finbench_scenario_request_t uses an abbreviated name — its hand-crafted services.xml schema is intentionally richer and kept. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 69a8585 commit 33fc4e9

1 file changed

Lines changed: 39 additions & 13 deletions

File tree

tools/gen_mcp_schema.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -124,22 +124,27 @@ def parse_structs(header_text: str) -> dict[str, dict]:
124124
Return {struct_name: {field_name: {"c_type": ..., "has_default": bool}}}.
125125
Only parses typedef struct { ... } name_t; blocks.
126126
127-
Block and line comments are stripped from the body before field parsing
128-
so that comment text containing ';' is not matched as a field.
127+
Block and line comments are stripped from the FULL header text before the
128+
struct regex runs so that a comment containing a '}' character (e.g.
129+
``* Defaults: {0.01, 0.05}``) does not prematurely terminate the
130+
[^}]+ body capture and cause the struct to be missed entirely.
129131
"""
130132
structs = {}
131-
for m in _STRUCT_RE.finditer(header_text):
133+
for m in _STRUCT_RE.finditer(_strip_comments(header_text)):
132134
body = m.group(1)
133135
name = m.group(2)
134136

135137
# Warn about potential nested struct/union — body regex stops at first '}'
136138
# so any nested block would already be truncated, but alert the user.
139+
# (Comments are already stripped from header_text before the struct regex
140+
# runs, so braces inside comments will not appear here.)
137141
if '{' in body:
138142
print(f" WARNING: struct '{name}' body contains '{{' — nested struct/union "
139143
f"members are not supported and may be missing from the schema.",
140144
file=sys.stderr)
141145

142-
# Strip comments before field parsing (F23 fix)
146+
# Comments were stripped from header_text before _STRUCT_RE ran;
147+
# strip again defensively in case body was extracted differently.
143148
clean_body = _strip_comments(body)
144149

145150
fields = {}
@@ -204,32 +209,53 @@ def build_json_schema(struct_fields: dict) -> dict:
204209
# ---------------------------------------------------------------------------
205210
# services.xml patcher
206211
# ---------------------------------------------------------------------------
212+
def _camel_to_snake(name: str) -> str:
213+
"""Convert camelCase / PascalCase to snake_case.
214+
215+
Examples:
216+
portfolioVariance → portfolio_variance
217+
monteCarlo → monte_carlo
218+
scenarioAnalysis → scenario_analysis
219+
generateTestData → generate_test_data
220+
"""
221+
# Insert underscore before each uppercase letter that follows a lowercase
222+
# letter or digit, then lowercase everything.
223+
result = re.sub(r'(?<=[a-z0-9])([A-Z])', r'_\1', name)
224+
return result.lower()
225+
226+
207227
def find_request_struct(structs: dict, op_name: str,
                        prefix: str = "") -> str | None:
    """
    Heuristically find the request struct for an operation name.

    Args:
        structs: Mapping keyed by struct name; only the keys are inspected.
        op_name: Operation name, e.g. ``portfolioVariance``.
        prefix:  Optional struct-name prefix tried before the bare names.

    Returns:
        The matching struct name, or None when nothing matches or when
        op_name is empty.

    Tries (in order):
      1. {prefix}{op_name}_request_t           (as-is, only if prefix given)
      2. {prefix}{snake(op_name)}_request_t    (camelCase → snake_case)
      3. {op_name}_request_t / {op_name}_req_t (no prefix, as-is)
      4. {snake(op_name)}_request_t            (no prefix, snake_case)
      5. Case-insensitive substring search on all struct names that also
         contain "request", using the lowercased op name first and its
         snake_case form second.
    """
    # An empty name is a substring of every string, so without this guard
    # the fallback search would return the first struct containing
    # "request" as a bogus match.
    if not op_name:
        return None

    snake = _camel_to_snake(op_name)
    has_snake_variant = snake != op_name

    candidates = []
    if prefix:
        candidates.append(f"{prefix}{op_name}_request_t")
        if has_snake_variant:
            candidates.append(f"{prefix}{snake}_request_t")
    candidates += [
        f"{op_name}_request_t",
        f"{op_name}_req_t",
    ]
    if has_snake_variant:
        candidates.append(f"{snake}_request_t")

    for candidate in candidates:
        if candidate in structs:
            return candidate

    # Case-insensitive fallback. dict.fromkeys drops the duplicate needle
    # (keeping order) when the lowercased name and the snake_case name are
    # the same string, so structs is not scanned twice for nothing.
    for needle in dict.fromkeys((op_name.lower(), snake)):
        for sname in structs:
            lowered = sname.lower()
            if needle in lowered and "request" in lowered:
                return sname
    return None
234260

235261

0 commit comments

Comments
 (0)