Skip to content

Commit 8db2e4d

Browse files
invalid and claude committed
feat: add Java and C/C++ language adapters, fix test timeouts
Register Java and C/C++ adapters in parser.js alongside existing adapters. Increase vitest timeout for scan and E2E tests to accommodate additional WASM grammar loading time. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b6efcc4 commit 8db2e4d

9 files changed

Lines changed: 753 additions & 2 deletions

File tree

cli/src/languages/cpp.js

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
import { LanguageAdapter } from './base.js';
2+
3+
/**
4+
* C / C++ language adapter.
5+
*
6+
* Walks a tree-sitter AST produced by either the C or C++ grammar and extracts
7+
* functions, #include imports, exports (non-static symbols), classes/structs,
8+
* and type declarations (struct, class, enum, typedef, namespace).
9+
*
10+
* A single adapter class handles both C and C++ because the C++ grammar is a
11+
* superset of C, and the extraction logic is largely identical.
12+
*/
13+
export class CppAdapter extends LanguageAdapter {
  constructor() {
    super('cpp');
  }

  // ---------------------------------------------------------------------------
  // Functions
  // ---------------------------------------------------------------------------

  /**
   * Extract every function definition found anywhere in the tree.
   *
   * @param {object} tree - Parsed tree; only `tree.rootNode` is read.
   * @param {string} sourceCode - Source text the tree was parsed from.
   * @returns {Array<{name: string, signature: string, startLine: number, endLine: number}>}
   *   One record per `function_definition` node whose name could be resolved.
   */
  extractFunctions(tree, sourceCode) {
    const functions = [];
    this._walkNodes(tree.rootNode, (node) => {
      if (node.type !== 'function_definition') return;
      const fn = this._parseFunctionDef(node, sourceCode);
      if (fn) functions.push(fn);
    });
    return functions;
  }

  /**
   * Parse a `function_definition` node into a structured record.
   *
   * The signature is the source text from the start of the definition up to
   * (not including) the body; when there is no body the whole node text is
   * used. Line numbers are 1-based.
   *
   * @param {object} node - A `function_definition` node.
   * @param {string} sourceCode - Source text the node indexes into.
   * @returns {{name: string, signature: string, startLine: number, endLine: number}|null}
   *   `null` when no function declarator or name node can be located.
   */
  _parseFunctionDef(node, sourceCode) {
    const funcDeclarator = this._findFunctionDeclarator(node);
    if (!funcDeclarator) return null;

    const nameNode = funcDeclarator.childForFieldName('declarator');
    if (!nameNode) return null;

    const body = node.childForFieldName('body');
    const signatureEnd = body ? body.startIndex : node.endIndex;

    return {
      name: nameNode.text,
      signature: sourceCode.slice(node.startIndex, signatureEnd).trim(),
      startLine: node.startPosition.row + 1,
      endLine: node.endPosition.row + 1,
    };
  }

  // ---------------------------------------------------------------------------
  // Imports (#include directives)
  // ---------------------------------------------------------------------------

  /**
   * Extract `#include` directives.
   *
   * - `#include <header>` → system_lib_string → isExternal: true
   * - `#include "header"` → string_literal    → isExternal: false
   *
   * Includes whose path is neither of those node types are skipped.
   *
   * @param {object} tree - Parsed tree; only `tree.rootNode` is read.
   * @param {string} _sourceCode - Unused.
   * @returns {Array<{source: string, symbols: string[], isExternal: boolean}>}
   */
  extractImports(tree, _sourceCode) {
    const imports = [];
    this._walkNodes(tree.rootNode, (node) => {
      if (node.type !== 'preproc_include') return;

      const pathNode =
        this._findChildOfType(node, 'system_lib_string') ||
        this._findChildOfType(node, 'string_literal');
      if (!pathNode) return;

      imports.push({
        // Strip the surrounding < > or " " delimiters.
        source: pathNode.text.replace(/^[<"]|[>"]$/g, ''),
        symbols: [],
        isExternal: pathNode.type === 'system_lib_string',
      });
    });
    return imports;
  }

  // ---------------------------------------------------------------------------
  // Exports (non-static function and type names)
  // ---------------------------------------------------------------------------

  /**
   * Collect the names treated as "exported".
   *
   * C/C++ has no `export` keyword (ignoring C++20 modules), so every
   * non-static function definition, every named struct / class / enum
   * specifier, and every typedef name encountered in the walk is reported.
   * Qualified function names are reduced to their last component
   * (`Engine::start` → `start`).
   *
   * @param {object} tree - Parsed tree; only `tree.rootNode` is read.
   * @param {string} _sourceCode - Unused.
   * @returns {string[]} Unique names in first-seen order.
   */
  extractExports(tree, _sourceCode) {
    const names = new Set();

    this._walkNodes(tree.rootNode, (node) => {
      switch (node.type) {
        case 'function_definition': {
          if (this._hasStorageClassStatic(node)) return;
          const funcDeclarator = this._findFunctionDeclarator(node);
          const nameNode = funcDeclarator
            ? funcDeclarator.childForFieldName('declarator')
            : null;
          if (nameNode) names.add(this._bareIdentifier(nameNode.text));
          return;
        }
        case 'struct_specifier':
        case 'class_specifier':
        case 'enum_specifier': {
          const nameNode = node.childForFieldName('name');
          if (nameNode) names.add(nameNode.text);
          return;
        }
        case 'type_definition': {
          const nameNode = this._findTypedefNameNode(node);
          if (nameNode) names.add(nameNode.text);
          return;
        }
        default:
      }
    });

    // The Set already removed duplicates (e.g. a struct that is also typedef'd).
    return [...names];
  }

  // ---------------------------------------------------------------------------
  // Classes (C++ classes and C structs)
  // ---------------------------------------------------------------------------

  /**
   * Map `class_specifier` and `struct_specifier` nodes to the "classes"
   * output. Only named specifiers that have a body are included, so forward
   * declarations and plain usages are skipped.
   *
   * @param {object} tree - Parsed tree; only `tree.rootNode` is read.
   * @param {string} _sourceCode - Unused.
   * @returns {Array<{name: string, startLine: number, endLine: number}>}
   */
  extractClasses(tree, _sourceCode) {
    const classes = [];
    this._walkNodes(tree.rootNode, (node) => {
      if (node.type !== 'class_specifier' && node.type !== 'struct_specifier') return;

      const nameNode = node.childForFieldName('name');
      if (!nameNode) return;
      if (!node.childForFieldName('body')) return; // forward declaration / usage

      classes.push({
        name: nameNode.text,
        startLine: node.startPosition.row + 1,
        endLine: node.endPosition.row + 1,
      });
    });
    return classes;
  }

  // ---------------------------------------------------------------------------
  // Types (struct, class, enum, typedef, namespace)
  // ---------------------------------------------------------------------------

  /**
   * Extract type declarations: struct, class, enum, typedef and namespace.
   *
   * Struct, class and enum specifiers are reported only when they are named
   * and carry a body, so that a mere usage such as `enum Color c;` or
   * `struct stat st;` is not mistaken for a declaration.
   *
   * @param {object} tree - Parsed tree; only `tree.rootNode` is read.
   * @param {string} _sourceCode - Unused.
   * @returns {Array<{name: string, kind: string, startLine: number, endLine: number}>}
   */
  extractTypes(tree, _sourceCode) {
    // A Map (not a plain object) so that odd node type strings can never hit
    // an inherited Object.prototype key.
    const specifierKinds = new Map([
      ['struct_specifier', 'struct'],
      ['class_specifier', 'class'],
      ['enum_specifier', 'enum'],
    ]);

    const types = [];
    const record = (node, nameNode, kind) => {
      types.push({
        name: nameNode.text,
        kind,
        startLine: node.startPosition.row + 1,
        endLine: node.endPosition.row + 1,
      });
    };

    this._walkNodes(tree.rootNode, (node) => {
      const specifierKind = specifierKinds.get(node.type);
      if (specifierKind) {
        const nameNode = node.childForFieldName('name');
        if (nameNode && node.childForFieldName('body')) {
          record(node, nameNode, specifierKind);
        }
        return;
      }

      if (node.type === 'type_definition') {
        const nameNode = this._findTypedefNameNode(node);
        if (nameNode) record(node, nameNode, 'typedef');
        return;
      }

      // namespace (C++ only); anonymous namespaces have no name and are skipped
      if (node.type === 'namespace_definition') {
        const nameNode = node.childForFieldName('name');
        if (nameNode) record(node, nameNode, 'namespace');
      }
    });
    return types;
  }

  // ---------------------------------------------------------------------------
  // Helpers
  // ---------------------------------------------------------------------------

  /**
   * Walk all nodes depth-first (pre-order), calling visitor(node) for each.
   *
   * Iterative rather than recursive, so tree depth does not consume call stack.
   *
   * @param {object} root - Node to start from (visited first).
   * @param {(node: object) => void} visitor - Called once per node.
   */
  _walkNodes(root, visitor) {
    const stack = [root];
    while (stack.length > 0) {
      const node = stack.pop();
      visitor(node);
      // Push children in reverse so the first child is popped (visited) first.
      for (let i = node.childCount - 1; i >= 0; i--) {
        stack.push(node.child(i));
      }
    }
  }

  /**
   * Find the first direct child with the given type.
   *
   * @param {object} node - Parent node.
   * @param {string} type - Node type to look for.
   * @returns {object|null} The matching child, or `null`.
   */
  _findChildOfType(node, type) {
    for (let i = 0; i < node.childCount; i++) {
      const child = node.child(i);
      if (child.type === type) return child;
    }
    return null;
  }

  /**
   * Find the first descendant (breadth-first) with the given type.
   *
   * `node` itself is never returned. The queue is consumed through a read
   * cursor instead of `Array.prototype.shift()`, which keeps a full,
   * unsuccessful search linear in the size of the subtree.
   *
   * @param {object} node - Root of the subtree to search.
   * @param {string} type - Node type to look for.
   * @returns {object|null} The shallowest, left-most match, or `null`.
   */
  _findDescendantOfType(node, type) {
    const queue = [];
    for (let i = 0; i < node.childCount; i++) {
      queue.push(node.child(i));
    }
    for (let head = 0; head < queue.length; head++) {
      const current = queue[head];
      if (current.type === type) return current;
      for (let i = 0; i < current.childCount; i++) {
        queue.push(current.child(i));
      }
    }
    return null;
  }

  /**
   * Locate the `function_declarator` that belongs to a `function_definition`.
   *
   * The search follows the `declarator` field (through wrappers such as
   * `pointer_declarator`) and never looks into the function body. A plain
   * breadth-first search over the whole definition can otherwise return the
   * declarator of a local prototype, e.g. `bar` in
   * `int ***foo() { int bar(void); }`.
   *
   * When a wrapper exposes no `declarator` field, the search continues
   * breadth-first inside that wrapper only. When the definition itself has no
   * `declarator` field, the whole node is searched as a last resort.
   *
   * @param {object} funcDefNode - A `function_definition` node.
   * @returns {object|null} The `function_declarator`, or `null`.
   */
  _findFunctionDeclarator(funcDefNode) {
    let current = funcDefNode.childForFieldName('declarator');
    if (!current) {
      return this._findDescendantOfType(funcDefNode, 'function_declarator');
    }
    while (current.type !== 'function_declarator') {
      const inner = current.childForFieldName('declarator');
      if (!inner) {
        return this._findDescendantOfType(current, 'function_declarator');
      }
      current = inner;
    }
    return current;
  }

  /**
   * Locate the node naming the alias introduced by a `type_definition`.
   *
   * The name lives under the `declarator` field, not the `type` field: in
   * `typedef Foo Bar;` both children are `type_identifier` nodes and only the
   * second one (`Bar`) is the new name. Only the first declarator is
   * considered. When the node has no `declarator` field, the whole node is
   * searched breadth-first as a last resort.
   *
   * @param {object} typeDefNode - A `type_definition` node.
   * @returns {object|null} The `type_identifier` of the alias, or `null`.
   */
  _findTypedefNameNode(typeDefNode) {
    let current = typeDefNode.childForFieldName('declarator');
    if (!current) {
      return this._findDescendantOfType(typeDefNode, 'type_identifier');
    }
    while (current.type !== 'type_identifier') {
      const inner = current.childForFieldName('declarator');
      if (!inner) {
        return this._findDescendantOfType(current, 'type_identifier');
      }
      current = inner;
    }
    return current;
  }

  /**
   * Check whether a function_definition has the `static` storage class.
   *
   * Only direct children are inspected: a child of type
   * `storage_class_specifier` whose text is exactly "static".
   *
   * @param {object} funcDefNode - A `function_definition` node.
   * @returns {boolean}
   */
  _hasStorageClassStatic(funcDefNode) {
    for (let i = 0; i < funcDefNode.childCount; i++) {
      const child = funcDefNode.child(i);
      if (child.type === 'storage_class_specifier' && child.text === 'static') {
        return true;
      }
    }
    return false;
  }

  /**
   * Extract the bare identifier from a possibly qualified name.
   * e.g. "Engine::start" → "start", "initialize" → "initialize"
   *
   * @param {string} text - Name text, possibly containing `::`.
   * @returns {string} Everything after the last `::`, or `text` unchanged.
   */
  _bareIdentifier(text) {
    const idx = text.lastIndexOf('::');
    return idx >= 0 ? text.slice(idx + 2) : text;
  }
}

0 commit comments

Comments
 (0)