88
99import ray
1010from codeanalyzer .utils import logger
11- from codeanalyzer .schema import PyApplication , PyModule , model_dump_json , model_validate_json
11+ from codeanalyzer .schema import (
12+ PyApplication ,
13+ PyExternalSymbol ,
14+ PyModule ,
15+ model_dump_json ,
16+ model_validate_json ,
17+ )
1218from codeanalyzer .schema .py_schema import PyCallEdge
1319from codeanalyzer .semantic_analysis .call_graph import (
1420 jedi_call_graph_edges ,
@@ -60,6 +66,7 @@ def __init__(self, options: AnalysisOptions) -> None:
6066 self .skip_tests = options .skip_tests
6167 self .using_codeql = options .using_codeql
6268 self .rebuild_analysis = options .rebuild_analysis
69+ self .no_venv = options .no_venv
6370 self .cache_dir = (
6471 options .cache_dir .resolve () if options .cache_dir is not None else self .project_dir
6572 ) / ".codeanalyzer"
@@ -226,13 +233,41 @@ def _get_base_interpreter() -> Path:
226233 f"a working Python interpreter that can create virtual environments."
227234 )
228235
236+ @staticmethod
237+ def _uv_bin () -> Optional [str ]:
238+ """Path to a uv binary: the one bundled with the ``uv`` PyPI package (a
239+ dependency, so normally always present -- including inside a Docker image),
240+ else a uv on PATH, else ``None`` (callers fall back to pip)."""
241+ try :
242+ from uv import find_uv_bin
243+
244+ return str (find_uv_bin ())
245+ except Exception :
246+ return shutil .which ("uv" )
247+
248+ def _install_into_venv (self , venv_python : Path , args : List [str ]) -> None :
249+ """Install packages into the target venv, preferring uv for speed (parallel
250+ downloads + a shared global cache) and falling back to the venv's own pip
251+ when uv is unavailable."""
252+ uv = self ._uv_bin ()
253+ if uv :
254+ cmd = [uv , "pip" , "install" , "--python" , str (venv_python ), * args ]
255+ else :
256+ cmd = [str (venv_python ), "-m" , "pip" , "install" , * args ]
257+ self ._cmd_exec_helper (cmd , cwd = self .project_dir , check = True )
258+
229259 def __enter__ (self ) -> "Codeanalyzer" :
230260 # If no virtualenv is provided, try to create one using requirements.txt or pyproject.toml
231261 venv_path = self .cache_dir / self .project_dir .name / "virtualenv"
232262 # Ensure the cache directory exists for this project
233263 venv_path .parent .mkdir (parents = True , exist_ok = True )
264+ if self .no_venv :
265+ logger .info (
266+ "--no-venv: using the ambient Python environment "
267+ "(skipping virtualenv creation and dependency installation)"
268+ )
234269 # Create the virtual environment if it does not exist
235- if not venv_path .exists () or self .rebuild_analysis :
270+ if not self . no_venv and ( not venv_path .exists () or self .rebuild_analysis ) :
236271 logger .info (f"(Re-)creating virtual environment at { venv_path } " )
237272 self ._cmd_exec_helper (
238273 [str (self ._get_base_interpreter ()), "-m" , "venv" , str (venv_path )],
@@ -249,24 +284,19 @@ def __enter__(self) -> "Codeanalyzer":
249284 ("test-requirements.txt" , ["-r" ]),
250285 ]
251286
252- for dep_file , pip_args in dependency_files :
287+ for dep_file , _ in dependency_files :
253288 if (self .project_dir / dep_file ).exists ():
254289 logger .info (f"Installing dependencies from { dep_file } " )
255- self ._cmd_exec_helper (
256- [str (venv_python ), "-m" , "pip" , "install" , "-U" ] + pip_args + [str (self .project_dir / dep_file )],
257- cwd = self .project_dir ,
258- check = True ,
290+ self ._install_into_venv (
291+ venv_python ,
292+ ["--upgrade" , "-r" , str (self .project_dir / dep_file )],
259293 )
260294
261295 # Handle Pipenv files
262296 if (self .project_dir / "Pipfile" ).exists ():
263297 logger .info ("Installing dependencies from Pipfile" )
264298 # Note: This would require pipenv to be installed
265- self ._cmd_exec_helper (
266- [str (venv_python ), "-m" , "pip" , "install" , "pipenv" ],
267- cwd = self .project_dir ,
268- check = True ,
269- )
299+ self ._install_into_venv (venv_python , ["pipenv" ])
270300 self ._cmd_exec_helper (
271301 ["pipenv" , "install" , "--dev" ],
272302 cwd = self .project_dir ,
@@ -289,14 +319,18 @@ def __enter__(self) -> "Codeanalyzer":
289319
290320 if any ((self .project_dir / file ).exists () for file in package_definition_files ):
291321 logger .info ("Installing project in editable mode" )
292- self ._cmd_exec_helper (
293- [str (venv_python ), "-m" , "pip" , "install" , "-e" , str (self .project_dir )],
294- cwd = self .project_dir ,
295- check = True ,
296- )
322+ self ._install_into_venv (venv_python , ["-e" , str (self .project_dir )])
297323 else :
298324 logger .warning ("No package definition files found, skipping editable installation" )
299325
326+ # Point Jedi at the analysis venv so it resolves the project's third-party
327+ # imports. This runs on both a fresh build and a lazy reuse of an existing
328+ # venv -- previously self.virtualenv stayed None, so the install above was
329+ # never actually used by the symbol-table builder. With --no-venv we leave
330+ # it None so Jedi resolves against the ambient interpreter instead.
331+ if not self .no_venv and venv_path .exists ():
332+ self .virtualenv = venv_path
333+
300334 if self .using_codeql :
301335 logger .info (f"(Re-)initializing CodeQL analysis for { self .project_dir } " )
302336
@@ -358,6 +392,43 @@ def __exit__(self, *args, **kwargs) -> None:
358392 logger .info (f"Clearing cache directory: { self .cache_dir } " )
359393 shutil .rmtree (self .cache_dir )
360394
395+ @staticmethod
396+ def _compute_external_symbols (symbol_table , call_graph ):
397+ """Build the external-symbol map: every call-graph endpoint whose signature
398+ is not a declared class/callable in the symbol table is an external (an
399+ imported library or builtin member). ``name``/``module`` are derived from
400+ the signature (best effort: split on the last dot)."""
401+ declared = set ()
402+
403+ def walk_callable (c ):
404+ declared .add (c .signature )
405+ for ic in (c .inner_callables or {}).values ():
406+ walk_callable (ic )
407+ for cl in (c .inner_classes or {}).values ():
408+ walk_class (cl )
409+
410+ def walk_class (cl ):
411+ declared .add (cl .signature )
412+ for m in (cl .methods or {}).values ():
413+ walk_callable (m )
414+ for ic in (cl .inner_classes or {}).values ():
415+ walk_class (ic )
416+
417+ for mod in symbol_table .values ():
418+ for c in (mod .functions or {}).values ():
419+ walk_callable (c )
420+ for cl in (mod .classes or {}).values ():
421+ walk_class (cl )
422+
423+ externals : Dict [str , PyExternalSymbol ] = {}
424+ for edge in call_graph :
425+ for sig in (edge .source , edge .target ):
426+ if sig in declared or sig in externals :
427+ continue
428+ module , name = sig .rsplit ("." , 1 ) if "." in sig else (sig , sig )
429+ externals [sig ] = PyExternalSymbol (name = name , module = module )
430+ return externals
431+
361432 def analyze (self ) -> PyApplication :
362433 """Analyze the project and return a PyApplication with symbol table.
363434
@@ -397,8 +468,19 @@ def analyze(self) -> PyApplication:
397468 jedi_edges = jedi_call_graph_edges (symbol_table )
398469 call_graph = merge_edges (jedi_edges , codeql_edges )
399470
471+ # Classify call-graph endpoints that are not declared in the symbol table
472+ # (imported library / builtin members) once, so the JSON and Neo4j backends
473+ # share one authoritative external-symbol set.
474+ external_symbols = self ._compute_external_symbols (symbol_table , call_graph )
475+
400476 # Recreate pyapplication
401- app = PyApplication .builder ().symbol_table (symbol_table ).call_graph (call_graph ).build ()
477+ app = (
478+ PyApplication .builder ()
479+ .symbol_table (symbol_table )
480+ .call_graph (call_graph )
481+ .external_symbols (external_symbols )
482+ .build ()
483+ )
402484
403485 # Save to cache
404486 self ._save_analysis_cache (app , cache_file )
0 commit comments