Skip to content

Commit 6090cbb

Browse files
authored
Merge pull request #150 from tylerstennett/feat/java-import-model
Add JImport model for structured Java import declarations
2 parents d8d8b26 + f25a69a commit 6090cbb

File tree

10 files changed

+9232
-1804
lines changed

10 files changed

+9232
-1804
lines changed

cldk/analysis/commons/treesitter/treesitter_java.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121
"""
2222
import logging
2323
from itertools import groupby
24-
from typing import List, Set, Dict
25-
from tree_sitter import Language, Node, Parser, Query, Tree
24+
from typing import Dict, List, Set
25+
2626
import tree_sitter_java as tsjava
27+
from tree_sitter import Language, Node, Parser, Query, Tree
28+
2729
from cldk.analysis.commons.treesitter.models import Captures
2830

2931
logger = logging.getLogger(__name__)
@@ -91,6 +93,8 @@ def get_raw_ast(self, code: str) -> Tree:
9193
"""
9294
return PARSER.parse(bytes(code, "utf-8"))
9395

96+
# NOTE: Not used anywhere in the codebase. Does not return the new JImport model.
97+
# Update if wired into a real code path.
9498
def get_all_imports(self, source_code: str) -> Set[str]:
9599
"""Return all import statements in the source.
96100

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from itertools import chain, groupby
2323
from pathlib import Path
2424
from subprocess import CompletedProcess
25-
from typing import Any, Dict, List, Tuple
25+
from typing import Dict, List, Tuple
2626
from typing import Union
2727

2828
import networkx as nx
@@ -124,23 +124,33 @@ def _init_japplication(data: str) -> JApplication:
124124

125125
# set_trace()
126126
return JApplication(**json.loads(data))
127-
127+
128128
@staticmethod
129129
def check_exisiting_analysis_file_level(analysis_json_path_file: Path, analysis_level: int) -> bool:
130+
"""Validate whether a cached analysis file is compatible with the current model.
131+
132+
Args:
133+
analysis_json_path_file (Path): Path to the cached ``analysis.json`` file.
134+
analysis_level (int): Requested analysis level (1=symbol table, 2=call graph).
135+
136+
Returns:
137+
bool: True if the cached file is compatible; otherwise False.
138+
"""
130139
analysis_file_compatible = True
131140
if not analysis_json_path_file.exists():
132141
analysis_file_compatible = False
133142
else:
134-
with open(analysis_json_path_file) as f:
135-
data = json.load(f)
136-
if analysis_level == 2 and "call_graph" not in data:
137-
analysis_file_compatible = False
138-
elif analysis_level == 1 and "symbol_table" not in data:
139-
analysis_file_compatible = False
143+
try:
144+
with open(analysis_json_path_file) as f:
145+
data = json.load(f)
146+
if analysis_level == 2 and "call_graph" not in data:
147+
analysis_file_compatible = False
148+
elif analysis_level == 1 and "symbol_table" not in data:
149+
analysis_file_compatible = False
150+
except (json.JSONDecodeError, OSError):
151+
analysis_file_compatible = False
140152
return analysis_file_compatible
141153

142-
143-
144154
def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
145155
"""Should initialize the Codeanalyzer.
146156

cldk/analysis/java/codeanalyzer/jar/codeanalyzer-2.3.6.jar renamed to cldk/analysis/java/codeanalyzer/jar/codeanalyzer-2.3.7.jar

28.7 MB
Binary file not shown.

cldk/models/java/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@
2121
from .models import (
2222
JApplication,
2323
JCallable,
24+
JImport,
2425
JType,
2526
JCompilationUnit,
2627
JGraphEdges,
2728
)
2829

2930
from .enums import CRUDOperationType, CRUDQueryType
3031

31-
__all__ = ["JApplication", "JCallable", "JType", "JCompilationUnit", "JGraphEdges", "CRUDOperationType", "CRUDQueryType"]
32+
__all__ = ["JApplication", "JCallable", "JImport", "JType", "JCompilationUnit", "JGraphEdges", "CRUDOperationType", "CRUDQueryType"]

cldk/models/java/models.py

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
Models module
1919
"""
2020
from typing import Any, Dict, List, Optional, Union
21-
from pydantic import BaseModel, field_validator
21+
from pydantic import BaseModel, Field, field_validator, model_validator
2222
from cldk.models.java.enums import CRUDOperationType, CRUDQueryType
2323

2424
_CALLABLES_LOOKUP_TABLE = dict()
@@ -44,6 +44,20 @@ class JComment(BaseModel):
4444
is_javadoc: bool = False
4545

4646

47+
class JImport(BaseModel):
48+
"""Represents a Java import declaration.
49+
50+
Attributes:
51+
path (str): Fully qualified import target path.
52+
is_static (bool): True when import uses the static modifier.
53+
is_wildcard (bool): True when import uses wildcard syntax.
54+
"""
55+
56+
path: str
57+
is_static: bool = False
58+
is_wildcard: bool = False
59+
60+
4761
class JRecordComponent(BaseModel):
4862
"""Represents a component of a Java record.
4963
@@ -370,17 +384,68 @@ class JCompilationUnit(BaseModel):
370384
file_path (str): The path to the source file.
371385
package_name (str): The name of the package for the compilation unit.
372386
comments (List[JComment]): A list of comments in the compilation unit.
373-
imports (List[str]): A list of import statements in the compilation unit.
387+
imports (List[str]): A list of import paths in the compilation unit.
388+
Deprecated: use ``import_declarations`` to access structured import metadata.
389+
import_declarations (List[JImport]): A list of structured import declarations.
374390
type_declarations (Dict[str, JType]): A dictionary mapping type names to their corresponding JType representations.
375391
"""
376392

377393
file_path: str
378394
package_name: str
379395
comments: List[JComment]
380-
imports: List[str]
396+
# Deprecated: retained for backward compatibility with existing consumers.
397+
imports: List[str] = Field(default_factory=list)
398+
import_declarations: List[JImport] = Field(default_factory=list)
381399
type_declarations: Dict[str, JType]
382400
is_modified: bool = False
383401

402+
@model_validator(mode="before")
403+
@classmethod
404+
def normalize_import_fields(cls, data: Any) -> Any:
405+
"""Normalize legacy and structured import payloads into both model fields.
406+
407+
Args:
408+
data (Any): Raw input payload for ``JCompilationUnit``.
409+
410+
Returns:
411+
Any: Input payload with ``imports`` and ``import_declarations`` synchronized.
412+
"""
413+
if not isinstance(data, dict):
414+
return data
415+
416+
imports_payload = data.get("imports")
417+
import_declarations_payload = data.get("import_declarations")
418+
419+
normalized_imports: List[str] = []
420+
normalized_declarations: List[JImport] = []
421+
422+
# Prefer structured declarations only when they are provided and non-empty.
423+
source_payload: List[Any] | None = None
424+
source_name = "import entry"
425+
if isinstance(import_declarations_payload, list) and len(import_declarations_payload) > 0:
426+
source_payload = import_declarations_payload
427+
source_name = "import declaration entry"
428+
elif isinstance(imports_payload, list):
429+
source_payload = imports_payload
430+
431+
if source_payload is not None:
432+
for import_entry in source_payload:
433+
if isinstance(import_entry, str):
434+
import_declaration = JImport(path=import_entry)
435+
elif isinstance(import_entry, dict):
436+
import_declaration = JImport(**import_entry)
437+
elif isinstance(import_entry, JImport):
438+
import_declaration = import_entry
439+
else:
440+
raise TypeError(f"Unsupported {source_name} type: {type(import_entry)!r}")
441+
normalized_declarations.append(import_declaration)
442+
normalized_imports.append(import_declaration.path)
443+
444+
data["imports"] = normalized_imports
445+
data["import_declarations"] = normalized_declarations
446+
447+
return data
448+
384449

385450
class JMethodDetail(BaseModel):
386451
"""Represents details about a method in a Java class.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ include = [
2828
]
2929

3030
[tool.backend-versions]
31-
codeanalyzer-java = "2.3.3"
31+
codeanalyzer-java = "2.3.7"
3232

3333
[tool.poetry.dependencies]
3434
python = ">=3.11"

tests/analysis/java/test_jcodeanalyzer.py

Lines changed: 133 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,29 @@
2626

2727
from cldk.analysis import AnalysisLevel
2828
from cldk.analysis.java.codeanalyzer import JCodeanalyzer
29-
from cldk.models.java.models import JApplication, JCRUDOperation, JType, JCallable, JCompilationUnit, JMethodDetail
29+
from cldk.models.java.models import JApplication, JCRUDOperation, JType, JCallable, JCompilationUnit, JImport, JMethodDetail
3030
from cldk.models.java import JGraphEdges
3131

3232

33+
def _build_analysis_json_payload(version: str, imports: list[dict[str, object] | str], include_call_graph: bool = False) -> dict:
34+
payload = {
35+
"symbol_table": {
36+
"/tmp/T.java": {
37+
"file_path": "/tmp/T.java",
38+
"package_name": "",
39+
"comments": [],
40+
"imports": imports,
41+
"type_declarations": {},
42+
"is_modified": False,
43+
}
44+
},
45+
"version": version,
46+
}
47+
if include_call_graph:
48+
payload["call_graph"] = []
49+
return payload
50+
51+
3352
def test_init_japplication(test_fixture, codeanalyzer_jar_path, analysis_json):
3453
"""Should return the initialized JApplication"""
3554

@@ -102,6 +121,85 @@ def test_init_codeanalyzer_with_json_path(test_fixture, analysis_json, analysis_
102121
assert isinstance(app, JApplication)
103122

104123

124+
def test_init_japplication_supports_legacy_import_schema() -> None:
125+
"""Should parse legacy string-based imports and expose both import fields."""
126+
payload = _build_analysis_json_payload(version="2.3.6", imports=["java.util.List"])
127+
application = JCodeanalyzer._init_japplication(json.dumps(payload))
128+
compilation_unit = next(iter(application.symbol_table.values()))
129+
assert compilation_unit.imports == ["java.util.List"]
130+
assert len(compilation_unit.import_declarations) == 1
131+
assert isinstance(compilation_unit.import_declarations[0], JImport)
132+
assert compilation_unit.import_declarations[0].path == "java.util.List"
133+
assert compilation_unit.import_declarations[0].is_static is False
134+
assert compilation_unit.import_declarations[0].is_wildcard is False
135+
136+
137+
def test_init_japplication_supports_structured_import_schema() -> None:
138+
"""Should parse structured imports and keep legacy imports list populated."""
139+
payload = _build_analysis_json_payload(
140+
version="2.3.7",
141+
imports=[{"path": "java.util.List", "is_static": True, "is_wildcard": False}],
142+
)
143+
application = JCodeanalyzer._init_japplication(json.dumps(payload))
144+
compilation_unit = next(iter(application.symbol_table.values()))
145+
assert compilation_unit.imports == ["java.util.List"]
146+
assert len(compilation_unit.import_declarations) == 1
147+
assert isinstance(compilation_unit.import_declarations[0], JImport)
148+
assert compilation_unit.import_declarations[0].path == "java.util.List"
149+
assert compilation_unit.import_declarations[0].is_static is True
150+
assert compilation_unit.import_declarations[0].is_wildcard is False
151+
152+
153+
def test_check_existing_analysis_file_level_accepts_legacy_import_schema(tmp_path) -> None:
154+
"""Should accept cached analysis files that use the legacy imports schema."""
155+
analysis_file = tmp_path / "analysis.json"
156+
payload = _build_analysis_json_payload(version="2.3.6", imports=["java.util.List"])
157+
analysis_file.write_text(json.dumps(payload), encoding="utf-8")
158+
assert JCodeanalyzer.check_exisiting_analysis_file_level(analysis_file, analysis_level=1)
159+
160+
161+
def test_check_existing_analysis_file_level_accepts_structured_import_schema(tmp_path) -> None:
162+
"""Should accept cached analysis files that use the structured imports schema."""
163+
analysis_file = tmp_path / "analysis.json"
164+
payload = _build_analysis_json_payload(version="2.3.7", imports=[{"path": "java.util.List", "is_static": False, "is_wildcard": False}])
165+
analysis_file.write_text(json.dumps(payload), encoding="utf-8")
166+
assert JCodeanalyzer.check_exisiting_analysis_file_level(analysis_file, analysis_level=1)
167+
168+
169+
def test_check_existing_analysis_file_level_rejects_invalid_json(tmp_path) -> None:
170+
"""Should reject invalid analysis.json payloads and force regeneration."""
171+
analysis_file = tmp_path / "analysis.json"
172+
analysis_file.write_text("{not-valid-json", encoding="utf-8")
173+
assert not JCodeanalyzer.check_exisiting_analysis_file_level(analysis_file, analysis_level=1)
174+
175+
176+
def test_init_codeanalyzer_reuses_legacy_cache_when_compatible(test_fixture, codeanalyzer_jar_path, tmp_path) -> None:
177+
"""Should reuse cached analysis.json when legacy imports are still compatible."""
178+
analysis_json_dir = tmp_path / "analysis-cache"
179+
analysis_json_dir.mkdir()
180+
analysis_json_file = analysis_json_dir / "analysis.json"
181+
legacy_payload = _build_analysis_json_payload(version="2.3.6", imports=["java.util.List"])
182+
analysis_json_file.write_text(json.dumps(legacy_payload), encoding="utf-8")
183+
184+
with patch("cldk.analysis.java.codeanalyzer.codeanalyzer.subprocess.run") as run_mock:
185+
code_analyzer = JCodeanalyzer(
186+
project_dir=test_fixture,
187+
source_code=None,
188+
analysis_backend_path=codeanalyzer_jar_path,
189+
analysis_json_path=analysis_json_dir,
190+
analysis_level=AnalysisLevel.symbol_table,
191+
eager_analysis=False,
192+
target_files=None,
193+
)
194+
assert not run_mock.called
195+
compilation_unit = next(iter(code_analyzer.application.symbol_table.values()))
196+
assert compilation_unit.imports == ["java.util.List"]
197+
assert isinstance(compilation_unit.import_declarations[0], JImport)
198+
assert compilation_unit.import_declarations[0].path == "java.util.List"
199+
assert compilation_unit.import_declarations[0].is_static is False
200+
assert compilation_unit.import_declarations[0].is_wildcard is False
201+
202+
105203
def test_get_codeanalyzer_exec(test_fixture, codeanalyzer_jar_path, analysis_json):
106204
"""Should return the correct codeanalyzer location"""
107205

@@ -128,7 +226,7 @@ def test_get_codeanalyzer_exec(test_fixture, codeanalyzer_jar_path, analysis_jso
128226
code_analyzer.analysis_backend_path = None
129227
jar_file = code_analyzer._get_codeanalyzer_exec()[-1]
130228
exec_path = os.path.dirname(jar_file)
131-
relative_path = exec_path.split("/cldk")[1]
229+
relative_path = exec_path.rsplit("/cldk", 1)[1]
132230
assert relative_path == "/analysis/java/codeanalyzer/jar"
133231

134232

@@ -800,6 +898,39 @@ def test_get_all_entrypoint_methods_in_application(test_fixture, codeanalyzer_ja
800898
assert callable.is_entrypoint
801899

802900

901+
def test_source_analysis_imports_disambiguate_static_and_wildcard(codeanalyzer_jar_path) -> None:
902+
"""Should preserve static and wildcard import metadata for colliding import paths."""
903+
source_code = "import static Foo.bar;\nimport Foo.bar.*;\nclass T {}"
904+
code_analyzer = JCodeanalyzer(
905+
project_dir=".",
906+
source_code=source_code,
907+
analysis_backend_path=codeanalyzer_jar_path,
908+
analysis_json_path=None,
909+
analysis_level=AnalysisLevel.symbol_table,
910+
eager_analysis=False,
911+
target_files=None,
912+
)
913+
symbol_table = code_analyzer.get_symbol_table()
914+
assert len(symbol_table) == 1
915+
916+
compilation_unit = next(iter(symbol_table.values()))
917+
assert compilation_unit.imports == ["Foo.bar", "Foo.bar"]
918+
919+
import_declarations = compilation_unit.import_declarations
920+
assert len(import_declarations) == 2
921+
assert all(isinstance(import_decl, JImport) for import_decl in import_declarations)
922+
assert [import_decl.path for import_decl in import_declarations].count("Foo.bar") == 2
923+
924+
static_import = next((import_decl for import_decl in import_declarations if import_decl.is_static), None)
925+
wildcard_import = next((import_decl for import_decl in import_declarations if import_decl.is_wildcard), None)
926+
assert static_import is not None
927+
assert wildcard_import is not None
928+
assert static_import.path == "Foo.bar"
929+
assert static_import.is_wildcard is False
930+
assert wildcard_import.path == "Foo.bar"
931+
assert wildcard_import.is_static is False
932+
933+
803934
def test_get_all_entrypoint_classes_in_the_application(test_fixture, codeanalyzer_jar_path):
804935
"""Should return all of the entrypoint classes in an application"""
805936
code_analyzer = JCodeanalyzer(

0 commit comments

Comments
 (0)