Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cldk/analysis/commons/treesitter/treesitter_java.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
"""
import logging
from itertools import groupby
from typing import List, Set, Dict
from tree_sitter import Language, Node, Parser, Query, Tree
from typing import Dict, List, Set

import tree_sitter_java as tsjava
from tree_sitter import Language, Node, Parser, Query, Tree

from cldk.analysis.commons.treesitter.models import Captures

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -91,6 +93,8 @@ def get_raw_ast(self, code: str) -> Tree:
"""
return PARSER.parse(bytes(code, "utf-8"))

# NOTE: Not used anywhere in the codebase. Does not return the new JImport model.
# Update if wired into a real code path.
def get_all_imports(self, source_code: str) -> Set[str]:
"""Return all import statements in the source.

Expand Down
30 changes: 20 additions & 10 deletions cldk/analysis/java/codeanalyzer/codeanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from itertools import chain, groupby
from pathlib import Path
from subprocess import CompletedProcess
from typing import Any, Dict, List, Tuple
from typing import Dict, List, Tuple
from typing import Union

import networkx as nx
Expand Down Expand Up @@ -124,23 +124,33 @@ def _init_japplication(data: str) -> JApplication:

# set_trace()
return JApplication(**json.loads(data))

@staticmethod
def check_exisiting_analysis_file_level(analysis_json_path_file: Path, analysis_level: int) -> bool:
    """Validate whether a cached analysis file is compatible with the current model.

    The file is considered compatible when it can be read, parses as a JSON
    object, and contains the top-level key required by the requested level
    (``symbol_table`` for level 1, ``call_graph`` for level 2). Any other
    level imposes no key requirement.

    Args:
        analysis_json_path_file (Path): Path to the cached ``analysis.json`` file.
        analysis_level (int): Requested analysis level (1=symbol table, 2=call graph).

    Returns:
        bool: True if the cached file is compatible; otherwise False. Missing,
        unreadable, undecodable, or malformed files all yield False so that the
        caller can regenerate the analysis instead of crashing.
    """
    required_key_by_level = {1: "symbol_table", 2: "call_graph"}

    try:
        # JSON is UTF-8 by specification; do not depend on the platform default encoding.
        with open(analysis_json_path_file, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers missing/unreadable paths (including directories).
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return False

    # A scalar or array at the top level cannot be a valid analysis payload;
    # a membership test on e.g. an int would otherwise raise TypeError.
    if not isinstance(data, dict):
        return False

    required_key = required_key_by_level.get(analysis_level)
    return required_key is None or required_key in data



def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
"""Should initialize the Codeanalyzer.

Expand Down
Binary file not shown.
3 changes: 2 additions & 1 deletion cldk/models/java/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@
from .models import (
JApplication,
JCallable,
JImport,
JType,
JCompilationUnit,
JGraphEdges,
)

from .enums import CRUDOperationType, CRUDQueryType

__all__ = ["JApplication", "JCallable", "JType", "JCompilationUnit", "JGraphEdges", "CRUDOperationType", "CRUDQueryType"]
__all__ = ["JApplication", "JCallable", "JImport", "JType", "JCompilationUnit", "JGraphEdges", "CRUDOperationType", "CRUDQueryType"]
71 changes: 68 additions & 3 deletions cldk/models/java/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Models module
"""
from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel, field_validator
from pydantic import BaseModel, Field, field_validator, model_validator
from cldk.models.java.enums import CRUDOperationType, CRUDQueryType

_CALLABLES_LOOKUP_TABLE = dict()
Expand All @@ -44,6 +44,20 @@ class JComment(BaseModel):
is_javadoc: bool = False


class JImport(BaseModel):
    """Structured description of a single Java import declaration.

    Attributes:
        path (str): Fully qualified path of the import target.
        is_static (bool): Whether the import carries the ``static`` modifier. Defaults to False.
        is_wildcard (bool): Whether the import uses wildcard syntax. Defaults to False.
    """

    path: str
    is_static: bool = False
    is_wildcard: bool = False


class JRecordComponent(BaseModel):
"""Represents a component of a Java record.

Expand Down Expand Up @@ -370,17 +384,68 @@ class JCompilationUnit(BaseModel):
file_path (str): The path to the source file.
package_name (str): The name of the package for the compilation unit.
comments (List[JComment]): A list of comments in the compilation unit.
imports (List[str]): A list of import statements in the compilation unit.
imports (List[str]): A list of import paths in the compilation unit.
Deprecated: use ``import_declarations`` to access structured import metadata.
import_declarations (List[JImport]): A list of structured import declarations.
type_declarations (Dict[str, JType]): A dictionary mapping type names to their corresponding JType representations.
"""

file_path: str
package_name: str
comments: List[JComment]
imports: List[str]
# Deprecated: retained for backward compatibility with existing consumers.
imports: List[str] = Field(default_factory=list)
import_declarations: List[JImport] = Field(default_factory=list)
type_declarations: Dict[str, JType]
is_modified: bool = False

@model_validator(mode="before")
@classmethod
def normalize_import_fields(cls, data: Any) -> Any:
    """Synchronize the legacy ``imports`` and structured ``import_declarations`` fields.

    A non-empty ``import_declarations`` list takes precedence; otherwise a
    list-valued ``imports`` is used as the source. Each entry may be a plain
    string (treated as the import path), a dict of ``JImport`` fields, or an
    existing ``JImport`` instance. When neither source is a usable list the
    payload is returned untouched.

    Args:
        data (Any): Raw input payload for ``JCompilationUnit``.

    Returns:
        Any: The same payload; for dict payloads with a usable source, the
        ``imports`` key holds the list of paths and ``import_declarations``
        holds the matching ``JImport`` objects.

    Raises:
        TypeError: If an entry is not a ``str``, ``dict`` or ``JImport``.
    """
    if not isinstance(data, dict):
        return data

    structured = data.get("import_declarations")
    legacy = data.get("imports")

    # Structured declarations win only when actually populated.
    if isinstance(structured, list) and structured:
        entries, label = structured, "import declaration entry"
    elif isinstance(legacy, list):
        entries, label = legacy, "import entry"
    else:
        return data

    declarations: List[JImport] = []
    for entry in entries:
        if isinstance(entry, str):
            declarations.append(JImport(path=entry))
        elif isinstance(entry, dict):
            declarations.append(JImport(**entry))
        elif isinstance(entry, JImport):
            declarations.append(entry)
        else:
            raise TypeError(f"Unsupported {label} type: {type(entry)!r}")

    # Write back only after every entry converted, so a failure leaves the payload unchanged.
    data["imports"] = [declaration.path for declaration in declarations]
    data["import_declarations"] = declarations
    return data


class JMethodDetail(BaseModel):
"""Represents details about a method in a Java class.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ include = [
]

[tool.backend-versions]
codeanalyzer-java = "2.3.3"
codeanalyzer-java = "2.3.7"

[tool.poetry.dependencies]
python = ">=3.11"
Expand Down
135 changes: 133 additions & 2 deletions tests/analysis/java/test_jcodeanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,29 @@

from cldk.analysis import AnalysisLevel
from cldk.analysis.java.codeanalyzer import JCodeanalyzer
from cldk.models.java.models import JApplication, JCRUDOperation, JType, JCallable, JCompilationUnit, JMethodDetail
from cldk.models.java.models import JApplication, JCRUDOperation, JType, JCallable, JCompilationUnit, JImport, JMethodDetail
from cldk.models.java import JGraphEdges


def _build_analysis_json_payload(version: str, imports: list[dict[str, object] | str], include_call_graph: bool = False) -> dict:
payload = {
"symbol_table": {
"/tmp/T.java": {
"file_path": "/tmp/T.java",
"package_name": "",
"comments": [],
"imports": imports,
"type_declarations": {},
"is_modified": False,
}
},
"version": version,
}
if include_call_graph:
payload["call_graph"] = []
return payload


def test_init_japplication(test_fixture, codeanalyzer_jar_path, analysis_json):
"""Should return the initialized JApplication"""

Expand Down Expand Up @@ -102,6 +121,85 @@ def test_init_codeanalyzer_with_json_path(test_fixture, analysis_json, analysis_
assert isinstance(app, JApplication)


def test_init_japplication_supports_legacy_import_schema() -> None:
    """Legacy string imports should populate both ``imports`` and ``import_declarations``."""
    raw_json = json.dumps(_build_analysis_json_payload(version="2.3.6", imports=["java.util.List"]))
    app = JCodeanalyzer._init_japplication(raw_json)
    unit = next(iter(app.symbol_table.values()))

    assert unit.imports == ["java.util.List"]

    declarations = unit.import_declarations
    assert len(declarations) == 1
    only_import = declarations[0]
    assert isinstance(only_import, JImport)
    assert only_import.path == "java.util.List"
    # Plain string entries carry no modifiers, so both flags keep their defaults.
    assert only_import.is_static is False
    assert only_import.is_wildcard is False


def test_init_japplication_supports_structured_import_schema() -> None:
    """Structured import entries should parse and still fill the legacy ``imports`` list."""
    structured_entry = {"path": "java.util.List", "is_static": True, "is_wildcard": False}
    raw_json = json.dumps(_build_analysis_json_payload(version="2.3.7", imports=[structured_entry]))
    app = JCodeanalyzer._init_japplication(raw_json)
    unit = next(iter(app.symbol_table.values()))

    assert unit.imports == ["java.util.List"]

    declarations = unit.import_declarations
    assert len(declarations) == 1
    only_import = declarations[0]
    assert isinstance(only_import, JImport)
    assert only_import.path == "java.util.List"
    assert only_import.is_static is True
    assert only_import.is_wildcard is False


def test_check_existing_analysis_file_level_accepts_legacy_import_schema(tmp_path) -> None:
    """A cached file using string-based imports should be accepted at level 1."""
    cached_file = tmp_path / "analysis.json"
    serialized = json.dumps(_build_analysis_json_payload(version="2.3.6", imports=["java.util.List"]))
    cached_file.write_text(serialized, encoding="utf-8")

    assert JCodeanalyzer.check_exisiting_analysis_file_level(cached_file, analysis_level=1)


def test_check_existing_analysis_file_level_accepts_structured_import_schema(tmp_path) -> None:
    """A cached file using structured import entries should be accepted at level 1."""
    cached_file = tmp_path / "analysis.json"
    structured_entry = {"path": "java.util.List", "is_static": False, "is_wildcard": False}
    serialized = json.dumps(_build_analysis_json_payload(version="2.3.7", imports=[structured_entry]))
    cached_file.write_text(serialized, encoding="utf-8")

    assert JCodeanalyzer.check_exisiting_analysis_file_level(cached_file, analysis_level=1)


def test_check_existing_analysis_file_level_rejects_invalid_json(tmp_path) -> None:
    """A cached file that is not valid JSON should be reported as incompatible."""
    broken_file = tmp_path / "analysis.json"
    broken_file.write_text("{not-valid-json", encoding="utf-8")

    is_compatible = JCodeanalyzer.check_exisiting_analysis_file_level(broken_file, analysis_level=1)
    assert not is_compatible


def test_init_codeanalyzer_reuses_legacy_cache_when_compatible(test_fixture, codeanalyzer_jar_path, tmp_path) -> None:
    """A compatible legacy-schema cache should be loaded without invoking the backend."""
    cache_dir = tmp_path / "analysis-cache"
    cache_dir.mkdir()
    cached_payload = _build_analysis_json_payload(version="2.3.6", imports=["java.util.List"])
    (cache_dir / "analysis.json").write_text(json.dumps(cached_payload), encoding="utf-8")

    # Patch subprocess.run so any attempt to launch the backend is observable.
    with patch("cldk.analysis.java.codeanalyzer.codeanalyzer.subprocess.run") as run_mock:
        analyzer = JCodeanalyzer(
            project_dir=test_fixture,
            source_code=None,
            analysis_backend_path=codeanalyzer_jar_path,
            analysis_json_path=cache_dir,
            analysis_level=AnalysisLevel.symbol_table,
            eager_analysis=False,
            target_files=None,
        )
        assert not run_mock.called

        unit = next(iter(analyzer.application.symbol_table.values()))
        assert unit.imports == ["java.util.List"]

        first_import = unit.import_declarations[0]
        assert isinstance(first_import, JImport)
        assert first_import.path == "java.util.List"
        assert first_import.is_static is False
        assert first_import.is_wildcard is False


def test_get_codeanalyzer_exec(test_fixture, codeanalyzer_jar_path, analysis_json):
"""Should return the correct codeanalyzer location"""

Expand All @@ -128,7 +226,7 @@ def test_get_codeanalyzer_exec(test_fixture, codeanalyzer_jar_path, analysis_jso
code_analyzer.analysis_backend_path = None
jar_file = code_analyzer._get_codeanalyzer_exec()[-1]
exec_path = os.path.dirname(jar_file)
relative_path = exec_path.split("/cldk")[1]
relative_path = exec_path.rsplit("/cldk", 1)[1]
assert relative_path == "/analysis/java/codeanalyzer/jar"


Expand Down Expand Up @@ -800,6 +898,39 @@ def test_get_all_entrypoint_methods_in_application(test_fixture, codeanalyzer_ja
assert callable.is_entrypoint


def test_source_analysis_imports_disambiguate_static_and_wildcard(codeanalyzer_jar_path) -> None:
    """Static and wildcard imports that share a path should stay distinguishable."""
    source_code = "import static Foo.bar;\nimport Foo.bar.*;\nclass T {}"
    analyzer = JCodeanalyzer(
        project_dir=".",
        source_code=source_code,
        analysis_backend_path=codeanalyzer_jar_path,
        analysis_json_path=None,
        analysis_level=AnalysisLevel.symbol_table,
        eager_analysis=False,
        target_files=None,
    )
    symbols = analyzer.get_symbol_table()
    assert len(symbols) == 1

    unit = next(iter(symbols.values()))
    # The legacy list holds paths only, so the two imports collide there.
    assert unit.imports == ["Foo.bar", "Foo.bar"]

    declarations = unit.import_declarations
    assert len(declarations) == 2
    assert all(isinstance(declaration, JImport) for declaration in declarations)
    assert sum(1 for declaration in declarations if declaration.path == "Foo.bar") == 2

    static_import = next((declaration for declaration in declarations if declaration.is_static), None)
    wildcard_import = next((declaration for declaration in declarations if declaration.is_wildcard), None)
    assert static_import is not None
    assert wildcard_import is not None

    assert static_import.path == "Foo.bar"
    assert static_import.is_wildcard is False
    assert wildcard_import.path == "Foo.bar"
    assert wildcard_import.is_static is False


def test_get_all_entrypoint_classes_in_the_application(test_fixture, codeanalyzer_jar_path):
"""Should return all of the entrypoint classes in an application"""
code_analyzer = JCodeanalyzer(
Expand Down
Loading