diff --git a/docs/src/content/docs/core-concepts/plugin-system.mdx b/docs/src/content/docs/core-concepts/plugin-system.mdx index 1d497da..6291327 100644 --- a/docs/src/content/docs/core-concepts/plugin-system.mdx +++ b/docs/src/content/docs/core-concepts/plugin-system.mdx @@ -24,6 +24,10 @@ kit comes with built-in support for 12+ programming languages: - **Dart** (`.dart`) - Classes, functions, mixins, enums, extensions - **HCL/Terraform** (`.hcl`, `.tf`) - Resources, variables, modules - **Haskell** (`.hs`) - Module header, functions (including lambda-binds), common type-level declarations +- **Swift** (`.swift`) - Classes, structs, enums, protocols, actors, extensions, functions +- **Bash** (`.sh`, `.bash`) - Function definitions +- **YAML** (`.yaml`, `.yml`) - Top-level mapping keys +- **TOML** (`.toml`) - Tables, array tables Each language supports comprehensive symbol extraction including: - **Classes and interfaces** with inheritance relationships diff --git a/src/kit/queries/bash/tags.scm b/src/kit/queries/bash/tags.scm new file mode 100644 index 0000000..702962a --- /dev/null +++ b/src/kit/queries/bash/tags.scm @@ -0,0 +1,5 @@ +;; tags.scm for Bash symbol extraction (tree-sitter-bash) + +; Function definitions (covers both "function name()" and "name()" syntax) +(function_definition + name: (word) @name) @definition.function diff --git a/src/kit/queries/swift/tags.scm b/src/kit/queries/swift/tags.scm new file mode 100644 index 0000000..59650d5 --- /dev/null +++ b/src/kit/queries/swift/tags.scm @@ -0,0 +1,42 @@ +;; tags.scm for Swift symbol extraction (tree-sitter-swift) + +; Function declarations +(function_declaration + name: (simple_identifier) @name) @definition.function + +; Class declarations (keyword-differentiated from struct/enum/extension) +(class_declaration + "class" + name: (type_identifier) @name) @definition.class + +; Actor declarations +(class_declaration + "actor" + name: (type_identifier) @name) @definition.actor + +; Struct declarations +(class_declaration + "struct" + name: (type_identifier) @name) @definition.struct + +; Enum declarations +(class_declaration + "enum" + name: (type_identifier) @name) @definition.enum + +; Extension declarations (name field is user_type, not type_identifier) +(class_declaration + "extension" + name: (user_type) @name) @definition.extension + +; Protocol declarations +(protocol_declaration + name: (type_identifier) @name) @definition.protocol + +; Type alias declarations +(typealias_declaration + name: (type_identifier) @name) @definition.typealias + +; Initializer declarations +(init_declaration + "init" @name) @definition.initializer diff --git a/src/kit/queries/toml/tags.scm b/src/kit/queries/toml/tags.scm new file mode 100644 index 0000000..c3a1e53 --- /dev/null +++ b/src/kit/queries/toml/tags.scm @@ -0,0 +1,19 @@ +;; tags.scm for TOML symbol extraction (tree-sitter-toml) + +; Table headers with bare key: [section] +(table (bare_key) @name) @definition.table + +; Table headers with dotted key: [section.subsection] +(table (dotted_key) @name) @definition.table + +; Table headers with quoted key: ["section.name"] +(table (quoted_key) @name) @definition.table + +; Array table headers with bare key: [[array]] +(table_array_element (bare_key) @name) @definition.table_array + +; Array table headers with dotted key: [[parent.array]] +(table_array_element (dotted_key) @name) @definition.table_array + +; Array table headers with quoted key: [["array.name"]] +(table_array_element (quoted_key) @name) @definition.table_array diff --git a/src/kit/queries/yaml/tags.scm b/src/kit/queries/yaml/tags.scm new file mode 100644 index 0000000..19749c1 --- /dev/null +++ b/src/kit/queries/yaml/tags.scm @@ -0,0 +1,10 @@ +;; tags.scm for YAML symbol extraction (tree-sitter-yaml) +;; Only captures top-level mapping keys (direct children of document root). +;; Use the full mapping pair as the definition so symbol spans/code include values. + +(stream + (document + (block_node + (block_mapping + (block_mapping_pair + key: (flow_node (_) @name)) @definition.key)))) diff --git a/src/kit/tree_sitter_symbol_extractor.py b/src/kit/tree_sitter_symbol_extractor.py index 273346f..0464100 100644 --- a/src/kit/tree_sitter_symbol_extractor.py +++ b/src/kit/tree_sitter_symbol_extractor.py @@ -34,6 +34,12 @@ ".hxx": "cpp", ".zig": "zig", ".cs": "csharp", + ".swift": "swift", + ".sh": "bash", + ".bash": "bash", + ".yaml": "yaml", + ".yml": "yaml", + ".toml": "toml", } @@ -350,11 +356,23 @@ def reset_plugins(cls) -> None: ".hxx": "cpp", ".zig": "zig", ".cs": "csharp", + ".swift": "swift", + ".sh": "bash", + ".bash": "bash", + ".yaml": "yaml", + ".yml": "yaml", + ".toml": "toml", } LANGUAGES.clear() LANGUAGES.update(original_languages) cls.LANGUAGES = set(LANGUAGES.keys()) + @staticmethod + def _strip_wrapping_quotes(text: str) -> str: + if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}: + return text[1:-1] + return text + @staticmethod def extract_symbols(ext: str, source_code: str) -> List[Dict[str, Any]]: """Extracts symbols from source code using tree-sitter queries.""" @@ -454,10 +472,13 @@ def extract_symbols(ext: str, source_code: str) -> List[Dict[str, Any]]: if hasattr(actual_name_node, "text") and actual_name_node.text else str(actual_name_node) ) - # HCL: Strip quotes from string literals - if ext == ".tf" and hasattr(actual_name_node, "type") and actual_name_node.type == "string_lit": - if len(symbol_name) >= 2 and symbol_name.startswith('"') and symbol_name.endswith('"'): - symbol_name = symbol_name[1:-1] + node_type = actual_name_node.type if hasattr(actual_name_node, "type") else None + if ( + (ext == ".tf" and node_type == "string_lit") + or (ext == ".toml" and node_type == "quoted_key") + or (ext in {".yaml", ".yml"} and node_type in {"double_quote_scalar", "single_quote_scalar"}) + ): + symbol_name = TreeSitterSymbolExtractor._strip_wrapping_quotes(symbol_name) definition_capture = next( ((name, node) for name, node in captures.items() if name.startswith("definition.")), None diff --git a/tests/test_bash_symbols.py b/tests/test_bash_symbols.py new file mode 100644 index 0000000..5cdf480 --- /dev/null +++ b/tests/test_bash_symbols.py @@ -0,0 +1,57 @@ +import pytest + +from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor + +BASH_SAMPLE = """\ +function greet() { + echo "Hello, $1!" +} + +say_hi() { + echo "Hi there" +} +""" + + +def test_bash_parser_and_query_available(): + parser = TreeSitterSymbolExtractor.get_parser(".sh") + query = TreeSitterSymbolExtractor.get_query(".sh") + if not parser or not query: + pytest.skip("Bash parser or query not available in this environment") + + tree = parser.parse(BASH_SAMPLE.encode("utf-8")) + assert tree.root_node is not None + + +def test_bash_symbols(): + parser = TreeSitterSymbolExtractor.get_parser(".sh") + query = TreeSitterSymbolExtractor.get_query(".sh") + if not parser or not query: + pytest.skip("Bash parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".sh", BASH_SAMPLE) + names = {s["name"] for s in symbols} + + assert "greet" in names + assert "say_hi" in names + assert all(s["type"] == "function" for s in symbols) + + +def test_bash_extensions(): + supported = TreeSitterSymbolExtractor.list_supported_languages() + assert "bash" in supported + assert ".sh" in supported["bash"] + assert ".bash" in supported["bash"] + + +def test_bash_extension_extracts_symbols(): + parser = TreeSitterSymbolExtractor.get_parser(".bash") + query = TreeSitterSymbolExtractor.get_query(".bash") + if not parser or not query: + pytest.skip("Bash parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".bash", BASH_SAMPLE) + names = {s["name"] for s in symbols} + + assert "greet" in names + assert "say_hi" in names diff --git a/tests/test_swift_symbols.py b/tests/test_swift_symbols.py new file mode 100644 index 0000000..3e11952 --- /dev/null +++ b/tests/test_swift_symbols.py @@ -0,0 +1,92 @@ +import pytest + +from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor + +SWIFT_SAMPLE = """\ +class Animal { + var name: String + init(name: String) { + self.name = name + } +} + +struct Point { + var x: Int + var y: Int +} + +enum Direction { + case north, south, east, west +} + +extension Animal { + func speak() -> String { + return name + } +} + +actor Worker { + func run() {} +} + +protocol Drawable { + func draw() +} + +typealias StringMap = [String: String] + +func greet(person: String) -> String { + return "Hello, \\(person)!" +} +""" + + +def test_swift_parser_and_query_available(): + parser = TreeSitterSymbolExtractor.get_parser(".swift") + query = TreeSitterSymbolExtractor.get_query(".swift") + if not parser or not query: + pytest.skip("Swift parser or query not available in this environment") + + tree = parser.parse(SWIFT_SAMPLE.encode("utf-8")) + assert tree.root_node is not None + + +def test_swift_symbols(): + parser = TreeSitterSymbolExtractor.get_parser(".swift") + query = TreeSitterSymbolExtractor.get_query(".swift") + if not parser or not query: + pytest.skip("Swift parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".swift", SWIFT_SAMPLE) + names = {s["name"] for s in symbols} + types = {s["type"] for s in symbols} + animal_symbols = [s for s in symbols if s["name"] == "Animal"] + + # All 9 symbol types + assert "Animal" in names + assert "Point" in names + assert "Direction" in names + assert "Worker" in names + assert "Drawable" in names + assert "StringMap" in names + assert "greet" in names + assert "init" in names + + assert len(animal_symbols) == 2 + assert {s["type"] for s in animal_symbols} == {"class", "extension"} + + assert "class" in types + assert "actor" in types + assert "struct" in types + assert "enum" in types + assert "extension" in types + assert "protocol" in types + assert "typealias" in types + assert "function" in types + assert "initializer" in types + + +def test_swift_in_supported_languages(): + supported = TreeSitterSymbolExtractor.list_supported_languages() + assert "swift" in supported + assert ".swift" in supported["swift"] diff --git a/tests/test_symbol_extraction_multilang.py b/tests/test_symbol_extraction_multilang.py index 7776728..16e0016 100644 --- a/tests/test_symbol_extraction_multilang.py +++ b/tests/test_symbol_extraction_multilang.py @@ -9,6 +9,10 @@ ".java": "class Bar { void foo() {} }\n", ".rs": "fn foo() {}\nstruct Bar;\n", ".zig": "pub fn foo() void {}\npub const Bar = struct {};\n", + ".swift": "func foo() -> Int { return 42 }\nclass Bar {}\n", + ".sh": "function foo() { echo hello; }\n", + ".yaml": "foo: bar\nbaz: 1\n", + ".toml": "[foo]\nbar = 1\n", } @@ -90,8 +94,7 @@ def test_symbol_code_contains_full_body(ext: str, code: str): # The code field should contain more than just the function name assert len(func_code) > len(func_name), ( - f"Code field for {ext} only contains name '{func_name}', expected full function body. " - f"Got: '{func_code}'" + f"Code field for {ext} only contains name '{func_name}', expected full function body. Got: '{func_code}'" ) # The code should contain the function keyword or definition diff --git a/tests/test_toml_symbols.py b/tests/test_toml_symbols.py new file mode 100644 index 0000000..1df6a5c --- /dev/null +++ b/tests/test_toml_symbols.py @@ -0,0 +1,88 @@ +import pytest + +from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor + +TOML_SAMPLE = """\ +[package] +name = "my-app" +version = "1.0.0" + +[dependencies] +serde = "1.0" + +[build.settings] +opt-level = 2 + +[[bin]] +name = "main" +path = "src/main.rs" + +[[test]] +name = "integration" +""" + +TOML_QUOTED_SAMPLE = """\ +["foo.bar"] +value = 1 + +[["bin.name"]] +name = "main" + +['lit'] +value = 2 +""" + + +def test_toml_parser_and_query_available(): + parser = TreeSitterSymbolExtractor.get_parser(".toml") + query = TreeSitterSymbolExtractor.get_query(".toml") + if not parser or not query: + pytest.skip("TOML parser or query not available in this environment") + + tree = parser.parse(TOML_SAMPLE.encode("utf-8")) + assert tree.root_node is not None + + +def test_toml_symbols(): + parser = TreeSitterSymbolExtractor.get_parser(".toml") + query = TreeSitterSymbolExtractor.get_query(".toml") + if not parser or not query: + pytest.skip("TOML parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".toml", TOML_SAMPLE) + names = {s["name"] for s in symbols} + types = {s["type"] for s in symbols} + + # Table headers + assert "package" in names + assert "dependencies" in names + + # Dotted table header + assert "build.settings" in names + + # Array tables + assert "bin" in names + assert "test" in names + + assert "table" in types + assert "table_array" in types + + +def test_toml_quoted_table_names_are_normalized(): + parser = TreeSitterSymbolExtractor.get_parser(".toml") + query = TreeSitterSymbolExtractor.get_query(".toml") + if not parser or not query: + pytest.skip("TOML parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".toml", TOML_QUOTED_SAMPLE) + names = {s["name"] for s in symbols} + + assert "foo.bar" in names + assert "bin.name" in names + assert "lit" in names + + +def test_toml_in_supported_languages(): + supported = TreeSitterSymbolExtractor.list_supported_languages() + assert "toml" in supported + assert ".toml" in supported["toml"] diff --git a/tests/test_tree_sitter_languages.py b/tests/test_tree_sitter_languages.py index f08160a..51ead25 100644 --- a/tests/test_tree_sitter_languages.py +++ b/tests/test_tree_sitter_languages.py @@ -12,6 +12,10 @@ "c": b"int foo() { return 42; }\n", "dart": b"int foo() { return 42; }\n", "zig": b"pub fn foo() void { }\n", + "swift": b"func foo() -> Int { return 42 }\n", + "bash": b"function foo() { echo hello; }\n", + "yaml": b"key: value\n", + "toml": b'[package]\nname = "test"\n', } diff --git a/tests/test_yaml_symbols.py b/tests/test_yaml_symbols.py new file mode 100644 index 0000000..f9f1614 --- /dev/null +++ b/tests/test_yaml_symbols.py @@ -0,0 +1,85 @@ +import pytest + +from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor + +YAML_SAMPLE = """\ +name: my-app +version: 1.0.0 +database: + host: localhost + port: 5432 +logging: + level: info + format: json +""" + +YAML_QUOTED_SAMPLE = """\ +"foo.bar": 1 +'quoted': 2 +""" + + +def test_yaml_parser_and_query_available(): + parser = TreeSitterSymbolExtractor.get_parser(".yaml") + query = TreeSitterSymbolExtractor.get_query(".yaml") + if not parser or not query: + pytest.skip("YAML parser or query not available in this environment") + + tree = parser.parse(YAML_SAMPLE.encode("utf-8")) + assert tree.root_node is not None + + +def test_yaml_top_level_keys_only(): + parser = TreeSitterSymbolExtractor.get_parser(".yaml") + query = TreeSitterSymbolExtractor.get_query(".yaml") + if not parser or not query: + pytest.skip("YAML parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".yaml", YAML_SAMPLE) + names = {s["name"] for s in symbols} + + # Top-level keys should be captured + assert "name" in names + assert "version" in names + assert "database" in names + assert "logging" in names + + # Nested keys should NOT be captured + assert "host" not in names + assert "port" not in names + assert "level" not in names + assert "format" not in names + + +def test_yaml_symbol_code_uses_full_mapping_pair(): + parser = TreeSitterSymbolExtractor.get_parser(".yaml") + query = TreeSitterSymbolExtractor.get_query(".yaml") + if not parser or not query: + pytest.skip("YAML parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".yaml", YAML_SAMPLE) + database_symbol = next(s for s in symbols if s["name"] == "database") + + assert database_symbol["code"].startswith("database:") + assert "host: localhost" in database_symbol["code"] + assert database_symbol["end_line"] > database_symbol["start_line"] + + +def test_yaml_quoted_keys_and_yml_extension(): + parser = TreeSitterSymbolExtractor.get_parser(".yml") + query = TreeSitterSymbolExtractor.get_query(".yml") + if not parser or not query: + pytest.skip("YAML parser or query not available in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(".yml", YAML_QUOTED_SAMPLE) + names = {s["name"] for s in symbols} + + assert "foo.bar" in names + assert "quoted" in names + + +def test_yaml_extensions(): + supported = TreeSitterSymbolExtractor.list_supported_languages() + assert "yaml" in supported + assert ".yaml" in supported["yaml"] + assert ".yml" in supported["yaml"]