From d89783da911077d12c42aa7869d42d06545193e2 Mon Sep 17 00:00:00 2001 From: shreddd Date: Fri, 3 Oct 2025 14:32:15 -0700 Subject: [PATCH 1/2] Enable properties / filters in geospatial search --- src/server.py | 49 ++++++++++++-- tests/test_api.py | 158 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 6 deletions(-) diff --git a/src/server.py b/src/server.py index a45d5b0..e72f793 100644 --- a/src/server.py +++ b/src/server.py @@ -1,3 +1,4 @@ +import json import logging from typing import Optional, Dict, Any, Union @@ -172,13 +173,18 @@ def find_nearby_entities( ..., ge=-180, le=180, description="Center longitude in degrees" ), radius_meters: float = Query(..., gt=0, description="Search radius in meters"), + filter_json: Optional[str] = Query( + None, description="Optional JSON string containing MongoDB-style filter criteria to refine search results" + ), ) -> EntitiesResponse: r"""Find entities within a specified radius of a geographic point using MongoDB's $near operator. This endpoint uses MongoDB's geospatial $near query which requires a 2dsphere index - on the coordinates field for optimal performance. + on the coordinates field for optimal performance. An optional filter_json parameter can be + provided as a JSON string to further refine the results. Example: /bertron/geo/nearby?latitude=47.6062&longitude=-122.3321&radius_meters=10000 + Example with filter: /bertron/geo/nearby?latitude=47.6062&longitude=-122.3321&radius_meters=10000&filter_json={"type":"sample"} """ db = mongo_client[cfg.mongo_database] @@ -205,8 +211,21 @@ def find_nearby_entities( } } - # Execute find with geospatial filter - cursor = collection.find(filter=geo_filter) + # Parse and combine with optional filter if provided + final_filter = geo_filter + if filter_json: + try: + additional_filter = json.loads(filter_json) + # Combine geospatial and additional filters using $and + final_filter = {"$and": [geo_filter, additional_filter]} + except json.JSONDecodeError: + raise HTTPException( + status_code=400, + detail="Invalid JSON format in filter_json parameter" + ) + + # Execute find with combined filter + cursor = collection.find(filter=final_filter) # Convert cursor to list and convert to Entity objects documents = list(cursor) @@ -234,13 +253,18 @@ def find_entities_in_bounding_box( northeast_lng: float = Query( ..., ge=-180, le=180, description="Northeast corner longitude" ), + filter_json: Optional[str] = Query( + None, description="Optional JSON string containing MongoDB-style filter criteria to refine search results" + ), ) -> EntitiesResponse: r"""Find entities within a bounding box using MongoDB's $geoWithin operator. This endpoint finds all entities whose coordinates fall within the specified - rectangular bounding box defined by southwest and northeast corners. + rectangular bounding box defined by southwest and northeast corners. An optional + filter_json parameter can be provided as a JSON string to further refine the results. Example: /bertron/geo/bbox?southwest_lat=47.5&southwest_lng=-122.4&northeast_lat=47.7&northeast_lng=-122.2 + Example with filter: /bertron/geo/bbox?southwest_lat=47.5&southwest_lng=-122.4&northeast_lat=47.7&northeast_lng=-122.2&filter_json={"type":"sample"} """ db = mongo_client[cfg.mongo_database] @@ -278,8 +302,21 @@ def find_entities_in_bounding_box( } } - # Execute find with geospatial filter - cursor = collection.find(filter=geo_filter) + # Parse and combine with optional filter if provided + final_filter = geo_filter + if filter_json: + try: + additional_filter = json.loads(filter_json) + # Combine geospatial and additional filters using $and + final_filter = {"$and": [geo_filter, additional_filter]} + except json.JSONDecodeError: + raise HTTPException( + status_code=400, + detail="Invalid JSON format in filter_json parameter" + ) + + # Execute find with combined filter + cursor = collection.find(filter=final_filter) # Convert cursor to list and convert to Entity objects documents = list(cursor) diff --git a/tests/test_api.py b/tests/test_api.py index 475649b..0980c08 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,4 +1,5 @@ from typing import Dict, Any +import json from fastapi.testclient import TestClient from pymongo.database import Database @@ -292,6 +293,163 @@ def test_geo_bounding_box_invalid_coordinates( error_data = response.json() assert "latitude" in error_data["detail"].lower() + def test_geo_nearby_search_with_filter(self, test_client: TestClient, seeded_db: Database): + """Test geographic nearby search with additional filter.""" + + # Search near the EMSL coordinates with a filter for EMSL data source + params = { + "latitude": 34.0, + "longitude": 118.0, + "radius_meters": 100000, # 100km radius + "filter_json": json.dumps({"ber_data_source": "EMSL"}) + } + + response = test_client.get("/bertron/geo/nearby", params=params) + + assert response.status_code == status.HTTP_200_OK + entities_data = response.json() + + assert "documents" in entities_data + assert "count" in entities_data + + # All returned entities should be from EMSL + for entity in entities_data["documents"]: + assert entity["ber_data_source"] == "EMSL" + self._verify_entity_structure(entity) + + # Should find at least one entity + assert entities_data["count"] > 0 + + def test_geo_nearby_search_with_invalid_filter_json(self, test_client: TestClient, seeded_db: Database): + """Test geographic nearby search with invalid JSON filter.""" + params = { + "latitude": 34.0, + "longitude": 118.0, + "radius_meters": 100000, + "filter_json": "invalid json {" # Invalid JSON + } + + response = test_client.get("/bertron/geo/nearby", params=params) + assert response.status_code == status.HTTP_400_BAD_REQUEST + + def test_geo_bbox_search_with_filter(self, test_client: TestClient, seeded_db: Database): + """Test geographic bounding box search with additional filter.""" + + # Bounding box around Alaska with filter for ESS-DIVE data source + params = { + "southwest_lat": 64.0, + "southwest_lng": -166.0, + "northeast_lat": 66.0, + "northeast_lng": -163.0, + "filter_json": json.dumps({"ber_data_source": "ESS-DIVE"}) + } + + response = test_client.get("/bertron/geo/bbox", params=params) + + assert response.status_code == status.HTTP_200_OK + entities_data = response.json() + + assert "documents" in entities_data + assert "count" in entities_data + + # All returned entities should be from ESS-DIVE and within bounding box + for entity in entities_data["documents"]: + assert entity["ber_data_source"] == "ESS-DIVE" + # Verify coordinates are within bounding box + lat = entity["coordinates"]["latitude"] + lng = entity["coordinates"]["longitude"] + assert 64.0 <= lat <= 66.0 + assert -166.0 <= lng <= -163.0 + self._verify_entity_structure(entity) + + def test_geo_bbox_search_with_empty_filter(self, test_client: TestClient, seeded_db: Database): + """Test geographic bounding box search with empty filter (should work like no filter).""" + + # Bounding box around Alaska with empty filter + params = { + "southwest_lat": 64.0, + "southwest_lng": -166.0, + "northeast_lat": 66.0, + "northeast_lng": -163.0, + "filter_json": json.dumps({}) # Empty filter + } + + response = test_client.get("/bertron/geo/bbox", params=params) + + assert response.status_code == status.HTTP_200_OK + entities_data = response.json() + + assert "documents" in entities_data + assert "count" in entities_data + + # Should find entities regardless of data source (empty filter = no additional restrictions) + for entity in entities_data["documents"]: + self._verify_entity_structure(entity) + + def test_geo_search_filter_with_complex_query(self, test_client: TestClient, seeded_db: Database): + """Test geospatial search with more complex filter query.""" + + # Test with a more complex filter using MongoDB operators + params = { + "latitude": 34.0, + "longitude": 118.0, + "radius_meters": 500000, # Larger radius to catch more entities + "filter_json": json.dumps({ + "entity_type": {"$in": ["sample", "study"]}, + "ber_data_source": {"$ne": "JGI"} # Exclude JGI data + }) + } + + response = test_client.get("/bertron/geo/nearby", params=params) + + assert response.status_code == status.HTTP_200_OK + entities_data = response.json() + + assert "documents" in entities_data + assert "count" in entities_data + + # Verify filter conditions are met + for entity in entities_data["documents"]: + # Should have entity_type containing "sample" or "study" + entity_types = entity.get("entity_type", []) + assert any(et in ["sample", "study"] for et in entity_types) + # Should not be from JGI + assert entity["ber_data_source"] != "JGI" + self._verify_entity_structure(entity) + + def test_geosearch_with_properties(self, test_client: TestClient, seeded_db: Database): + """Test searching entities by properties.""" + + # Search for entities with a specific property label + params = { + "latitude": 28.125842, + "longitude": -81.434174, + "radius_meters": 1000000, # 1000km radius to include NMDC entity + "filter_json": json.dumps({ + "properties.attribute.label": "depth", + "properties.numeric_value": 24 + }) + } + + response = test_client.get("/bertron/geo/nearby", params=params) + + assert response.status_code == status.HTTP_200_OK + entities_data = response.json() + + assert "documents" in entities_data + assert "count" in entities_data + + # Should find at least the NMDC entity with depth property + found_nmdc = False + for entity in entities_data["documents"]: + properties = [ prop["attribute"]["label"] for prop in entity.get("properties", []) ] + if "depth" in properties and entity["id"] == "nmdc:bsm-11-bsf8yq62": + found_nmdc = True + self._verify_entity_structure(entity) + + assert found_nmdc, "Should find NMDC entity with depth property" + + def _verify_entity_structure(self, entity: Dict[str, Any]): """Helper method to verify entity structure matches schema.""" required_fields = [ From 03bfae17a8732066871530a8b137b549de08af86 Mon Sep 17 00:00:00 2001 From: shreddd Date: Fri, 3 Oct 2025 17:51:47 -0700 Subject: [PATCH 2/2] format --- src/server.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/server.py b/src/server.py index e72f793..0089402 100644 --- a/src/server.py +++ b/src/server.py @@ -174,7 +174,8 @@ def find_nearby_entities( ), radius_meters: float = Query(..., gt=0, description="Search radius in meters"), filter_json: Optional[str] = Query( - None, description="Optional JSON string containing MongoDB-style filter criteria to refine search results" + None, + description="Optional JSON string containing MongoDB-style filter criteria to refine search results", ), ) -> EntitiesResponse: r"""Find entities within a specified radius of a geographic point using MongoDB's $near operator. @@ -220,8 +221,8 @@ def find_nearby_entities( final_filter = {"$and": [geo_filter, additional_filter]} except json.JSONDecodeError: raise HTTPException( - status_code=400, - detail="Invalid JSON format in filter_json parameter" + status_code=400, + detail="Invalid JSON format in filter_json parameter", ) # Execute find with combined filter @@ -254,13 +255,14 @@ def find_entities_in_bounding_box( ..., ge=-180, le=180, description="Northeast corner longitude" ), filter_json: Optional[str] = Query( - None, description="Optional JSON string containing MongoDB-style filter criteria to refine search results" + None, + description="Optional JSON string containing MongoDB-style filter criteria to refine search results", ), ) -> EntitiesResponse: r"""Find entities within a bounding box using MongoDB's $geoWithin operator. This endpoint finds all entities whose coordinates fall within the specified - rectangular bounding box defined by southwest and northeast corners. An optional + rectangular bounding box defined by southwest and northeast corners. An optional filter_json parameter can be provided as a JSON string to further refine the results. Example: /bertron/geo/bbox?southwest_lat=47.5&southwest_lng=-122.4&northeast_lat=47.7&northeast_lng=-122.2 @@ -311,8 +313,8 @@ def find_entities_in_bounding_box( final_filter = {"$and": [geo_filter, additional_filter]} except json.JSONDecodeError: raise HTTPException( - status_code=400, - detail="Invalid JSON format in filter_json parameter" + status_code=400, + detail="Invalid JSON format in filter_json parameter", ) # Execute find with combined filter