diff --git a/.claude/commands/implement-feature.md b/.claude/commands/implement-feature.md new file mode 100644 index 000000000..33302a4fd --- /dev/null +++ b/.claude/commands/implement-feature.md @@ -0,0 +1,7 @@ +You will be implementing a new feature in this codebase + +$ARGUMENTS + +IMPORTANT: Only do this for front-end features. +Once this feature is built, make sure to write the changes you made to file called frontend-changes.md +Do not ask for permissions to modify this file, assume you can always do it. \ No newline at end of file diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..532d92e2b --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,8 @@ +{ + "permissions": { + "allow": [ + "Bash(python3:*)", + "Bash(uv run:*)" + ] + } +} diff --git a/.env.example b/.env.example deleted file mode 100644 index 18b34cb7e..000000000 --- a/.env.example +++ /dev/null @@ -1,2 +0,0 @@ -# Copy this file to .env and add your actual API key -ANTHROPIC_API_KEY=your-anthropic-api-key-here \ No newline at end of file diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 000000000..1f38a84d9 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,58 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: write + 
pull-requests: write + issues: write + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # Optional: Customize the trigger phrase (default: @claude) + # trigger_phrase: "/claude" + + # Optional: Trigger when specific user is assigned to an issue + # assignee_trigger: "claude-bot" + + # Optional: Configure Claude's behavior with CLI arguments + # claude_args: | + # --model claude-opus-4-1-20250805 + # --max-turns 10 + # --allowedTools "Bash(npm install),Bash(npm run build),Bash(npm run test:*),Bash(npm run lint:*)" + # --system-prompt "Follow our coding standards. Ensure all new code has tests. Use TypeScript for new files." + + # Optional: Advanced settings configuration + # settings: | + # { + # "env": { + # "NODE_ENV": "test" + # } + # } \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..e83856442 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,87 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +A full-stack RAG (Retrieval-Augmented Generation) chatbot that enables semantic search and AI-powered Q&A over course documents. Uses ChromaDB for vector storage, sentence-transformers for embeddings, and Anthropic Claude for response generation. + +## Setup + +Requires Python 3.13+, `uv` package manager, and an Anthropic API key. + +```bash +uv sync +cp .env.example .env # then add your ANTHROPIC_API_KEY +``` + +## Running + +```bash +# Quick start +./run.sh + +# Manual (from repo root) +cd backend && uv run uvicorn app:app --reload --port 8000 +``` + +Access the app at `http://localhost:8000`, API docs at `http://localhost:8000/docs`. 
+ +On startup, `app.py` auto-loads all `.txt` files from `../docs/` into ChromaDB. + +## Architecture + +**Request flow:** + +``` +Frontend (frontend/) → POST /api/query → RAGSystem.query() + → ai_generator (Claude with tools) → search_tools (if needed) + → vector_store (ChromaDB semantic search) → response to frontend +``` + +**Backend modules** (`backend/`): + +| File | Role | +|------|------| +| `app.py` | FastAPI entry point; mounts frontend as static files; startup doc loading | +| `rag_system.py` | Orchestrator — wires all components together for a query | +| `document_processor.py` | Parses structured `.txt` course files into chunks | +| `vector_store.py` | ChromaDB wrapper; two collections: `course_catalog` and `course_content` | +| `ai_generator.py` | Anthropic Claude wrapper with tool-calling support | +| `search_tools.py` | Tool definitions and execution (`search_course_content`) | +| `session_manager.py` | In-memory conversation history (max 2 exchanges) | +| `models.py` | Pydantic models: `Course`, `Lesson`, `CourseChunk` | +| `config.py` | All configuration via `Config` dataclass (model, chunk size, paths, etc.) | + +**Frontend** (`frontend/`): Vanilla HTML/CSS/JS SPA; uses `marked.js` from CDN for markdown rendering; chat UI with collapsible course stats sidebar. + +**Course document format** (files in `docs/`): +``` +Course Title: [name] +Course Link: [url] +Course Instructor: [name] + +Lesson 0: [title] +Lesson Link: [url] +[content...] +``` + +## Key Configuration (`backend/config.py`) + +- `ANTHROPIC_MODEL`: `claude-sonnet-4-20250514` +- `EMBEDDING_MODEL`: `all-MiniLM-L6-v2` (384-dim, via sentence-transformers) +- `CHUNK_SIZE` / `CHUNK_OVERLAP`: 800 / 100 characters +- `MAX_RESULTS`: 5 search results returned +- `MAX_HISTORY`: 2 conversation exchanges retained +- `CHROMA_PATH`: `./chroma_db` (persistent, relative to `backend/`) + +## Dependencies + +Managed via `uv`. 
Key packages: `fastapi`, `uvicorn`, `chromadb`, `anthropic`, `sentence-transformers`, `python-dotenv`. Lock file is `uv.lock`. + +## Notes + +- No test framework or linting tools are configured. +- ChromaDB persists to `backend/chroma_db/` — delete this directory to reset the vector store. +- The backend serves the frontend as static files; no separate frontend build step. +- Windows users must use Git Bash (not PowerShell/CMD) to run `run.sh`. diff --git a/backend/ai_generator.py b/backend/ai_generator.py index 0363ca90c..c22d6af66 100644 --- a/backend/ai_generator.py +++ b/backend/ai_generator.py @@ -1,21 +1,24 @@ import anthropic from typing import List, Optional, Dict, Any + class AIGenerator: """Handles interactions with Anthropic's Claude API for generating responses""" - + # Static system prompt to avoid rebuilding on each call SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information. 
Search Tool Usage: -- Use the search tool **only** for questions about specific course content or detailed educational materials -- **One search per query maximum** -- Synthesize search results into accurate, fact-based responses -- If search yields no results, state this clearly without offering alternatives +- Use `search_course_content` only for questions about specific course content or detailed educational materials +- Use `get_course_outline` for any question asking about a course's structure, outline, syllabus, or list of lessons +- One tool call per query maximum +- Synthesize results into accurate, fact-based responses +- If a tool yields no results, state this clearly without offering alternatives Response Protocol: - **General knowledge questions**: Answer using existing knowledge without searching -- **Course-specific questions**: Search first, then answer +- **Course-specific content questions**: Use `search_course_content`, then answer +- **Course outline / structure questions**: Use `get_course_outline` and return the course title, course link, and the number and title of each lesson - **No meta-commentary**: - Provide direct answers only — no reasoning process, search explanations, or question-type analysis - Do not mention "based on the search results" @@ -28,108 +31,110 @@ class AIGenerator: 4. **Example-supported** - Include relevant examples when they aid understanding Provide only the direct answer to what was asked. 
""" - + def __init__(self, api_key: str, model: str): self.client = anthropic.Anthropic(api_key=api_key) self.model = model - + # Pre-build base API parameters - self.base_params = { - "model": self.model, - "temperature": 0, - "max_tokens": 800 - } - - def generate_response(self, query: str, - conversation_history: Optional[str] = None, - tools: Optional[List] = None, - tool_manager=None) -> str: + self.base_params = {"model": self.model, "temperature": 0, "max_tokens": 800} + + def generate_response( + self, + query: str, + conversation_history: Optional[str] = None, + tools: Optional[List] = None, + tool_manager=None, + ) -> str: """ Generate AI response with optional tool usage and conversation context. - + Args: query: The user's question or request conversation_history: Previous messages for context tools: Available tools the AI can use tool_manager: Manager to execute tools - + Returns: Generated response as string """ - + # Build system content efficiently - avoid string ops when possible system_content = ( f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}" - if conversation_history + if conversation_history else self.SYSTEM_PROMPT ) - + # Prepare API call parameters efficiently api_params = { **self.base_params, "messages": [{"role": "user", "content": query}], - "system": system_content + "system": system_content, } - + # Add tools if available if tools: api_params["tools"] = tools api_params["tool_choice"] = {"type": "auto"} - + # Get response from Claude response = self.client.messages.create(**api_params) - + # Handle tool execution if needed if response.stop_reason == "tool_use" and tool_manager: return self._handle_tool_execution(response, api_params, tool_manager) - + # Return direct response return response.content[0].text - - def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager): + + def _handle_tool_execution( + self, initial_response, base_params: Dict[str, Any], tool_manager + ): 
""" Handle execution of tool calls and get follow-up response. - + Args: initial_response: The response containing tool use requests base_params: Base API parameters tool_manager: Manager to execute tools - + Returns: Final response text after tool execution """ # Start with existing messages messages = base_params["messages"].copy() - + # Add AI's tool use response messages.append({"role": "assistant", "content": initial_response.content}) - + # Execute all tool calls and collect results tool_results = [] for content_block in initial_response.content: if content_block.type == "tool_use": tool_result = tool_manager.execute_tool( - content_block.name, - **content_block.input + content_block.name, **content_block.input + ) + + tool_results.append( + { + "type": "tool_result", + "tool_use_id": content_block.id, + "content": tool_result, + } ) - - tool_results.append({ - "type": "tool_result", - "tool_use_id": content_block.id, - "content": tool_result - }) - + # Add tool results as single message if tool_results: messages.append({"role": "user", "content": tool_results}) - + # Prepare final API call without tools final_params = { **self.base_params, "messages": messages, - "system": base_params["system"] + "system": base_params["system"], } - + # Get final response final_response = self.client.messages.create(**final_params) - return final_response.content[0].text \ No newline at end of file + return final_response.content[0].text diff --git a/backend/app.py b/backend/app.py index 5a69d741d..d368b14cd 100644 --- a/backend/app.py +++ b/backend/app.py @@ -1,4 +1,5 @@ import warnings + warnings.filterwarnings("ignore", message="resource_tracker: There appear to be.*") from fastapi import FastAPI, HTTPException @@ -6,7 +7,7 @@ from fastapi.staticfiles import StaticFiles from fastapi.middleware.trustedhost import TrustedHostMiddleware from pydantic import BaseModel -from typing import List, Optional +from typing import List, Optional, Dict, Any import os from config import 
config @@ -16,10 +17,7 @@ app = FastAPI(title="Course Materials RAG System", root_path="") # Add trusted host middleware for proxy -app.add_middleware( - TrustedHostMiddleware, - allowed_hosts=["*"] -) +app.add_middleware(TrustedHostMiddleware, allowed_hosts=["*"]) # Enable CORS with proper settings for proxy app.add_middleware( @@ -34,25 +32,33 @@ # Initialize RAG system rag_system = RAGSystem(config) + # Pydantic models for request/response class QueryRequest(BaseModel): """Request model for course queries""" + query: str session_id: Optional[str] = None + class QueryResponse(BaseModel): """Response model for course queries""" + answer: str - sources: List[str] + sources: List[Dict[str, Any]] session_id: str + class CourseStats(BaseModel): """Response model for course statistics""" + total_courses: int course_titles: List[str] + # API Endpoints + @app.post("/api/query", response_model=QueryResponse) async def query_documents(request: QueryRequest): """Process a query and return response with sources""" @@ -61,18 +67,15 @@ async def query_documents(request: QueryRequest): session_id = request.session_id if not session_id: session_id = rag_system.session_manager.create_session() - + # Process query using RAG system answer, sources = rag_system.query(request.query, session_id) - - return QueryResponse( - answer=answer, - sources=sources, - session_id=session_id - ) + + return QueryResponse(answer=answer, sources=sources, session_id=session_id) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + @app.get("/api/courses", response_model=CourseStats) async def get_course_stats(): """Get course analytics and statistics""" @@ -80,11 +83,22 @@ async def get_course_stats(): analytics = rag_system.get_course_analytics() return CourseStats( total_courses=analytics["total_courses"], - course_titles=analytics["course_titles"] + course_titles=analytics["course_titles"], ) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + 
+@app.delete("/api/session/{session_id}") +async def clear_session(session_id: str): + """Clear conversation history for a session""" + try: + rag_system.session_manager.clear_session(session_id) + return {"success": True, "session_id": session_id} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @app.on_event("startup") async def startup_event(): """Load initial documents on startup""" @@ -92,11 +106,14 @@ async def startup_event(): if os.path.exists(docs_path): print("Loading initial documents...") try: - courses, chunks = rag_system.add_course_folder(docs_path, clear_existing=False) + courses, chunks = rag_system.add_course_folder( + docs_path, clear_existing=False + ) print(f"Loaded {courses} courses with {chunks} chunks") except Exception as e: print(f"Error loading documents: {e}") + # Custom static file handler with no-cache headers for development from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse @@ -113,7 +130,10 @@ async def get_response(self, path: str, scope): response.headers["Pragma"] = "no-cache" response.headers["Expires"] = "0" return response - - + + # Serve static files for the frontend -app.mount("/", StaticFiles(directory="../frontend", html=True), name="static") \ No newline at end of file +FRONTEND_DIR = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "..", "frontend" +) +app.mount("/", DevStaticFiles(directory=FRONTEND_DIR, html=True), name="static") diff --git a/backend/config.py b/backend/config.py index d9f6392ef..7379e7133 100644 --- a/backend/config.py +++ b/backend/config.py @@ -5,25 +5,26 @@ # Load environment variables from .env file load_dotenv() + @dataclass class Config: """Configuration settings for the RAG system""" + # Anthropic API settings ANTHROPIC_API_KEY: str = os.getenv("ANTHROPIC_API_KEY", "") ANTHROPIC_MODEL: str = "claude-sonnet-4-20250514" - + # Embedding model settings EMBEDDING_MODEL: str = "all-MiniLM-L6-v2" - + # Document processing 
settings - CHUNK_SIZE: int = 800 # Size of text chunks for vector storage - CHUNK_OVERLAP: int = 100 # Characters to overlap between chunks - MAX_RESULTS: int = 5 # Maximum search results to return - MAX_HISTORY: int = 2 # Number of conversation messages to remember - + CHUNK_SIZE: int = 800 # Size of text chunks for vector storage + CHUNK_OVERLAP: int = 100 # Characters to overlap between chunks + MAX_RESULTS: int = 5 # Maximum search results to return + MAX_HISTORY: int = 2 # Number of conversation messages to remember + # Database paths CHROMA_PATH: str = "./chroma_db" # ChromaDB storage location -config = Config() - +config = Config() diff --git a/backend/document_processor.py b/backend/document_processor.py index 266e85904..32c6648ae 100644 --- a/backend/document_processor.py +++ b/backend/document_processor.py @@ -3,81 +3,84 @@ from typing import List, Tuple from models import Course, Lesson, CourseChunk + class DocumentProcessor: """Processes course documents and extracts structured information""" - + def __init__(self, chunk_size: int, chunk_overlap: int): self.chunk_size = chunk_size self.chunk_overlap = chunk_overlap - + def read_file(self, file_path: str) -> str: """Read content from file with UTF-8 encoding""" try: - with open(file_path, 'r', encoding='utf-8') as file: + with open(file_path, "r", encoding="utf-8") as file: return file.read() except UnicodeDecodeError: # If UTF-8 fails, try with error handling - with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: + with open(file_path, "r", encoding="utf-8", errors="ignore") as file: return file.read() - - def chunk_text(self, text: str) -> List[str]: """Split text into sentence-based chunks with overlap using config settings""" - + # Clean up the text - text = re.sub(r'\s+', ' ', text.strip()) # Normalize whitespace - + text = re.sub(r"\s+", " ", text.strip()) # Normalize whitespace + # Better sentence splitting that handles abbreviations # This regex looks for periods followed by 
whitespace and capital letters # but ignores common abbreviations - sentence_endings = re.compile(r'(? self.chunk_size and current_chunk: break - + current_chunk.append(sentence) current_size += total_addition - + # Add chunk if we have content if current_chunk: - chunks.append(' '.join(current_chunk)) - + chunks.append(" ".join(current_chunk)) + # Calculate overlap for next chunk - if hasattr(self, 'chunk_overlap') and self.chunk_overlap > 0: + if hasattr(self, "chunk_overlap") and self.chunk_overlap > 0: # Find how many sentences to overlap overlap_size = 0 overlap_sentences = 0 - + # Count backwards from end of current chunk for k in range(len(current_chunk) - 1, -1, -1): - sentence_len = len(current_chunk[k]) + (1 if k < len(current_chunk) - 1 else 0) + sentence_len = len(current_chunk[k]) + ( + 1 if k < len(current_chunk) - 1 else 0 + ) if overlap_size + sentence_len <= self.chunk_overlap: overlap_size += sentence_len overlap_sentences += 1 else: break - + # Move start position considering overlap next_start = i + len(current_chunk) - overlap_sentences i = max(next_start, i + 1) # Ensure we make progress @@ -87,14 +90,12 @@ def chunk_text(self, text: str) -> List[str]: else: # No sentences fit, move to next i += 1 - - return chunks - - + return chunks - - def process_course_document(self, file_path: str) -> Tuple[Course, List[CourseChunk]]: + def process_course_document( + self, file_path: str + ) -> Tuple[Course, List[CourseChunk]]: """ Process a course document with expected format: Line 1: Course Title: [title] @@ -104,47 +105,51 @@ def process_course_document(self, file_path: str) -> Tuple[Course, List[CourseCh """ content = self.read_file(file_path) filename = os.path.basename(file_path) - - lines = content.strip().split('\n') - + + lines = content.strip().split("\n") + # Extract course metadata from first three lines course_title = filename # Default fallback course_link = None instructor_name = "Unknown" - + # Parse course title from first line if 
len(lines) >= 1 and lines[0].strip(): - title_match = re.match(r'^Course Title:\s*(.+)$', lines[0].strip(), re.IGNORECASE) + title_match = re.match( + r"^Course Title:\s*(.+)$", lines[0].strip(), re.IGNORECASE + ) if title_match: course_title = title_match.group(1).strip() else: course_title = lines[0].strip() - + # Parse remaining lines for course metadata for i in range(1, min(len(lines), 4)): # Check first 4 lines for metadata line = lines[i].strip() if not line: continue - + # Try to match course link - link_match = re.match(r'^Course Link:\s*(.+)$', line, re.IGNORECASE) + link_match = re.match(r"^Course Link:\s*(.+)$", line, re.IGNORECASE) if link_match: course_link = link_match.group(1).strip() continue - + # Try to match instructor - instructor_match = re.match(r'^Course Instructor:\s*(.+)$', line, re.IGNORECASE) + instructor_match = re.match( + r"^Course Instructor:\s*(.+)$", line, re.IGNORECASE + ) if instructor_match: instructor_name = instructor_match.group(1).strip() continue - + # Create course object with title as ID course = Course( title=course_title, course_link=course_link, - instructor=instructor_name if instructor_name != "Unknown" else None + instructor=instructor_name if instructor_name != "Unknown" else None, ) - + # Process lessons and create chunks course_chunks = [] current_lesson = None @@ -152,108 +157,114 @@ def process_course_document(self, file_path: str) -> Tuple[Course, List[CourseCh lesson_link = None lesson_content = [] chunk_counter = 0 - + # Start processing from line 4 (after metadata) start_index = 3 if len(lines) > 3 and not lines[3].strip(): start_index = 4 # Skip empty line after instructor - + i = start_index while i < len(lines): line = lines[i] - + # Check for lesson markers (e.g., "Lesson 0: Introduction") - lesson_match = re.match(r'^Lesson\s+(\d+):\s*(.+)$', line.strip(), re.IGNORECASE) - + lesson_match = re.match( + r"^Lesson\s+(\d+):\s*(.+)$", line.strip(), re.IGNORECASE + ) + if lesson_match: # Process previous 
lesson if it exists if current_lesson is not None and lesson_content: - lesson_text = '\n'.join(lesson_content).strip() + lesson_text = "\n".join(lesson_content).strip() if lesson_text: # Add lesson to course lesson = Lesson( lesson_number=current_lesson, title=lesson_title, - lesson_link=lesson_link + lesson_link=lesson_link, ) course.lessons.append(lesson) - + # Create chunks for this lesson chunks = self.chunk_text(lesson_text) for idx, chunk in enumerate(chunks): # For the first chunk of each lesson, add lesson context if idx == 0: - chunk_with_context = f"Lesson {current_lesson} content: {chunk}" + chunk_with_context = ( + f"Lesson {current_lesson} content: {chunk}" + ) else: chunk_with_context = chunk - + course_chunk = CourseChunk( content=chunk_with_context, course_title=course.title, lesson_number=current_lesson, - chunk_index=chunk_counter + chunk_index=chunk_counter, ) course_chunks.append(course_chunk) chunk_counter += 1 - + # Start new lesson current_lesson = int(lesson_match.group(1)) lesson_title = lesson_match.group(2).strip() lesson_link = None - + # Check if next line is a lesson link if i + 1 < len(lines): next_line = lines[i + 1].strip() - link_match = re.match(r'^Lesson Link:\s*(.+)$', next_line, re.IGNORECASE) + link_match = re.match( + r"^Lesson Link:\s*(.+)$", next_line, re.IGNORECASE + ) if link_match: lesson_link = link_match.group(1).strip() i += 1 # Skip the link line so it's not added to content - + lesson_content = [] else: # Add line to current lesson content lesson_content.append(line) - + i += 1 - + # Process the last lesson if current_lesson is not None and lesson_content: - lesson_text = '\n'.join(lesson_content).strip() + lesson_text = "\n".join(lesson_content).strip() if lesson_text: lesson = Lesson( lesson_number=current_lesson, title=lesson_title, - lesson_link=lesson_link + lesson_link=lesson_link, ) course.lessons.append(lesson) - + chunks = self.chunk_text(lesson_text) for idx, chunk in enumerate(chunks): # For any chunk of 
each lesson, add lesson context & course title - + chunk_with_context = f"Course {course_title} Lesson {current_lesson} content: {chunk}" - + course_chunk = CourseChunk( content=chunk_with_context, course_title=course.title, lesson_number=current_lesson, - chunk_index=chunk_counter + chunk_index=chunk_counter, ) course_chunks.append(course_chunk) chunk_counter += 1 - + # If no lessons found, treat entire content as one document if not course_chunks and len(lines) > 2: - remaining_content = '\n'.join(lines[start_index:]).strip() + remaining_content = "\n".join(lines[start_index:]).strip() if remaining_content: chunks = self.chunk_text(remaining_content) for chunk in chunks: course_chunk = CourseChunk( content=chunk, course_title=course.title, - chunk_index=chunk_counter + chunk_index=chunk_counter, ) course_chunks.append(course_chunk) chunk_counter += 1 - + return course, course_chunks diff --git a/backend/models.py b/backend/models.py index 7f7126fa3..12ae8113e 100644 --- a/backend/models.py +++ b/backend/models.py @@ -1,22 +1,28 @@ from typing import List, Dict, Optional from pydantic import BaseModel + class Lesson(BaseModel): """Represents a lesson within a course""" + lesson_number: int # Sequential lesson number (1, 2, 3, etc.) 
- title: str # Lesson title + title: str # Lesson title lesson_link: Optional[str] = None # URL link to the lesson + class Course(BaseModel): """Represents a complete course with its lessons""" - title: str # Full course title (used as unique identifier) + + title: str # Full course title (used as unique identifier) course_link: Optional[str] = None # URL link to the course instructor: Optional[str] = None # Course instructor name (optional metadata) - lessons: List[Lesson] = [] # List of lessons in this course + lessons: List[Lesson] = [] # List of lessons in this course + class CourseChunk(BaseModel): """Represents a text chunk from a course for vector storage""" - content: str # The actual text content - course_title: str # Which course this chunk belongs to - lesson_number: Optional[int] = None # Which lesson this chunk is from - chunk_index: int # Position of this chunk in the document \ No newline at end of file + + content: str # The actual text content + course_title: str # Which course this chunk belongs to + lesson_number: Optional[int] = None # Which lesson this chunk is from + chunk_index: int # Position of this chunk in the document diff --git a/backend/rag_system.py b/backend/rag_system.py index 50d848c8e..ebb2641d0 100644 --- a/backend/rag_system.py +++ b/backend/rag_system.py @@ -4,144 +4,165 @@ from vector_store import VectorStore from ai_generator import AIGenerator from session_manager import SessionManager -from search_tools import ToolManager, CourseSearchTool +from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool from models import Course, Lesson, CourseChunk + class RAGSystem: """Main orchestrator for the Retrieval-Augmented Generation system""" - + def __init__(self, config): self.config = config - + # Initialize core components - self.document_processor = DocumentProcessor(config.CHUNK_SIZE, config.CHUNK_OVERLAP) - self.vector_store = VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS) - 
self.ai_generator = AIGenerator(config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL) + self.document_processor = DocumentProcessor( + config.CHUNK_SIZE, config.CHUNK_OVERLAP + ) + self.vector_store = VectorStore( + config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS + ) + self.ai_generator = AIGenerator( + config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL + ) self.session_manager = SessionManager(config.MAX_HISTORY) - + # Initialize search tools self.tool_manager = ToolManager() self.search_tool = CourseSearchTool(self.vector_store) self.tool_manager.register_tool(self.search_tool) - + self.course_outline_tool = CourseOutlineTool(self.vector_store) + self.tool_manager.register_tool(self.course_outline_tool) + def add_course_document(self, file_path: str) -> Tuple[Course, int]: """ Add a single course document to the knowledge base. - + Args: file_path: Path to the course document - + Returns: Tuple of (Course object, number of chunks created) """ try: # Process the document - course, course_chunks = self.document_processor.process_course_document(file_path) - + course, course_chunks = self.document_processor.process_course_document( + file_path + ) + # Add course metadata to vector store for semantic search self.vector_store.add_course_metadata(course) - + # Add course content chunks to vector store self.vector_store.add_course_content(course_chunks) - + return course, len(course_chunks) except Exception as e: print(f"Error processing course document {file_path}: {e}") return None, 0 - - def add_course_folder(self, folder_path: str, clear_existing: bool = False) -> Tuple[int, int]: + + def add_course_folder( + self, folder_path: str, clear_existing: bool = False + ) -> Tuple[int, int]: """ Add all course documents from a folder. 
- + Args: folder_path: Path to folder containing course documents clear_existing: Whether to clear existing data first - + Returns: Tuple of (total courses added, total chunks created) """ total_courses = 0 total_chunks = 0 - + # Clear existing data if requested if clear_existing: print("Clearing existing data for fresh rebuild...") self.vector_store.clear_all_data() - + if not os.path.exists(folder_path): print(f"Folder {folder_path} does not exist") return 0, 0 - + # Get existing course titles to avoid re-processing existing_course_titles = set(self.vector_store.get_existing_course_titles()) - + # Process each file in the folder for file_name in os.listdir(folder_path): file_path = os.path.join(folder_path, file_name) - if os.path.isfile(file_path) and file_name.lower().endswith(('.pdf', '.docx', '.txt')): + if os.path.isfile(file_path) and file_name.lower().endswith( + (".pdf", ".docx", ".txt") + ): try: # Check if this course might already exist # We'll process the document to get the course ID, but only add if new - course, course_chunks = self.document_processor.process_course_document(file_path) - + course, course_chunks = ( + self.document_processor.process_course_document(file_path) + ) + if course and course.title not in existing_course_titles: # This is a new course - add it to the vector store self.vector_store.add_course_metadata(course) self.vector_store.add_course_content(course_chunks) total_courses += 1 total_chunks += len(course_chunks) - print(f"Added new course: {course.title} ({len(course_chunks)} chunks)") + print( + f"Added new course: {course.title} ({len(course_chunks)} chunks)" + ) existing_course_titles.add(course.title) elif course: print(f"Course already exists: {course.title} - skipping") except Exception as e: print(f"Error processing {file_name}: {e}") - + return total_courses, total_chunks - - def query(self, query: str, session_id: Optional[str] = None) -> Tuple[str, List[str]]: + + def query( + self, query: str, session_id: 
Optional[str] = None + ) -> Tuple[str, List[str]]: """ Process a user query using the RAG system with tool-based search. - + Args: query: User's question session_id: Optional session ID for conversation context - + Returns: Tuple of (response, sources list - empty for tool-based approach) """ # Create prompt for the AI with clear instructions prompt = f"""Answer this question about course materials: {query}""" - + # Get conversation history if session exists history = None if session_id: history = self.session_manager.get_conversation_history(session_id) - + # Generate response using AI with tools response = self.ai_generator.generate_response( query=prompt, conversation_history=history, tools=self.tool_manager.get_tool_definitions(), - tool_manager=self.tool_manager + tool_manager=self.tool_manager, ) - + # Get sources from the search tool sources = self.tool_manager.get_last_sources() # Reset sources after retrieving them self.tool_manager.reset_sources() - + # Update conversation history if session_id: self.session_manager.add_exchange(session_id, query, response) - + # Return response with sources from tool searches return response, sources - + def get_course_analytics(self) -> Dict: """Get analytics about the course catalog""" return { "total_courses": self.vector_store.get_course_count(), - "course_titles": self.vector_store.get_existing_course_titles() - } \ No newline at end of file + "course_titles": self.vector_store.get_existing_course_titles(), + } diff --git a/backend/search_tools.py b/backend/search_tools.py index adfe82352..cf19995d9 100644 --- a/backend/search_tools.py +++ b/backend/search_tools.py @@ -5,12 +5,12 @@ class Tool(ABC): """Abstract base class for all tools""" - + @abstractmethod def get_tool_definition(self) -> Dict[str, Any]: """Return Anthropic tool definition for this tool""" pass - + @abstractmethod def execute(self, **kwargs) -> str: """Execute the tool with given parameters""" @@ -19,11 +19,11 @@ def execute(self, **kwargs) -> 
def execute(
    self,
    query: str,
    course_name: Optional[str] = None,
    lesson_number: Optional[int] = None,
) -> str:
    """
    Execute the search tool with given parameters.

    Args:
        query: What to search for
        course_name: Optional course filter
        lesson_number: Optional lesson filter (0 is a valid lesson number)

    Returns:
        Formatted search results, the store's error message, or a
        "No relevant content found" notice naming the active filters.
    """
    # Clear up front so that neither the error path nor the empty path can
    # leave sources from an earlier search visible to the UI.
    self.last_sources = []

    # Use the vector store's unified search interface
    results = self.store.search(
        query=query, course_name=course_name, lesson_number=lesson_number
    )

    # Handle errors
    if results.error:
        return results.error

    # Handle empty results
    if results.is_empty():
        filter_info = ""
        if course_name:
            filter_info += f" in course '{course_name}'"
        # "is not None" rather than truthiness: lesson 0 must still be reported,
        # matching the check used in _format_results.
        if lesson_number is not None:
            filter_info += f" in lesson {lesson_number}"
        return f"No relevant content found{filter_info}."

    # Format and return results
    return self._format_results(results)


def _format_results(self, results: "SearchResults") -> str:
    """
    Format search results with course and lesson context.

    Each document is prefixed with a "[<course> - Lesson <n>]" header (the
    lesson part only when the metadata carries a lesson number). As a side
    effect, ``self.last_sources`` is replaced with one
    ``{"label": ..., "url": ...}`` dict per document; ``url`` comes from the
    store's ``get_lesson_link`` and is None when there is no lesson number.
    """
    formatted = []
    sources = []  # Track sources for the UI

    for doc, meta in zip(results.documents, results.metadata):
        course_title = meta.get("course_title", "unknown")
        lesson_num = meta.get("lesson_number")

        # The header shown to the model and the label shown in the UI are the
        # same text, so build it once.
        label = course_title
        url = None
        if lesson_num is not None:
            label += f" - Lesson {lesson_num}"
            # Look up lesson link from catalog
            url = self.store.get_lesson_link(course_title, lesson_num)

        sources.append({"label": label, "url": url})
        formatted.append(f"[{label}]\n{doc}")

    # Store sources for retrieval
    self.last_sources = sources

    return "\n\n".join(formatted)
def execute(self, course_title: str) -> str:
    """
    Return a plain-text outline for the first course whose title matches.

    Matching is case-insensitive and works in both directions: the query may
    be a fragment of the title, or the title a fragment of the query.

    Args:
        course_title: Course title or partial title (e.g. 'MCP')

    Returns:
        "Course: ...", "Link: ..." ("N/A" when the course has no link) and
        one line per lesson ordered by lesson number, or a
        "No course found matching ..." message.
    """
    not_found = f"No course found matching '{course_title}'."

    # An empty string is a substring of every string, so a blank query would
    # otherwise "match" whichever course happens to be listed first.
    needle = course_title.strip().lower()
    if not needle:
        return not_found

    match = None
    for course in self.store.get_all_courses_metadata():
        title = (course.get("title") or "").lower()
        # Same reasoning in the other direction: an untitled catalog entry
        # must not satisfy "title in needle" for every query.
        if title and (needle in title or title in needle):
            match = course
            break

    if match is None:
        return not_found

    title = match["title"]
    link = match.get("course_link") or "N/A"
    lessons = match.get("lessons") or []

    lines = [f"Course: {title}", f"Link: {link}", "", "Lessons:"]
    # NOTE(review): the lesson line's leading whitespace is copied from a
    # whitespace-collapsed view of the file; confirm the intended indent.
    for lesson in sorted(lessons, key=lambda item: item["lesson_number"]):
        lines.append(
            f" Lesson {lesson['lesson_number']}: {lesson['lesson_title']}"
        )

    return "\n".join(lines)
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class Message:
    """Represents a single message in a conversation"""

    role: str  # "user" or "assistant"
    content: str  # The message content


class SessionManager:
    """Manages conversation sessions and message history"""

    def __init__(self, max_history: int = 5):
        """
        Args:
            max_history: Number of exchanges to keep per session. The stored
                list is capped at ``max_history * 2`` messages.
        """
        self.max_history = max_history
        self.sessions: Dict[str, List[Message]] = {}
        self.session_counter = 0

    def create_session(self) -> str:
        """Create a new, empty session and return its ID ("session_<n>")."""
        self.session_counter += 1
        session_id = f"session_{self.session_counter}"
        self.sessions[session_id] = []
        return session_id

    def add_message(self, session_id: str, role: str, content: str):
        """
        Append a message to a session, creating the session if needed, and
        trim the session to the newest ``max_history * 2`` messages.
        """
        history = self.sessions.setdefault(session_id, [])
        history.append(Message(role=role, content=content))

        # Keep conversation history within limits
        limit = self.max_history * 2
        if len(history) > limit:
            # history[-0:] is the WHOLE list, so a zero (or negative) budget
            # has to be handled explicitly or nothing is ever trimmed.
            self.sessions[session_id] = history[-limit:] if limit > 0 else []

    def add_exchange(self, session_id: str, user_message: str, assistant_message: str):
        """Add a complete question-answer exchange (user first, then assistant)."""
        self.add_message(session_id, "user", user_message)
        self.add_message(session_id, "assistant", assistant_message)

    def get_conversation_history(self, session_id: Optional[str]) -> Optional[str]:
        """
        Get formatted conversation history for a session.

        Returns:
            One "<Role>: <content>" line per stored message joined by
            newlines, or None when the ID is falsy, unknown, or the session
            has no messages.
        """
        if not session_id or session_id not in self.sessions:
            return None

        messages = self.sessions[session_id]
        if not messages:
            return None

        # Format messages for context
        return "\n".join(f"{msg.role.title()}: {msg.content}" for msg in messages)

    def clear_session(self, session_id: str):
        """Clear all messages from a session; unknown IDs are ignored."""
        if session_id in self.sessions:
            self.sessions[session_id] = []
import pytest
from unittest.mock import MagicMock
from typing import Any, Dict, List, Optional

from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient
from pydantic import BaseModel

from vector_store import SearchResults


@pytest.fixture
def sample_search_results():
    """SearchResults holding a single document with course title and lesson number."""
    hit_text = "This is lesson content about Python basics"
    hit_meta = {"course_title": "Intro to Python", "lesson_number": 1}
    return SearchResults(documents=[hit_text], metadata=[hit_meta], distances=[0.1])


@pytest.fixture
def empty_search_results():
    """SearchResults whose documents, metadata and distances are all empty."""
    return SearchResults(documents=[], metadata=[], distances=[])


@pytest.fixture
def error_search_results():
    """Result of SearchResults.empty() called with an error message."""
    return SearchResults.empty("Search error: connection refused")


@pytest.fixture
def mock_vector_store():
    """MagicMock standing in for VectorStore; get_lesson_link returns a fixed URL."""
    return MagicMock(
        **{"get_lesson_link.return_value": "https://example.com/lesson/1"}
    )


# ── API / endpoint fixtures ───────────────────────────────────────────────────


@pytest.fixture
def mock_rag_system():
    """MagicMock standing in for RAGSystem, with canned query/analytics/session values."""
    canned_sources = [
        {"label": "Intro to Python - Lesson 1", "url": "http://example.com/lesson/1"}
    ]
    canned_stats = {
        "total_courses": 2,
        "course_titles": ["Intro to Python", "Advanced Python"],
    }

    rag = MagicMock()
    rag.query.return_value = ("Test answer", canned_sources)
    rag.get_course_analytics.return_value = canned_stats
    rag.session_manager.create_session.return_value = "generated-session-id"
    return rag


@pytest.fixture
def test_app(mock_rag_system):
    """
    Small FastAPI app exposing /api/query, /api/courses and
    /api/session/{session_id}, each delegating to ``mock_rag_system`` and
    turning any exception into an HTTP 500 whose detail is the exception text.
    """
    api = FastAPI()

    class QueryRequest(BaseModel):
        query: str
        session_id: Optional[str] = None

    class QueryResponse(BaseModel):
        answer: str
        sources: List[Dict[str, Any]]
        session_id: str

    class CourseStats(BaseModel):
        total_courses: int
        course_titles: List[str]

    @api.post("/api/query", response_model=QueryResponse)
    async def query_documents(request: QueryRequest):
        try:
            # A missing/empty session ID means "start a new session".
            sid = (
                request.session_id
                or mock_rag_system.session_manager.create_session()
            )
            answer, sources = mock_rag_system.query(request.query, sid)
            return QueryResponse(answer=answer, sources=sources, session_id=sid)
        except Exception as exc:
            raise HTTPException(status_code=500, detail=str(exc))

    @api.get("/api/courses", response_model=CourseStats)
    async def get_course_stats():
        try:
            stats = mock_rag_system.get_course_analytics()
            return CourseStats(
                total_courses=stats["total_courses"],
                course_titles=stats["course_titles"],
            )
        except Exception as exc:
            raise HTTPException(status_code=500, detail=str(exc))

    @api.delete("/api/session/{session_id}")
    async def clear_session(session_id: str):
        try:
            mock_rag_system.session_manager.clear_session(session_id)
        except Exception as exc:
            raise HTTPException(status_code=500, detail=str(exc))
        return {"success": True, "session_id": session_id}

    return api


@pytest.fixture
def client(test_app):
    """TestClient bound to the app built by ``test_app``."""
    return TestClient(test_app)
from unittest.mock import MagicMock


def build_text_response(text: str):
    """
    Build a mock message whose stop_reason is 'end_turn' and whose content is
    a single block with type 'text' and the given text.
    """
    text_block = MagicMock(type="text", text=text)
    return MagicMock(stop_reason="end_turn", content=[text_block])


def build_tool_use_response(tool_name: str, tool_id: str, tool_input: dict):
    """
    Build a mock message whose stop_reason is 'tool_use' and whose content is
    a single 'tool_use' block carrying the given name, id and input.
    """
    call_block = MagicMock(type="tool_use", id=tool_id, input=tool_input)
    # "name" is a MagicMock constructor argument (it names the mock itself),
    # so the attribute has to be assigned after construction.
    call_block.name = tool_name
    return MagicMock(stop_reason="tool_use", content=[call_block])
import pytest
from unittest.mock import MagicMock, patch

from helpers import build_text_response, build_tool_use_response
from ai_generator import AIGenerator


@pytest.fixture
def mock_anthropic_client():
    """Replace ai_generator.anthropic.Anthropic so constructing it yields a MagicMock client."""
    fake_client = MagicMock()
    with patch("ai_generator.anthropic.Anthropic", return_value=fake_client):
        yield fake_client


@pytest.fixture
def generator(mock_anthropic_client):
    """AIGenerator built while the Anthropic class is patched."""
    return AIGenerator(api_key="test-key", model="claude-test")


# ── basic text response ───────────────────────────────────────────────────────


def test_returns_text_from_content_block(generator, mock_anthropic_client):
    """The text of the reply's content block is returned as-is."""
    reply = build_text_response("Hello world")
    mock_anthropic_client.messages.create.return_value = reply
    assert generator.generate_response(query="Hi") == "Hello world"


def test_does_not_call_tool_manager_when_end_turn(generator, mock_anthropic_client):
    """A plain text reply must not trigger any tool execution."""
    mock_anthropic_client.messages.create.return_value = build_text_response("Answer")
    manager = MagicMock()
    generator.generate_response(query="Hi", tool_manager=manager)
    manager.execute_tool.assert_not_called()


# ── tools forwarding ──────────────────────────────────────────────────────────


def test_includes_tools_in_api_params_when_provided(generator, mock_anthropic_client):
    """tools + tool_choice must appear in the first API call; missing → Claude never searches"""
    create = mock_anthropic_client.messages.create
    create.return_value = build_text_response("Answer")
    tool_defs = [
        {"name": "search_course_content", "description": "Search", "input_schema": {}}
    ]

    generator.generate_response(query="Hi", tools=tool_defs)

    sent = create.call_args.kwargs
    assert "tools" in sent, "tools not forwarded to API — Claude will never search"
    assert sent.get("tool_choice") == {"type": "auto"}, "tool_choice missing or wrong"
def _run_tool_round(generator, client, *, final_text, tool_result, tools):
    """
    Queue a tool_use reply followed by a text reply on ``client``, run one
    query through ``generator`` with a fresh MagicMock tool manager, and
    return (answer, tool_manager).
    """
    client.messages.create.side_effect = [
        build_tool_use_response(
            tool_name="search_course_content",
            tool_id="toolu_01",
            tool_input={"query": "Python basics"},
        ),
        build_text_response(final_text),
    ]
    manager = MagicMock()
    manager.execute_tool.return_value = tool_result
    answer = generator.generate_response(
        query="What is Python?", tools=tools, tool_manager=manager
    )
    return answer, manager


def test_no_tool_choice_when_tools_not_provided(generator, mock_anthropic_client):
    """Without tools, the API call carries no tool_choice."""
    create = mock_anthropic_client.messages.create
    create.return_value = build_text_response("Answer")
    generator.generate_response(query="Hi")
    assert "tool_choice" not in create.call_args.kwargs


# ── conversation history ──────────────────────────────────────────────────────


def test_includes_conversation_history_in_system_prompt(
    generator, mock_anthropic_client
):
    """History passed by the caller shows up in the system prompt."""
    create = mock_anthropic_client.messages.create
    create.return_value = build_text_response("Answer")
    generator.generate_response(query="Hi", conversation_history="User: hello\nAI: hi")
    assert "Previous conversation:" in create.call_args.kwargs["system"]


# ── error handling ────────────────────────────────────────────────────────────


def test_api_exception_propagates(generator, mock_anthropic_client):
    """An exception raised by the client is not swallowed."""
    mock_anthropic_client.messages.create.side_effect = Exception("API error")
    with pytest.raises(Exception, match="API error"):
        generator.generate_response(query="Hi")


# ── tool-use flow ─────────────────────────────────────────────────────────────


def test_tool_use_response_triggers_tool_execution(generator, mock_anthropic_client):
    """stop_reason='tool_use' must cause execute_tool() to be called"""
    _, manager = _run_tool_round(
        generator,
        mock_anthropic_client,
        final_text="Here is the answer",
        tool_result="Search results here",
        tools=[],
    )
    manager.execute_tool.assert_called_once_with(
        "search_course_content", query="Python basics"
    )


def test_tool_result_sent_as_user_message_in_follow_up(
    generator, mock_anthropic_client
):
    """Tool result must be a user message containing type='tool_result' with matching tool_use_id"""
    _run_tool_round(
        generator,
        mock_anthropic_client,
        final_text="Here is the answer",
        tool_result="Search results here",
        tools=[],
    )

    follow_up = mock_anthropic_client.messages.create.call_args_list[1].kwargs
    user_msg = follow_up["messages"][-1]
    assert user_msg["role"] == "user", f"Expected user role, got {user_msg['role']}"
    content = user_msg["content"]
    assert isinstance(content, list), "tool_result content must be a list"
    first = content[0]
    assert first["type"] == "tool_result"
    assert first["tool_use_id"] == "toolu_01"


def test_follow_up_call_does_not_include_tools(generator, mock_anthropic_client):
    """The follow-up (2nd) API call must not include tools to avoid an infinite tool loop"""
    _run_tool_round(
        generator,
        mock_anthropic_client,
        final_text="Here is the answer",
        tool_result="Results",
        tools=[{"name": "search_course_content"}],
    )
    follow_up = mock_anthropic_client.messages.create.call_args_list[1].kwargs
    assert "tools" not in follow_up


def test_follow_up_call_returns_text_content(generator, mock_anthropic_client):
    """The text of the second reply is what generate_response returns."""
    answer, _ = _run_tool_round(
        generator,
        mock_anthropic_client,
        final_text="Final answer here",
        tool_result="Results",
        tools=[],
    )
    assert answer == "Final answer here"


def test_makes_exactly_two_api_calls_for_tool_use(generator, mock_anthropic_client):
    """One tool round costs two API calls: the tool_use reply and the follow-up."""
    _run_tool_round(
        generator,
        mock_anthropic_client,
        final_text="Answer",
        tool_result="Results",
        tools=[],
    )
    assert mock_anthropic_client.messages.create.call_count == 2
import pytest


def _ask(client, **extra):
    """POST the stock question to /api/query, adding any extra JSON fields."""
    return client.post("/api/query", json={"query": "What is Python?", **extra})


# ── POST /api/query ───────────────────────────────────────────────────────────


def test_query_returns_200_for_valid_request(client):
    """A well-formed query request succeeds."""
    assert _ask(client).status_code == 200


def test_query_response_has_required_fields(client):
    """The response body carries answer, sources and session_id."""
    payload = _ask(client).json()
    assert "answer" in payload
    assert "sources" in payload
    assert "session_id" in payload


def test_query_answer_comes_from_rag_system(client, mock_rag_system):
    """The answer field is the first element of what rag.query returned."""
    mock_rag_system.query.return_value = ("Custom answer", [])
    assert _ask(client).json()["answer"] == "Custom answer"


def test_query_sources_come_from_rag_system(client, mock_rag_system):
    """The sources field is the second element of what rag.query returned."""
    expected = [{"label": "Course - Lesson 1", "url": "http://example.com"}]
    mock_rag_system.query.return_value = ("Answer", expected)
    assert _ask(client).json()["sources"] == expected


def test_query_creates_session_when_session_id_not_provided(client, mock_rag_system):
    """Without a session_id, exactly one session is created and echoed back."""
    sessions = mock_rag_system.session_manager
    sessions.create_session.return_value = "new-session-abc"
    payload = _ask(client).json()
    assert payload["session_id"] == "new-session-abc"
    sessions.create_session.assert_called_once()


def test_query_uses_provided_session_id(client, mock_rag_system):
    """A supplied session_id is echoed back and no new session is created."""
    payload = _ask(client, session_id="existing-session").json()
    assert payload["session_id"] == "existing-session"
    mock_rag_system.session_manager.create_session.assert_not_called()


def test_query_forwards_query_and_session_id_to_rag_system(client, mock_rag_system):
    """rag.query receives the question and session ID positionally."""
    _ask(client, session_id="sess-1")
    mock_rag_system.query.assert_called_once_with("What is Python?", "sess-1")
def test_query_returns_422_for_missing_query_field(client):
    """A body without the required 'query' field is rejected with 422."""
    reply = client.post("/api/query", json={})
    assert reply.status_code == 422


def test_query_returns_500_when_rag_system_raises(client, mock_rag_system):
    """An exception from rag.query becomes a 500 whose detail carries its message."""
    mock_rag_system.query.side_effect = RuntimeError("RAG failure")
    reply = client.post("/api/query", json={"query": "What is Python?"})
    assert reply.status_code == 500
    assert "RAG failure" in reply.json()["detail"]


# ── GET /api/courses ──────────────────────────────────────────────────────────


def test_courses_returns_200(client):
    """The courses endpoint succeeds with the default mock analytics."""
    reply = client.get("/api/courses")
    assert reply.status_code == 200


def test_courses_response_has_required_fields(client):
    """The response body carries total_courses and course_titles."""
    payload = client.get("/api/courses").json()
    assert "total_courses" in payload
    assert "course_titles" in payload


def test_courses_total_courses_from_rag_system(client, mock_rag_system):
    """total_courses is taken from the analytics dict."""
    mock_rag_system.get_course_analytics.return_value = {
        "total_courses": 5,
        "course_titles": ["A", "B", "C", "D", "E"],
    }
    payload = client.get("/api/courses").json()
    assert payload["total_courses"] == 5


def test_courses_titles_from_rag_system(client, mock_rag_system):
    """course_titles is taken from the analytics dict."""
    expected = ["Intro to Python", "Advanced Python"]
    mock_rag_system.get_course_analytics.return_value = {
        "total_courses": 2,
        "course_titles": expected,
    }
    payload = client.get("/api/courses").json()
    assert payload["course_titles"] == expected


def test_courses_returns_500_on_analytics_error(client, mock_rag_system):
    """An exception from get_course_analytics becomes a 500 carrying its message."""
    mock_rag_system.get_course_analytics.side_effect = RuntimeError("DB error")
    reply = client.get("/api/courses")
    assert reply.status_code == 500
    assert "DB error" in reply.json()["detail"]


# ── DELETE /api/session/{session_id} ─────────────────────────────────────────


def test_clear_session_returns_200(client):
    """Clearing a session succeeds."""
    reply = client.delete("/api/session/session-abc")
    assert reply.status_code == 200


def test_clear_session_response_contains_session_id_and_success(client):
    """The response echoes the session ID and reports success."""
    payload = client.delete("/api/session/session-abc").json()
    assert payload["session_id"] == "session-abc"
    assert payload["success"] is True


def test_clear_session_calls_session_manager_clear(client, mock_rag_system):
    """The path parameter is forwarded to session_manager.clear_session."""
    client.delete("/api/session/session-xyz")
    clear = mock_rag_system.session_manager.clear_session
    clear.assert_called_once_with("session-xyz")


def test_clear_session_returns_500_on_error(client, mock_rag_system):
    """An exception from clear_session becomes a 500 carrying its message."""
    clear = mock_rag_system.session_manager.clear_session
    clear.side_effect = RuntimeError("Session error")
    reply = client.delete("/api/session/session-abc")
    assert reply.status_code == 500
    assert "Session error" in reply.json()["detail"]
import pytest
from unittest.mock import MagicMock

from helpers import build_text_response, build_tool_use_response
from vector_store import SearchResults
from search_tools import CourseSearchTool


def _tool_over(store, results):
    """Make ``store.search`` return ``results`` and wrap the store in a CourseSearchTool."""
    store.search.return_value = results
    return CourseSearchTool(store)


@pytest.fixture
def search_tool(mock_vector_store, sample_search_results):
    """CourseSearchTool whose mock store answers every search with the sample results."""
    return _tool_over(mock_vector_store, sample_search_results)


# ── result format ─────────────────────────────────────────────────────────────


def test_execute_returns_string(search_tool):
    """execute() yields a non-empty string."""
    output = search_tool.execute(query="What is Python?")
    assert isinstance(output, str) and len(output) > 0


def test_execute_formatted_content_includes_course_header(search_tool):
    """The formatted text carries the '[course - Lesson n]' header."""
    output = search_tool.execute(query="What is Python?")
    assert "[Intro to Python - Lesson 1]" in output


# ── sources format ────────────────────────────────────────────────────────────


def test_execute_sources_are_list_of_dicts(search_tool):
    """Sources must be List[Dict]; returning List[str] causes a Pydantic 500"""
    search_tool.execute(query="What is Python?")
    recorded = search_tool.last_sources
    assert len(recorded) > 0
    src = recorded[0]
    assert isinstance(src, dict), f"Expected dict, got {type(src)}"
    assert "label" in src
    assert "url" in src


def test_execute_sources_label_format(search_tool):
    """The source label is '<course> - Lesson <n>'."""
    search_tool.execute(query="What is Python?")
    first = search_tool.last_sources[0]
    assert first["label"] == "Intro to Python - Lesson 1"


def test_execute_calls_get_lesson_link_with_correct_args(
    search_tool, mock_vector_store
):
    """The lesson link is looked up by course title and lesson number."""
    search_tool.execute(query="What is Python?")
    mock_vector_store.get_lesson_link.assert_called_with("Intro to Python", 1)


def test_execute_url_in_sources_from_get_lesson_link(search_tool, mock_vector_store):
    """The source URL is whatever get_lesson_link returned."""
    link = "https://example.com/lesson/1"
    mock_vector_store.get_lesson_link.return_value = link
    search_tool.execute(query="What is Python?")
    assert search_tool.last_sources[0]["url"] == "https://example.com/lesson/1"


def test_execute_url_none_when_no_lesson_number(mock_vector_store):
    """When metadata has no lesson_number, get_lesson_link must not be called"""
    lessonless = SearchResults(
        documents=["Content without lesson number"],
        metadata=[{"course_title": "Intro to Python"}],  # no lesson_number key
        distances=[0.1],
    )
    tool = _tool_over(mock_vector_store, lessonless)
    tool.execute(query="What is Python?")
    mock_vector_store.get_lesson_link.assert_not_called()
    assert tool.last_sources[0]["url"] is None


# ── error / empty paths ───────────────────────────────────────────────────────


def test_execute_with_search_error_returns_error_string(
    mock_vector_store, error_search_results
):
    """A store error comes back as a string mentioning the error."""
    tool = _tool_over(mock_vector_store, error_search_results)
    output = tool.execute(query="What is Python?")
    assert isinstance(output, str)
    assert "error" in output.lower() or "Search error" in output


def test_execute_with_empty_results_returns_no_content_message(
    mock_vector_store, empty_search_results
):
    """No hits produce the 'No relevant content found' message."""
    tool = _tool_over(mock_vector_store, empty_search_results)
    output = tool.execute(query="What is Python?")
    assert output.startswith("No relevant content found")


def test_execute_empty_with_course_filter_includes_course_in_message(
    mock_vector_store, empty_search_results
):
    """The 'not found' message names the course filter that was applied."""
    tool = _tool_over(mock_vector_store, empty_search_results)
    output = tool.execute(query="What is Python?", course_name="Intro to Python")
    assert "in course 'Intro to Python'" in output


# ── parameter forwarding ──────────────────────────────────────────────────────


def test_execute_forwards_course_name_to_vector_store(
    mock_vector_store, sample_search_results
):
    """course_name reaches store.search; lesson_number defaults to None."""
    tool = _tool_over(mock_vector_store, sample_search_results)
    tool.execute(query="What is Python?", course_name="Intro to Python")
    mock_vector_store.search.assert_called_once_with(
        query="What is Python?",
        course_name="Intro to Python",
        lesson_number=None,
    )


def test_execute_forwards_lesson_number_to_vector_store(
    mock_vector_store, sample_search_results
):
    """lesson_number reaches store.search; course_name defaults to None."""
    tool = _tool_over(mock_vector_store, sample_search_results)
    tool.execute(query="What is Python?", lesson_number=3)
    mock_vector_store.search.assert_called_once_with(
        query="What is Python?",
        course_name=None,
        lesson_number=3,
    )


# ── stale-state detection ─────────────────────────────────────────────────────


def test_last_sources_reset_between_calls(
    mock_vector_store, sample_search_results, empty_search_results
):
    """After an empty-result call last_sources must be [] — stale sources must not leak"""
    tool = _tool_over(mock_vector_store, sample_search_results)
    tool.execute(query="What is Python?")
    assert len(tool.last_sources) > 0, "Precondition: first call must populate sources"

    # Same tool, same store: only the canned search result changes.
    mock_vector_store.search.return_value = empty_search_results
    tool.execute(query="What else?")
    assert tool.last_sources == [], f"Expected [], got {tool.last_sources}"
+""" + +import pytest +from unittest.mock import MagicMock, patch + + +@pytest.fixture +def rag_system(): + """Yield (RAGSystem, mock_ag, mock_tm, mock_sm) with all deps patched""" + mock_ag = MagicMock() + mock_ag.generate_response.return_value = "Test answer" + + mock_tm = MagicMock() + mock_tm.get_tool_definitions.return_value = [{"name": "search_course_content"}] + mock_tm.get_last_sources.return_value = [] + + mock_sm = MagicMock() + mock_sm.get_conversation_history.return_value = None + + with ( + patch("rag_system.DocumentProcessor"), + patch("rag_system.VectorStore"), + patch("rag_system.AIGenerator", return_value=mock_ag), + patch("rag_system.SessionManager", return_value=mock_sm), + patch("rag_system.ToolManager", return_value=mock_tm), + patch("rag_system.CourseSearchTool"), + patch("rag_system.CourseOutlineTool"), + ): + + from rag_system import RAGSystem + + config = MagicMock() + config.CHUNK_SIZE = 800 + config.CHUNK_OVERLAP = 100 + config.CHROMA_PATH = "./test_chroma" + config.EMBEDDING_MODEL = "all-MiniLM-L6-v2" + config.MAX_RESULTS = 5 + config.ANTHROPIC_API_KEY = "test-key" + config.ANTHROPIC_MODEL = "claude-test" + config.MAX_HISTORY = 2 + + yield RAGSystem(config), mock_ag, mock_tm, mock_sm + + +# ── return type ─────────────────────────────────────────────────────────────── + + +def test_query_returns_tuple_of_str_and_list(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + result = rag.query("What is Python?") + assert isinstance(result, tuple) and len(result) == 2 + assert isinstance(result[0], str) + assert isinstance(result[1], list) + + +def test_query_returns_ai_generator_response_as_answer(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + mock_ag.generate_response.return_value = "AI response text" + answer, _ = rag.query("What is Python?") + assert answer == "AI response text" + + +# ── tool forwarding (core bug check) ───────────────────────────────────────── + + +def 
test_query_passes_tool_definitions_to_generate_response(rag_system): + """If tools are not forwarded, Claude never calls search — most likely root cause""" + rag, mock_ag, mock_tm, mock_sm = rag_system + tool_defs = [{"name": "search_course_content"}] + mock_tm.get_tool_definitions.return_value = tool_defs + + rag.query("What is Python?") + + call_kwargs = mock_ag.generate_response.call_args[1] + assert "tools" in call_kwargs, "tools not forwarded to generate_response" + assert call_kwargs["tools"] == tool_defs + + +def test_query_passes_tool_manager_instance_to_generate_response(rag_system): + """If tool_manager is not passed, execute_tool() is never called""" + rag, mock_ag, mock_tm, mock_sm = rag_system + rag.query("What is Python?") + + call_kwargs = mock_ag.generate_response.call_args[1] + assert "tool_manager" in call_kwargs, "tool_manager not forwarded" + assert call_kwargs["tool_manager"] is rag.tool_manager + + +# ── sources ─────────────────────────────────────────────────────────────────── + + +def test_query_retrieves_sources_from_tool_manager(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + sources_data = [{"label": "Course - Lesson 1", "url": "http://example.com"}] + mock_tm.get_last_sources.return_value = sources_data + + _, sources = rag.query("What is Python?") + assert sources == sources_data + + +def test_query_calls_reset_sources_after_retrieval(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + rag.query("What is Python?") + mock_tm.reset_sources.assert_called_once() + + +# ── session / history ───────────────────────────────────────────────────────── + + +def test_query_with_session_id_fetches_conversation_history(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + rag.query("What is Python?", session_id="session-123") + mock_sm.get_conversation_history.assert_called_once_with("session-123") + + +def test_query_without_session_id_does_not_fetch_history(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + 
rag.query("What is Python?") + mock_sm.get_conversation_history.assert_not_called() + + +def test_query_with_session_id_saves_exchange(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + mock_ag.generate_response.return_value = "My answer" + rag.query("What is Python?", session_id="session-123") + mock_sm.add_exchange.assert_called_once_with( + "session-123", "What is Python?", "My answer" + ) + + +def test_query_passes_history_to_generate_response(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + mock_sm.get_conversation_history.return_value = "Past history" + + rag.query("What is Python?", session_id="session-123") + + call_kwargs = mock_ag.generate_response.call_args[1] + assert call_kwargs.get("conversation_history") == "Past history" + + +# ── error propagation ───────────────────────────────────────────────────────── + + +def test_query_exception_in_generate_response_propagates(rag_system): + rag, mock_ag, mock_tm, mock_sm = rag_system + mock_ag.generate_response.side_effect = RuntimeError("AI failure") + + with pytest.raises(RuntimeError, match="AI failure"): + rag.query("What is Python?") diff --git a/backend/vector_store.py b/backend/vector_store.py index 390abe71c..5ef3c589c 100644 --- a/backend/vector_store.py +++ b/backend/vector_store.py @@ -5,73 +5,88 @@ from models import Course, CourseChunk from sentence_transformers import SentenceTransformer + @dataclass class SearchResults: """Container for search results with metadata""" + documents: List[str] metadata: List[Dict[str, Any]] distances: List[float] error: Optional[str] = None - + @classmethod - def from_chroma(cls, chroma_results: Dict) -> 'SearchResults': + def from_chroma(cls, chroma_results: Dict) -> "SearchResults": """Create SearchResults from ChromaDB query results""" return cls( - documents=chroma_results['documents'][0] if chroma_results['documents'] else [], - metadata=chroma_results['metadatas'][0] if chroma_results['metadatas'] else [], - 
distances=chroma_results['distances'][0] if chroma_results['distances'] else [] + documents=( + chroma_results["documents"][0] if chroma_results["documents"] else [] + ), + metadata=( + chroma_results["metadatas"][0] if chroma_results["metadatas"] else [] + ), + distances=( + chroma_results["distances"][0] if chroma_results["distances"] else [] + ), ) - + @classmethod - def empty(cls, error_msg: str) -> 'SearchResults': + def empty(cls, error_msg: str) -> "SearchResults": """Create empty results with error message""" return cls(documents=[], metadata=[], distances=[], error=error_msg) - + def is_empty(self) -> bool: """Check if results are empty""" return len(self.documents) == 0 + class VectorStore: """Vector storage using ChromaDB for course content and metadata""" - + def __init__(self, chroma_path: str, embedding_model: str, max_results: int = 5): self.max_results = max_results # Initialize ChromaDB client self.client = chromadb.PersistentClient( - path=chroma_path, - settings=Settings(anonymized_telemetry=False) + path=chroma_path, settings=Settings(anonymized_telemetry=False) ) - + # Set up sentence transformer embedding function - self.embedding_function = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction( - model_name=embedding_model + self.embedding_function = ( + chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction( + model_name=embedding_model + ) ) - + # Create collections for different types of data - self.course_catalog = self._create_collection("course_catalog") # Course titles/instructors - self.course_content = self._create_collection("course_content") # Actual course material - + self.course_catalog = self._create_collection( + "course_catalog" + ) # Course titles/instructors + self.course_content = self._create_collection( + "course_content" + ) # Actual course material + def _create_collection(self, name: str): """Create or get a ChromaDB collection""" return self.client.get_or_create_collection( - 
name=name, - embedding_function=self.embedding_function + name=name, embedding_function=self.embedding_function ) - - def search(self, - query: str, - course_name: Optional[str] = None, - lesson_number: Optional[int] = None, - limit: Optional[int] = None) -> SearchResults: + + def search( + self, + query: str, + course_name: Optional[str] = None, + lesson_number: Optional[int] = None, + limit: Optional[int] = None, + ) -> SearchResults: """ Main search interface that handles course resolution and content search. - + Args: query: What to search for in course content course_name: Optional course name/title to filter by lesson_number: Optional lesson number to filter by limit: Maximum results to return - + Returns: SearchResults object with documents and metadata """ @@ -81,104 +96,111 @@ def search(self, course_title = self._resolve_course_name(course_name) if not course_title: return SearchResults.empty(f"No course found matching '{course_name}'") - + # Step 2: Build filter for content search filter_dict = self._build_filter(course_title, lesson_number) - + # Step 3: Search course content # Use provided limit or fall back to configured max_results search_limit = limit if limit is not None else self.max_results - + try: results = self.course_content.query( - query_texts=[query], - n_results=search_limit, - where=filter_dict + query_texts=[query], n_results=search_limit, where=filter_dict ) return SearchResults.from_chroma(results) except Exception as e: return SearchResults.empty(f"Search error: {str(e)}") - + def _resolve_course_name(self, course_name: str) -> Optional[str]: """Use vector search to find best matching course by name""" try: - results = self.course_catalog.query( - query_texts=[course_name], - n_results=1 - ) - - if results['documents'][0] and results['metadatas'][0]: + results = self.course_catalog.query(query_texts=[course_name], n_results=1) + + if results["documents"][0] and results["metadatas"][0]: # Return the title (which is now the ID) - 
return results['metadatas'][0][0]['title'] + return results["metadatas"][0][0]["title"] except Exception as e: print(f"Error resolving course name: {e}") - + return None - - def _build_filter(self, course_title: Optional[str], lesson_number: Optional[int]) -> Optional[Dict]: + + def _build_filter( + self, course_title: Optional[str], lesson_number: Optional[int] + ) -> Optional[Dict]: """Build ChromaDB filter from search parameters""" if not course_title and lesson_number is None: return None - + # Handle different filter combinations if course_title and lesson_number is not None: - return {"$and": [ - {"course_title": course_title}, - {"lesson_number": lesson_number} - ]} - + return { + "$and": [ + {"course_title": course_title}, + {"lesson_number": lesson_number}, + ] + } + if course_title: return {"course_title": course_title} - + return {"lesson_number": lesson_number} - + def add_course_metadata(self, course: Course): """Add course information to the catalog for semantic search""" import json course_text = course.title - + # Build lessons metadata and serialize as JSON string lessons_metadata = [] for lesson in course.lessons: - lessons_metadata.append({ - "lesson_number": lesson.lesson_number, - "lesson_title": lesson.title, - "lesson_link": lesson.lesson_link - }) - + lessons_metadata.append( + { + "lesson_number": lesson.lesson_number, + "lesson_title": lesson.title, + "lesson_link": lesson.lesson_link, + } + ) + self.course_catalog.add( documents=[course_text], - metadatas=[{ - "title": course.title, - "instructor": course.instructor, - "course_link": course.course_link, - "lessons_json": json.dumps(lessons_metadata), # Serialize as JSON string - "lesson_count": len(course.lessons) - }], - ids=[course.title] + metadatas=[ + { + "title": course.title, + "instructor": course.instructor, + "course_link": course.course_link, + "lessons_json": json.dumps( + lessons_metadata + ), # Serialize as JSON string + "lesson_count": len(course.lessons), + } + ], + 
ids=[course.title], ) - + def add_course_content(self, chunks: List[CourseChunk]): """Add course content chunks to the vector store""" if not chunks: return - + documents = [chunk.content for chunk in chunks] - metadatas = [{ - "course_title": chunk.course_title, - "lesson_number": chunk.lesson_number, - "chunk_index": chunk.chunk_index - } for chunk in chunks] + metadatas = [ + { + "course_title": chunk.course_title, + "lesson_number": chunk.lesson_number, + "chunk_index": chunk.chunk_index, + } + for chunk in chunks + ] # Use title with chunk index for unique IDs - ids = [f"{chunk.course_title.replace(' ', '_')}_{chunk.chunk_index}" for chunk in chunks] - - self.course_content.add( - documents=documents, - metadatas=metadatas, - ids=ids - ) - + ids = [ + f"{chunk.course_title.replace(' ', '_')}_{chunk.chunk_index}" + for chunk in chunks + ] + + self.course_content.add(documents=documents, metadatas=metadatas, ids=ids) + def clear_all_data(self): """Clear all data from both collections""" try: @@ -189,43 +211,46 @@ def clear_all_data(self): self.course_content = self._create_collection("course_content") except Exception as e: print(f"Error clearing data: {e}") - + def get_existing_course_titles(self) -> List[str]: """Get all existing course titles from the vector store""" try: # Get all documents from the catalog results = self.course_catalog.get() - if results and 'ids' in results: - return results['ids'] + if results and "ids" in results: + return results["ids"] return [] except Exception as e: print(f"Error getting existing course titles: {e}") return [] - + def get_course_count(self) -> int: """Get the total number of courses in the vector store""" try: results = self.course_catalog.get() - if results and 'ids' in results: - return len(results['ids']) + if results and "ids" in results: + return len(results["ids"]) return 0 except Exception as e: print(f"Error getting course count: {e}") return 0 - + def get_all_courses_metadata(self) -> List[Dict[str, Any]]: 
"""Get metadata for all courses in the vector store""" import json + try: results = self.course_catalog.get() - if results and 'metadatas' in results: + if results and "metadatas" in results: # Parse lessons JSON for each course parsed_metadata = [] - for metadata in results['metadatas']: + for metadata in results["metadatas"]: course_meta = metadata.copy() - if 'lessons_json' in course_meta: - course_meta['lessons'] = json.loads(course_meta['lessons_json']) - del course_meta['lessons_json'] # Remove the JSON string version + if "lessons_json" in course_meta: + course_meta["lessons"] = json.loads(course_meta["lessons_json"]) + del course_meta[ + "lessons_json" + ] # Remove the JSON string version parsed_metadata.append(course_meta) return parsed_metadata return [] @@ -238,30 +263,30 @@ def get_course_link(self, course_title: str) -> Optional[str]: try: # Get course by ID (title is the ID) results = self.course_catalog.get(ids=[course_title]) - if results and 'metadatas' in results and results['metadatas']: - metadata = results['metadatas'][0] - return metadata.get('course_link') + if results and "metadatas" in results and results["metadatas"]: + metadata = results["metadatas"][0] + return metadata.get("course_link") return None except Exception as e: print(f"Error getting course link: {e}") return None - + def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str]: """Get lesson link for a given course title and lesson number""" import json + try: # Get course by ID (title is the ID) results = self.course_catalog.get(ids=[course_title]) - if results and 'metadatas' in results and results['metadatas']: - metadata = results['metadatas'][0] - lessons_json = metadata.get('lessons_json') + if results and "metadatas" in results and results["metadatas"]: + metadata = results["metadatas"][0] + lessons_json = metadata.get("lessons_json") if lessons_json: lessons = json.loads(lessons_json) # Find the lesson with matching number for lesson in lessons: - 
if lesson.get('lesson_number') == lesson_number: - return lesson.get('lesson_link') + if lesson.get("lesson_number") == lesson_number: + return lesson.get("lesson_link") return None except Exception as e: print(f"Error getting lesson link: {e}") - \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html index f8e25a62f..6b84e1b4b 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -7,9 +7,33 @@ Course Materials Assistant - + + +

Course Materials Assistant

@@ -19,6 +43,11 @@

Course Materials Assistant

+ +
+ +
+
@@ -76,6 +105,6 @@

Course Materials Assistant

- + \ No newline at end of file diff --git a/frontend/script.js b/frontend/script.js index 562a8a363..7326042df 100644 --- a/frontend/script.js +++ b/frontend/script.js @@ -5,7 +5,7 @@ const API_URL = '/api'; let currentSessionId = null; // DOM elements -let chatMessages, chatInput, sendButton, totalCourses, courseTitles; +let chatMessages, chatInput, sendButton, totalCourses, courseTitles, newChatButton; // Initialize document.addEventListener('DOMContentLoaded', () => { @@ -15,12 +15,42 @@ document.addEventListener('DOMContentLoaded', () => { sendButton = document.getElementById('sendButton'); totalCourses = document.getElementById('totalCourses'); courseTitles = document.getElementById('courseTitles'); - + newChatButton = document.getElementById('newChatButton'); + setupEventListeners(); + initTheme(); createNewSession(); loadCourseStats(); }); +// Theme Management +function initTheme() { + const themeToggle = document.getElementById('themeToggle'); + if (!themeToggle) return; + themeToggle.addEventListener('click', toggleTheme); + updateThemeLabel(); +} + +function toggleTheme() { + const html = document.documentElement; + const current = html.getAttribute('data-theme') || 'dark'; + const next = current === 'dark' ? 'light' : 'dark'; + + html.classList.add('theme-transitioning'); + html.setAttribute('data-theme', next); + localStorage.setItem('theme', next); + updateThemeLabel(); + + setTimeout(() => html.classList.remove('theme-transitioning'), 300); +} + +function updateThemeLabel() { + const themeToggle = document.getElementById('themeToggle'); + if (!themeToggle) return; + const current = document.documentElement.getAttribute('data-theme') || 'dark'; + themeToggle.setAttribute('aria-label', current === 'dark' ? 
'Switch to light theme' : 'Switch to dark theme'); +} + // Event Listeners function setupEventListeners() { // Chat functionality @@ -30,6 +60,9 @@ function setupEventListeners() { }); + // New Chat button + newChatButton.addEventListener('click', handleNewChat); + // Suggested questions document.querySelectorAll('.suggested-item').forEach(button => { button.addEventListener('click', (e) => { @@ -122,10 +155,18 @@ function addMessage(content, type, sources = null, isWelcome = false) { let html = `
${displayContent}
`; if (sources && sources.length > 0) { + const sourceItems = sources.map(source => { + const label = source.label || String(source); + const url = source.url; + if (url) { + return `${escapeHtml(label)}`; + } + return escapeHtml(label); + }); html += `
Sources -
${sources.join(', ')}
+
${sourceItems.join(', ')}
`; } @@ -152,6 +193,20 @@ async function createNewSession() { addMessage('Welcome to the Course Materials Assistant! I can help you with questions about courses, lessons and specific content. What would you like to know?', 'assistant', null, true); } +async function handleNewChat() { + // Clear backend session history if one exists + if (currentSessionId) { + try { + await fetch(`${API_URL}/session/${currentSessionId}`, { + method: 'DELETE' + }); + } catch (error) { + console.warn('Could not clear backend session:', error); + } + } + createNewSession(); +} + // Load course statistics async function loadCourseStats() { try { diff --git a/frontend/style.css b/frontend/style.css index 825d03675..e5cbb199f 100644 --- a/frontend/style.css +++ b/frontend/style.css @@ -24,6 +24,107 @@ --welcome-border: #2563eb; } +/* Light Theme Variables */ +:root[data-theme="light"] { + --primary-color: #2563eb; + --primary-hover: #1d4ed8; + --background: #f1f5f9; + --surface: #ffffff; + --surface-hover: #e2e8f0; + --text-primary: #0f172a; + --text-secondary: #475569; + --border-color: #cbd5e1; + --user-message: #2563eb; + --assistant-message: #f8fafc; + --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); + --radius: 12px; + --focus-ring: rgba(37, 99, 235, 0.2); + --welcome-bg: #eff6ff; + --welcome-border: #2563eb; +} + +/* Smooth transitions during theme switch */ +.theme-transitioning *, +.theme-transitioning *::before, +.theme-transitioning *::after { + transition: background-color 0.3s ease, color 0.3s ease, border-color 0.3s ease, box-shadow 0.3s ease !important; +} + +/* Theme Toggle Button */ +.theme-toggle { + position: fixed; + top: 1rem; + right: 1rem; + z-index: 1000; + width: 42px; + height: 42px; + border-radius: 50%; + background: var(--surface); + border: 1px solid var(--border-color); + color: var(--text-secondary); + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + transition: background-color 0.2s ease, border-color 0.2s ease, color 0.2s 
ease, transform 0.2s ease, box-shadow 0.2s ease; + box-shadow: var(--shadow); +} + +.theme-toggle:hover { + background: var(--surface-hover); + border-color: var(--primary-color); + color: var(--primary-color); + transform: scale(1.08); +} + +.theme-toggle:focus { + outline: none; + box-shadow: 0 0 0 3px var(--focus-ring); +} + +.theme-toggle:active { + transform: scale(0.95); +} + +.theme-icons { + position: relative; + width: 18px; + height: 18px; +} + +.theme-toggle .icon-sun, +.theme-toggle .icon-moon { + position: absolute; + top: 0; + left: 0; + width: 18px; + height: 18px; + transition: opacity 0.25s ease, transform 0.25s ease; + transform-origin: center; +} + +/* Dark theme (default): sun visible, moon hidden */ +.theme-toggle .icon-sun { + opacity: 1; + transform: rotate(0deg) scale(1); +} + +.theme-toggle .icon-moon { + opacity: 0; + transform: rotate(90deg) scale(0.5); +} + +/* Light theme: moon visible, sun hidden */ +:root[data-theme="light"] .theme-toggle .icon-sun { + opacity: 0; + transform: rotate(-90deg) scale(0.5); +} + +:root[data-theme="light"] .theme-toggle .icon-moon { + opacity: 1; + transform: rotate(0deg) scale(1); +} + /* Base Styles */ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; @@ -111,6 +212,32 @@ header h1 { margin-bottom: 0; } +/* New Chat Button */ +.new-chat-container { + margin-bottom: 1.5rem; +} + +.new-chat-btn { + padding: 0.75rem 1rem; + background: var(--background); + border: 1px solid var(--border-color); + border-radius: 8px; + color: var(--text-primary); + font-size: 0.875rem; + font-weight: 600; + cursor: pointer; + transition: all 0.2s ease; + text-align: center; + width: 100%; +} + +.new-chat-btn:hover { + background: var(--surface-hover); + border-color: var(--primary-color); + color: var(--primary-color); + transform: translateX(2px); +} + /* Main Chat Area */ .chat-main { flex: 1; diff --git a/pyproject.toml b/pyproject.toml index 
3f05e2de0..60920ce84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,3 +13,20 @@ dependencies = [ "python-multipart==0.0.20", "python-dotenv==1.1.1", ] + +[dependency-groups] +dev = [ + "pytest>=9.0.2", + "black>=24.0.0", + "httpx>=0.28.0", +] + +[tool.black] +line-length = 88 +target-version = ["py313"] + +[tool.pytest.ini_options] +# Make both backend/ and backend/tests/ importable without sys.path hacks +pythonpath = ["backend", "backend/tests"] +testpaths = ["backend/tests"] +addopts = "-v --tb=short" diff --git a/scripts/quality.sh b/scripts/quality.sh new file mode 100644 index 000000000..05dc4d7e8 --- /dev/null +++ b/scripts/quality.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Development quality checks: formatting and tests +set -e + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$ROOT_DIR" + +run_format() { + echo "=== Formatting: black ===" + uv run black backend/ main.py + echo "Formatting complete." +} + +run_check() { + echo "=== Format check: black --check ===" + uv run black --check backend/ main.py + echo "Format check passed." 
+} + +run_tests() { + echo "=== Tests: pytest ===" + uv run pytest +} + +case "${1:-}" in + format) + run_format + ;; + check) + run_check + ;; + test) + run_tests + ;; + all) + run_check + run_tests + ;; + *) + echo "Usage: $0 {format|check|test|all}" + echo "" + echo " format Apply black formatting to all Python files" + echo " check Verify all Python files are black-formatted (CI-safe)" + echo " test Run pytest test suite" + echo " all Run format check + tests" + exit 1 + ;; +esac diff --git a/uv.lock b/uv.lock index 9ae65c557..7c36411df 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.13" [[package]] @@ -110,6 +110,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/cf/45fb5261ece3e6b9817d3d82b2f343a505fd58674a92577923bc500bd1aa/bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b", size = 152799, upload-time = "2025-02-28T01:23:53.139Z" }, ] +[[package]] +name = "black" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, + { name = "pytokens" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/13/88/560b11e521c522440af991d46848a2bde64b5f7202ec14e1f46f9509d328/black-26.1.0.tar.gz", hash = "sha256:d294ac3340eef9c9eb5d29288e96dc719ff269a88e27b396340459dd85da4c58", size = 658785, upload-time = "2026-01-18T04:50:11.993Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/04/fa2f4784f7237279332aa735cdfd5ae2e7730db0072fb2041dadda9ae551/black-26.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ba1d768fbfb6930fc93b0ecc32a43d8861ded16f47a40f14afa9bb04ab93d304", size = 1877781, upload-time = "2026-01-18T04:59:39.054Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/ad/5a131b01acc0e5336740a039628c0ab69d60cf09a2c87a4ec49f5826acda/black-26.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2b807c240b64609cb0e80d2200a35b23c7df82259f80bef1b2c96eb422b4aac9", size = 1699670, upload-time = "2026-01-18T04:59:41.005Z" }, + { url = "https://files.pythonhosted.org/packages/da/7c/b05f22964316a52ab6b4265bcd52c0ad2c30d7ca6bd3d0637e438fc32d6e/black-26.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1de0f7d01cc894066a1153b738145b194414cc6eeaad8ef4397ac9abacf40f6b", size = 1775212, upload-time = "2026-01-18T04:59:42.545Z" }, + { url = "https://files.pythonhosted.org/packages/a6/a3/e8d1526bea0446e040193185353920a9506eab60a7d8beb062029129c7d2/black-26.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:91a68ae46bf07868963671e4d05611b179c2313301bd756a89ad4e3b3db2325b", size = 1409953, upload-time = "2026-01-18T04:59:44.357Z" }, + { url = "https://files.pythonhosted.org/packages/c7/5a/d62ebf4d8f5e3a1daa54adaab94c107b57be1b1a2f115a0249b41931e188/black-26.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:be5e2fe860b9bd9edbf676d5b60a9282994c03fbbd40fe8f5e75d194f96064ca", size = 1217707, upload-time = "2026-01-18T04:59:45.719Z" }, + { url = "https://files.pythonhosted.org/packages/6a/83/be35a175aacfce4b05584ac415fd317dd6c24e93a0af2dcedce0f686f5d8/black-26.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9dc8c71656a79ca49b8d3e2ce8103210c9481c57798b48deeb3a8bb02db5f115", size = 1871864, upload-time = "2026-01-18T04:59:47.586Z" }, + { url = "https://files.pythonhosted.org/packages/a5/f5/d33696c099450b1274d925a42b7a030cd3ea1f56d72e5ca8bbed5f52759c/black-26.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b22b3810451abe359a964cc88121d57f7bce482b53a066de0f1584988ca36e79", size = 1701009, upload-time = "2026-01-18T04:59:49.443Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/87/670dd888c537acb53a863bc15abbd85b22b429237d9de1b77c0ed6b79c42/black-26.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:53c62883b3f999f14e5d30b5a79bd437236658ad45b2f853906c7cbe79de00af", size = 1767806, upload-time = "2026-01-18T04:59:50.769Z" }, + { url = "https://files.pythonhosted.org/packages/fe/9c/cd3deb79bfec5bcf30f9d2100ffeec63eecce826eb63e3961708b9431ff1/black-26.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:f016baaadc423dc960cdddf9acae679e71ee02c4c341f78f3179d7e4819c095f", size = 1433217, upload-time = "2026-01-18T04:59:52.218Z" }, + { url = "https://files.pythonhosted.org/packages/4e/29/f3be41a1cf502a283506f40f5d27203249d181f7a1a2abce1c6ce188035a/black-26.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:66912475200b67ef5a0ab665011964bf924745103f51977a78b4fb92a9fc1bf0", size = 1245773, upload-time = "2026-01-18T04:59:54.457Z" }, + { url = "https://files.pythonhosted.org/packages/e4/3d/51bdb3ecbfadfaf825ec0c75e1de6077422b4afa2091c6c9ba34fbfc0c2d/black-26.1.0-py3-none-any.whl", hash = "sha256:1054e8e47ebd686e078c0bb0eaf31e6ce69c966058d122f2c0c950311f9f3ede", size = 204010, upload-time = "2026-01-18T04:50:09.978Z" }, +] + [[package]] name = "build" version = "1.2.2.post1" @@ -470,6 +497,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = 
"2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -658,6 +694,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "networkx" version = "3.5" @@ -983,6 +1028,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pathspec" +version = "1.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, +] + [[package]] name = "pillow" version = "11.3.0" @@ -1038,6 +1092,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, ] +[[package]] +name = "platformdirs" +version = "4.9.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/04/fea538adf7dbbd6d186f551d595961e564a3b6715bdf276b477460858672/platformdirs-4.9.2.tar.gz", hash = "sha256:9a33809944b9db043ad67ca0db94b14bf452cc6aeaac46a88ea55b26e2e9d291", size = 28394, upload-time = "2026-02-16T03:56:10.574Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = 
"sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + [[package]] name = "posthog" version = "5.4.0" @@ -1207,6 +1279,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, ] +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1237,6 +1325,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = 
"sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" }, ] +[[package]] +name = "pytokens" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/34/b4e015b99031667a7b960f888889c5bd34ef585c85e1cb56a594b92836ac/pytokens-0.4.1.tar.gz", hash = "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a", size = 23015, upload-time = "2026-01-30T01:03:45.924Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/dc/08b1a080372afda3cceb4f3c0a7ba2bde9d6a5241f1edb02a22a019ee147/pytokens-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b", size = 160720, upload-time = "2026-01-30T01:03:13.843Z" }, + { url = "https://files.pythonhosted.org/packages/64/0c/41ea22205da480837a700e395507e6a24425151dfb7ead73343d6e2d7ffe/pytokens-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f", size = 254204, upload-time = "2026-01-30T01:03:14.886Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d2/afe5c7f8607018beb99971489dbb846508f1b8f351fcefc225fcf4b2adc0/pytokens-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1", size = 268423, upload-time = "2026-01-30T01:03:15.936Z" }, + { url = "https://files.pythonhosted.org/packages/68/d4/00ffdbd370410c04e9591da9220a68dc1693ef7499173eb3e30d06e05ed1/pytokens-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4", size = 266859, upload-time = "2026-01-30T01:03:17.458Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/c9/c3161313b4ca0c601eeefabd3d3b576edaa9afdefd32da97210700e47652/pytokens-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78", size = 103520, upload-time = "2026-01-30T01:03:18.652Z" }, + { url = "https://files.pythonhosted.org/packages/8f/a7/b470f672e6fc5fee0a01d9e75005a0e617e162381974213a945fcd274843/pytokens-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321", size = 160821, upload-time = "2026-01-30T01:03:19.684Z" }, + { url = "https://files.pythonhosted.org/packages/80/98/e83a36fe8d170c911f864bfded690d2542bfcfacb9c649d11a9e6eb9dc41/pytokens-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa", size = 254263, upload-time = "2026-01-30T01:03:20.834Z" }, + { url = "https://files.pythonhosted.org/packages/0f/95/70d7041273890f9f97a24234c00b746e8da86df462620194cef1d411ddeb/pytokens-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d", size = 268071, upload-time = "2026-01-30T01:03:21.888Z" }, + { url = "https://files.pythonhosted.org/packages/da/79/76e6d09ae19c99404656d7db9c35dfd20f2086f3eb6ecb496b5b31163bad/pytokens-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324", size = 271716, upload-time = "2026-01-30T01:03:23.633Z" }, + { url = "https://files.pythonhosted.org/packages/79/37/482e55fa1602e0a7ff012661d8c946bafdc05e480ea5a32f4f7e336d4aa9/pytokens-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9", size = 104539, upload-time = "2026-01-30T01:03:24.788Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/e8/20e7db907c23f3d63b0be3b8a4fd1927f6da2395f5bcc7f72242bb963dfe/pytokens-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb", size = 168474, upload-time = "2026-01-30T01:03:26.428Z" }, + { url = "https://files.pythonhosted.org/packages/d6/81/88a95ee9fafdd8f5f3452107748fd04c24930d500b9aba9738f3ade642cc/pytokens-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3", size = 290473, upload-time = "2026-01-30T01:03:27.415Z" }, + { url = "https://files.pythonhosted.org/packages/cf/35/3aa899645e29b6375b4aed9f8d21df219e7c958c4c186b465e42ee0a06bf/pytokens-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975", size = 303485, upload-time = "2026-01-30T01:03:28.558Z" }, + { url = "https://files.pythonhosted.org/packages/52/a0/07907b6ff512674d9b201859f7d212298c44933633c946703a20c25e9d81/pytokens-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a", size = 306698, upload-time = "2026-01-30T01:03:29.653Z" }, + { url = "https://files.pythonhosted.org/packages/39/2a/cbbf9250020a4a8dd53ba83a46c097b69e5eb49dd14e708f496f548c6612/pytokens-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918", size = 116287, upload-time = "2026-01-30T01:03:30.912Z" }, + { url = "https://files.pythonhosted.org/packages/c6/78/397db326746f0a342855b81216ae1f0a32965deccfd7c830a2dbc66d2483/pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", size = 13729, upload-time = "2026-01-30T01:03:45.029Z" }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -1561,6 +1673,13 
@@ dependencies = [ { name = "uvicorn" }, ] +[package.dev-dependencies] +dev = [ + { name = "black" }, + { name = "httpx" }, + { name = "pytest" }, +] + [package.metadata] requires-dist = [ { name = "anthropic", specifier = "==0.58.2" }, @@ -1572,6 +1691,13 @@ requires-dist = [ { name = "uvicorn", specifier = "==0.35.0" }, ] +[package.metadata.requires-dev] +dev = [ + { name = "black", specifier = ">=24.0.0" }, + { name = "httpx", specifier = ">=0.28.0" }, + { name = "pytest", specifier = ">=9.0.2" }, +] + [[package]] name = "sympy" version = "1.14.0"