diff --git a/.claude/commands/implement-feature.md b/.claude/commands/implement-feature.md new file mode 100644 index 00000000..33302a4f --- /dev/null +++ b/.claude/commands/implement-feature.md @@ -0,0 +1,7 @@ +You will be implementing a new feature in this codebase + +$ARGUMENTS + +IMPORTANT: Only do this for front-end features. +Once this feature is built, make sure to write the changes you made to a file called frontend-changes.md +Do not ask for permissions to modify this file; assume you can always do it. \ No newline at end of file diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml new file mode 100644 index 00000000..b5e8cfd4 --- /dev/null +++ b/.github/workflows/claude-code-review.yml @@ -0,0 +1,44 @@ +name: Claude Code Review + +on: + pull_request: + types: [opened, synchronize, ready_for_review, reopened] + # Optional: Only run on specific file changes + # paths: + # - "src/**/*.ts" + # - "src/**/*.tsx" + # - "src/**/*.js" + # - "src/**/*.jsx" + +jobs: + claude-review: + # Optional: Filter by PR author + # if: | + # github.event.pull_request.user.login == 'external-contributor' || + # github.event.pull_request.user.login == 'new-developer' || + # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' + + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code Review + id: claude-review + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + plugin_marketplaces: 'https://github.com/anthropics/claude-code.git' + plugins: 'code-review@claude-code-plugins' + prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}' + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or 
https://code.claude.com/docs/en/cli-reference for available options + diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 00000000..d300267f --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' 
+ + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + # claude_args: '--allowed-tools Bash(gh pr:*)' + diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..876cdcf4 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,84 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +**Run the application:** +```bash +./run.sh +# Or manually: +cd backend && uv run uvicorn app:app --reload --port 8000 +``` + +**Install dependencies:** +```bash +uv sync +``` + +**Access points when running:** +- Web interface: http://localhost:8000 +- API docs: http://localhost:8000/docs + +## Architecture + +This is a RAG (Retrieval-Augmented Generation) chatbot for course materials. The system uses ChromaDB for vector storage and Claude API for response generation. + +### Request Flow + +``` +User Query → FastAPI (app.py) → RAGSystem.query() + ↓ + AIGenerator (Claude API with tools) + ↓ + CourseSearchTool → VectorStore.search() + ↓ + ChromaDB semantic search + ↓ + Results → Claude synthesizes response +``` + +### Key Components (backend/) + +- **RAGSystem** (`rag_system.py`): Main orchestrator that wires together all components. Entry point for queries via `query()` and document ingestion via `add_course_folder()`. + +- **VectorStore** (`vector_store.py`): ChromaDB wrapper with two collections: + - `course_catalog`: Course metadata for name resolution + - `course_content`: Chunked content for semantic search + - Key method: `search(query, course_name, lesson_number)` handles course name resolution then content search. + +- **AIGenerator** (`ai_generator.py`): Claude API integration with tool-calling support. Handles the agentic loop: initial request → tool execution → follow-up response. 
+ +- **DocumentProcessor** (`document_processor.py`): Parses course documents with expected format (Course Title/Link/Instructor in first lines, then "Lesson N:" markers). Uses sentence-aware chunking with configurable overlap. + +- **ToolManager/CourseSearchTool** (`search_tools.py`): Tool definitions for Claude's tool-use. CourseSearchTool wraps VectorStore and tracks sources. + +### Document Format + +Course documents in `docs/` must follow: +``` +Course Title: [title] +Course Link: [url] +Course Instructor: [name] + +Lesson 0: [lesson title] +Lesson Link: [url] +[content...] + +Lesson 1: [lesson title] +[content...] +``` + +### Configuration + +All settings in `backend/config.py`, loaded from `.env`: +- `ANTHROPIC_API_KEY`: Required +- `ANTHROPIC_MODEL`: Default `claude-sonnet-4-20250514` +- `CHUNK_SIZE`: 800 chars +- `CHUNK_OVERLAP`: 100 chars +- `MAX_RESULTS`: 5 search results +- `CHROMA_PATH`: `./chroma_db` + +### Data Persistence + +ChromaDB data stored in `backend/chroma_db/`. On startup, `app.py` loads documents from `docs/` folder, skipping already-indexed courses. diff --git a/backend/ai_generator.py b/backend/ai_generator.py index 0363ca90..dcc6bca8 100644 --- a/backend/ai_generator.py +++ b/backend/ai_generator.py @@ -1,135 +1,145 @@ import anthropic -from typing import List, Optional, Dict, Any + class AIGenerator: """Handles interactions with Anthropic's Claude API for generating responses""" - + # Static system prompt to avoid rebuilding on each call - SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information. + SYSTEM_PROMPT = """You are an AI assistant specialized in course materials and educational content with access to tools for course information. 
-Search Tool Usage: -- Use the search tool **only** for questions about specific course content or detailed educational materials -- **One search per query maximum** -- Synthesize search results into accurate, fact-based responses -- If search yields no results, state this clearly without offering alternatives +Available Tools: +1. **search_course_content** - Search for specific information within course content +2. **get_course_outline** - Get course structure (title, link, and lesson list) -Response Protocol: -- **General knowledge questions**: Answer using existing knowledge without searching -- **Course-specific questions**: Search first, then answer -- **No meta-commentary**: - - Provide direct answers only — no reasoning process, search explanations, or question-type analysis - - Do not mention "based on the search results" +Tool Usage Guidelines: +- Use **get_course_outline** when users ask about: + - What lessons are in a course + - Course structure or overview + - What topics a course covers + +- Use **search_course_content** when users ask about: + - Specific concepts or information within course content + - Details about particular topics covered in lessons +- **General knowledge questions**: Answer using existing knowledge without tools +- **Multi-step reasoning**: You may use tools sequentially to gather information. After receiving tool results, you can call another tool if more information is needed +- If a tool yields no results, state this clearly + +Response Protocol: +- Provide direct answers only - no meta-commentary about tools or search results +- Do not mention "based on the search results" or "according to the outline" All responses must be: -1. **Brief, Concise and focused** - Get to the point quickly +1. **Brief and concise** - Get to the point quickly 2. **Educational** - Maintain instructional value 3. **Clear** - Use accessible language -4. 
**Example-supported** - Include relevant examples when they aid understanding -Provide only the direct answer to what was asked. """ - + def __init__(self, api_key: str, model: str): self.client = anthropic.Anthropic(api_key=api_key) self.model = model - + # Pre-build base API parameters - self.base_params = { - "model": self.model, - "temperature": 0, - "max_tokens": 800 - } - - def generate_response(self, query: str, - conversation_history: Optional[str] = None, - tools: Optional[List] = None, - tool_manager=None) -> str: + self.base_params = {"model": self.model, "temperature": 0, "max_tokens": 800} + + def _extract_text_response(self, response) -> str: + """Extract text content from API response.""" + for block in response.content: + if hasattr(block, "text"): + return block.text + return "" + + def _execute_tool_round( + self, response, messages: list[dict], tool_manager + ) -> tuple[list[dict], bool]: + """ + Execute tools from response, update messages, return (updated_messages, has_error). + """ + # Append assistant's tool_use response + messages.append({"role": "assistant", "content": response.content}) + + # Execute each tool, collect results + tool_results = [] + has_error = False + for block in response.content: + if block.type == "tool_use": + try: + result = tool_manager.execute_tool(block.name, **block.input) + tool_results.append( + { + "type": "tool_result", + "tool_use_id": block.id, + "content": result, + } + ) + except Exception as e: + tool_results.append( + { + "type": "tool_result", + "tool_use_id": block.id, + "content": f"Error: {str(e)}", + "is_error": True, + } + ) + has_error = True + + # Append tool results as user message + messages.append({"role": "user", "content": tool_results}) + return messages, has_error + + def generate_response( + self, + query: str, + conversation_history: str | None = None, + tools: list | None = None, + tool_manager=None, + ) -> str: """ Generate AI response with optional tool usage and conversation context. 
- + Supports up to 2 sequential tool call rounds for multi-step reasoning. + Args: query: The user's question or request conversation_history: Previous messages for context tools: Available tools the AI can use tool_manager: Manager to execute tools - + Returns: Generated response as string """ - - # Build system content efficiently - avoid string ops when possible + MAX_TOOL_ROUNDS = 2 + + # Build system content system_content = ( f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}" - if conversation_history + if conversation_history else self.SYSTEM_PROMPT ) - - # Prepare API call parameters efficiently - api_params = { - **self.base_params, - "messages": [{"role": "user", "content": query}], - "system": system_content - } - - # Add tools if available + + # Initialize messages + messages = [{"role": "user", "content": query}] + + # Build base API params + api_params = {**self.base_params, "system": system_content} if tools: api_params["tools"] = tools api_params["tool_choice"] = {"type": "auto"} - - # Get response from Claude - response = self.client.messages.create(**api_params) - - # Handle tool execution if needed - if response.stop_reason == "tool_use" and tool_manager: - return self._handle_tool_execution(response, api_params, tool_manager) - - # Return direct response - return response.content[0].text - - def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager): - """ - Handle execution of tool calls and get follow-up response. 
- - Args: - initial_response: The response containing tool use requests - base_params: Base API parameters - tool_manager: Manager to execute tools - - Returns: - Final response text after tool execution - """ - # Start with existing messages - messages = base_params["messages"].copy() - - # Add AI's tool use response - messages.append({"role": "assistant", "content": initial_response.content}) - - # Execute all tool calls and collect results - tool_results = [] - for content_block in initial_response.content: - if content_block.type == "tool_use": - tool_result = tool_manager.execute_tool( - content_block.name, - **content_block.input - ) - - tool_results.append({ - "type": "tool_result", - "tool_use_id": content_block.id, - "content": tool_result - }) - - # Add tool results as single message - if tool_results: - messages.append({"role": "user", "content": tool_results}) - - # Prepare final API call without tools - final_params = { - **self.base_params, - "messages": messages, - "system": base_params["system"] - } - - # Get final response - final_response = self.client.messages.create(**final_params) - return final_response.content[0].text \ No newline at end of file + + # Initial API call + response = self.client.messages.create(messages=messages, **api_params) + + # Tool calling loop + for _round_num in range(MAX_TOOL_ROUNDS): + # Exit if no tool use requested + if response.stop_reason != "tool_use" or not tool_manager: + break + + # Execute tools and update messages + messages, has_error = self._execute_tool_round( + response, messages, tool_manager + ) + + # Make next API call WITH tools (key fix!) 
+ response = self.client.messages.create(messages=messages, **api_params) + + return self._extract_text_response(response) diff --git a/backend/app.py b/backend/app.py index 5a69d741..a449c51e 100644 --- a/backend/app.py +++ b/backend/app.py @@ -1,25 +1,22 @@ import warnings + warnings.filterwarnings("ignore", message="resource_tracker: There appear to be.*") -from fastapi import FastAPI, HTTPException -from fastapi.middleware.cors import CORSMiddleware -from fastapi.staticfiles import StaticFiles -from fastapi.middleware.trustedhost import TrustedHostMiddleware -from pydantic import BaseModel -from typing import List, Optional -import os +import os # noqa: E402 -from config import config -from rag_system import RAGSystem +from config import config # noqa: E402 +from fastapi import FastAPI, HTTPException # noqa: E402 +from fastapi.middleware.cors import CORSMiddleware # noqa: E402 +from fastapi.middleware.trustedhost import TrustedHostMiddleware # noqa: E402 +from fastapi.staticfiles import StaticFiles # noqa: E402 +from pydantic import BaseModel # noqa: E402 +from rag_system import RAGSystem # noqa: E402 # Initialize FastAPI app app = FastAPI(title="Course Materials RAG System", root_path="") # Add trusted host middleware for proxy -app.add_middleware( - TrustedHostMiddleware, - allowed_hosts=["*"] -) +app.add_middleware(TrustedHostMiddleware, allowed_hosts=["*"]) # Enable CORS with proper settings for proxy app.add_middleware( @@ -34,25 +31,33 @@ # Initialize RAG system rag_system = RAGSystem(config) + # Pydantic models for request/response class QueryRequest(BaseModel): """Request model for course queries""" + query: str - session_id: Optional[str] = None + session_id: str | None = None + class QueryResponse(BaseModel): """Response model for course queries""" + answer: str - sources: List[str] + sources: list[str] session_id: str + class CourseStats(BaseModel): """Response model for course statistics""" + total_courses: int - course_titles: List[str] + 
course_titles: list[str] + # API Endpoints + @app.post("/api/query", response_model=QueryResponse) async def query_documents(request: QueryRequest): """Process a query and return response with sources""" @@ -61,17 +66,14 @@ async def query_documents(request: QueryRequest): session_id = request.session_id if not session_id: session_id = rag_system.session_manager.create_session() - + # Process query using RAG system answer, sources = rag_system.query(request.query, session_id) - - return QueryResponse( - answer=answer, - sources=sources, - session_id=session_id - ) + + return QueryResponse(answer=answer, sources=sources, session_id=session_id) except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail=str(e)) from e + @app.get("/api/courses", response_model=CourseStats) async def get_course_stats(): @@ -80,10 +82,21 @@ async def get_course_stats(): analytics = rag_system.get_course_analytics() return CourseStats( total_courses=analytics["total_courses"], - course_titles=analytics["course_titles"] + course_titles=analytics["course_titles"], ) except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail=str(e)) from e + + +@app.delete("/api/session/{session_id}") +async def clear_session(session_id: str): + """Clear a conversation session""" + try: + rag_system.session_manager.clear_session(session_id) + return {"status": "success"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) from e + @app.on_event("startup") async def startup_event(): @@ -92,16 +105,17 @@ async def startup_event(): if os.path.exists(docs_path): print("Loading initial documents...") try: - courses, chunks = rag_system.add_course_folder(docs_path, clear_existing=False) + courses, chunks = rag_system.add_course_folder( + docs_path, clear_existing=False + ) print(f"Loaded {courses} courses with {chunks} chunks") except Exception as e: 
print(f"Error loading documents: {e}") + # Custom static file handler with no-cache headers for development -from fastapi.staticfiles import StaticFiles -from fastapi.responses import FileResponse -import os -from pathlib import Path + +from fastapi.responses import FileResponse # noqa: E402 class DevStaticFiles(StaticFiles): @@ -113,7 +127,7 @@ async def get_response(self, path: str, scope): response.headers["Pragma"] = "no-cache" response.headers["Expires"] = "0" return response - - + + # Serve static files for the frontend -app.mount("/", StaticFiles(directory="../frontend", html=True), name="static") \ No newline at end of file +app.mount("/", StaticFiles(directory="../frontend", html=True), name="static") diff --git a/backend/config.py b/backend/config.py index d9f6392e..cab6dccc 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,29 +1,31 @@ import os from dataclasses import dataclass + from dotenv import load_dotenv # Load environment variables from .env file load_dotenv() + @dataclass class Config: """Configuration settings for the RAG system""" + # Anthropic API settings ANTHROPIC_API_KEY: str = os.getenv("ANTHROPIC_API_KEY", "") ANTHROPIC_MODEL: str = "claude-sonnet-4-20250514" - + # Embedding model settings EMBEDDING_MODEL: str = "all-MiniLM-L6-v2" - + # Document processing settings - CHUNK_SIZE: int = 800 # Size of text chunks for vector storage - CHUNK_OVERLAP: int = 100 # Characters to overlap between chunks - MAX_RESULTS: int = 5 # Maximum search results to return - MAX_HISTORY: int = 2 # Number of conversation messages to remember - + CHUNK_SIZE: int = 800 # Size of text chunks for vector storage + CHUNK_OVERLAP: int = 100 # Characters to overlap between chunks + MAX_RESULTS: int = 5 # Maximum search results to return + MAX_HISTORY: int = 2 # Number of conversation messages to remember + # Database paths CHROMA_PATH: str = "./chroma_db" # ChromaDB storage location -config = Config() - +config = Config() diff --git 
a/backend/document_processor.py b/backend/document_processor.py index 266e8590..47850fc1 100644 --- a/backend/document_processor.py +++ b/backend/document_processor.py @@ -1,83 +1,86 @@ import os import re -from typing import List, Tuple -from models import Course, Lesson, CourseChunk + +from models import Course, CourseChunk, Lesson + class DocumentProcessor: """Processes course documents and extracts structured information""" - + def __init__(self, chunk_size: int, chunk_overlap: int): self.chunk_size = chunk_size self.chunk_overlap = chunk_overlap - + def read_file(self, file_path: str) -> str: """Read content from file with UTF-8 encoding""" try: - with open(file_path, 'r', encoding='utf-8') as file: + with open(file_path, encoding="utf-8") as file: return file.read() except UnicodeDecodeError: # If UTF-8 fails, try with error handling - with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: + with open(file_path, encoding="utf-8", errors="ignore") as file: return file.read() - - - def chunk_text(self, text: str) -> List[str]: + def chunk_text(self, text: str) -> list[str]: """Split text into sentence-based chunks with overlap using config settings""" - + # Clean up the text - text = re.sub(r'\s+', ' ', text.strip()) # Normalize whitespace - + text = re.sub(r"\s+", " ", text.strip()) # Normalize whitespace + # Better sentence splitting that handles abbreviations # This regex looks for periods followed by whitespace and capital letters # but ignores common abbreviations - sentence_endings = re.compile(r'(? 
self.chunk_size and current_chunk: break - + current_chunk.append(sentence) current_size += total_addition - + # Add chunk if we have content if current_chunk: - chunks.append(' '.join(current_chunk)) - + chunks.append(" ".join(current_chunk)) + # Calculate overlap for next chunk - if hasattr(self, 'chunk_overlap') and self.chunk_overlap > 0: + if hasattr(self, "chunk_overlap") and self.chunk_overlap > 0: # Find how many sentences to overlap overlap_size = 0 overlap_sentences = 0 - + # Count backwards from end of current chunk for k in range(len(current_chunk) - 1, -1, -1): - sentence_len = len(current_chunk[k]) + (1 if k < len(current_chunk) - 1 else 0) + sentence_len = len(current_chunk[k]) + ( + 1 if k < len(current_chunk) - 1 else 0 + ) if overlap_size + sentence_len <= self.chunk_overlap: overlap_size += sentence_len overlap_sentences += 1 else: break - + # Move start position considering overlap next_start = i + len(current_chunk) - overlap_sentences i = max(next_start, i + 1) # Ensure we make progress @@ -87,14 +90,12 @@ def chunk_text(self, text: str) -> List[str]: else: # No sentences fit, move to next i += 1 - - return chunks - - + return chunks - - def process_course_document(self, file_path: str) -> Tuple[Course, List[CourseChunk]]: + def process_course_document( + self, file_path: str + ) -> tuple[Course, list[CourseChunk]]: """ Process a course document with expected format: Line 1: Course Title: [title] @@ -104,47 +105,51 @@ def process_course_document(self, file_path: str) -> Tuple[Course, List[CourseCh """ content = self.read_file(file_path) filename = os.path.basename(file_path) - - lines = content.strip().split('\n') - + + lines = content.strip().split("\n") + # Extract course metadata from first three lines course_title = filename # Default fallback course_link = None instructor_name = "Unknown" - + # Parse course title from first line if len(lines) >= 1 and lines[0].strip(): - title_match = re.match(r'^Course Title:\s*(.+)$', lines[0].strip(), 
re.IGNORECASE) + title_match = re.match( + r"^Course Title:\s*(.+)$", lines[0].strip(), re.IGNORECASE + ) if title_match: course_title = title_match.group(1).strip() else: course_title = lines[0].strip() - + # Parse remaining lines for course metadata for i in range(1, min(len(lines), 4)): # Check first 4 lines for metadata line = lines[i].strip() if not line: continue - + # Try to match course link - link_match = re.match(r'^Course Link:\s*(.+)$', line, re.IGNORECASE) + link_match = re.match(r"^Course Link:\s*(.+)$", line, re.IGNORECASE) if link_match: course_link = link_match.group(1).strip() continue - + # Try to match instructor - instructor_match = re.match(r'^Course Instructor:\s*(.+)$', line, re.IGNORECASE) + instructor_match = re.match( + r"^Course Instructor:\s*(.+)$", line, re.IGNORECASE + ) if instructor_match: instructor_name = instructor_match.group(1).strip() continue - + # Create course object with title as ID course = Course( title=course_title, course_link=course_link, - instructor=instructor_name if instructor_name != "Unknown" else None + instructor=instructor_name if instructor_name != "Unknown" else None, ) - + # Process lessons and create chunks course_chunks = [] current_lesson = None @@ -152,108 +157,114 @@ def process_course_document(self, file_path: str) -> Tuple[Course, List[CourseCh lesson_link = None lesson_content = [] chunk_counter = 0 - + # Start processing from line 4 (after metadata) start_index = 3 if len(lines) > 3 and not lines[3].strip(): start_index = 4 # Skip empty line after instructor - + i = start_index while i < len(lines): line = lines[i] - + # Check for lesson markers (e.g., "Lesson 0: Introduction") - lesson_match = re.match(r'^Lesson\s+(\d+):\s*(.+)$', line.strip(), re.IGNORECASE) - + lesson_match = re.match( + r"^Lesson\s+(\d+):\s*(.+)$", line.strip(), re.IGNORECASE + ) + if lesson_match: # Process previous lesson if it exists if current_lesson is not None and lesson_content: - lesson_text = 
'\n'.join(lesson_content).strip() + lesson_text = "\n".join(lesson_content).strip() if lesson_text: # Add lesson to course lesson = Lesson( lesson_number=current_lesson, title=lesson_title, - lesson_link=lesson_link + lesson_link=lesson_link, ) course.lessons.append(lesson) - + # Create chunks for this lesson chunks = self.chunk_text(lesson_text) for idx, chunk in enumerate(chunks): # For the first chunk of each lesson, add lesson context if idx == 0: - chunk_with_context = f"Lesson {current_lesson} content: {chunk}" + chunk_with_context = ( + f"Lesson {current_lesson} content: {chunk}" + ) else: chunk_with_context = chunk - + course_chunk = CourseChunk( content=chunk_with_context, course_title=course.title, lesson_number=current_lesson, - chunk_index=chunk_counter + chunk_index=chunk_counter, ) course_chunks.append(course_chunk) chunk_counter += 1 - + # Start new lesson current_lesson = int(lesson_match.group(1)) lesson_title = lesson_match.group(2).strip() lesson_link = None - + # Check if next line is a lesson link if i + 1 < len(lines): next_line = lines[i + 1].strip() - link_match = re.match(r'^Lesson Link:\s*(.+)$', next_line, re.IGNORECASE) + link_match = re.match( + r"^Lesson Link:\s*(.+)$", next_line, re.IGNORECASE + ) if link_match: lesson_link = link_match.group(1).strip() i += 1 # Skip the link line so it's not added to content - + lesson_content = [] else: # Add line to current lesson content lesson_content.append(line) - + i += 1 - + # Process the last lesson if current_lesson is not None and lesson_content: - lesson_text = '\n'.join(lesson_content).strip() + lesson_text = "\n".join(lesson_content).strip() if lesson_text: lesson = Lesson( lesson_number=current_lesson, title=lesson_title, - lesson_link=lesson_link + lesson_link=lesson_link, ) course.lessons.append(lesson) - + chunks = self.chunk_text(lesson_text) - for idx, chunk in enumerate(chunks): + for _idx, chunk in enumerate(chunks): # For any chunk of each lesson, add lesson context & course 
title - + chunk_with_context = f"Course {course_title} Lesson {current_lesson} content: {chunk}" - + course_chunk = CourseChunk( content=chunk_with_context, course_title=course.title, lesson_number=current_lesson, - chunk_index=chunk_counter + chunk_index=chunk_counter, ) course_chunks.append(course_chunk) chunk_counter += 1 - + # If no lessons found, treat entire content as one document if not course_chunks and len(lines) > 2: - remaining_content = '\n'.join(lines[start_index:]).strip() + remaining_content = "\n".join(lines[start_index:]).strip() if remaining_content: chunks = self.chunk_text(remaining_content) for chunk in chunks: course_chunk = CourseChunk( content=chunk, course_title=course.title, - chunk_index=chunk_counter + chunk_index=chunk_counter, ) course_chunks.append(course_chunk) chunk_counter += 1 - + return course, course_chunks diff --git a/backend/models.py b/backend/models.py index 7f7126fa..24a9652a 100644 --- a/backend/models.py +++ b/backend/models.py @@ -1,22 +1,27 @@ -from typing import List, Dict, Optional from pydantic import BaseModel + class Lesson(BaseModel): """Represents a lesson within a course""" + lesson_number: int # Sequential lesson number (1, 2, 3, etc.) 
- title: str # Lesson title - lesson_link: Optional[str] = None # URL link to the lesson + title: str # Lesson title + lesson_link: str | None = None # URL link to the lesson + class Course(BaseModel): """Represents a complete course with its lessons""" - title: str # Full course title (used as unique identifier) - course_link: Optional[str] = None # URL link to the course - instructor: Optional[str] = None # Course instructor name (optional metadata) - lessons: List[Lesson] = [] # List of lessons in this course + + title: str # Full course title (used as unique identifier) + course_link: str | None = None # URL link to the course + instructor: str | None = None # Course instructor name (optional metadata) + lessons: list[Lesson] = [] # List of lessons in this course + class CourseChunk(BaseModel): """Represents a text chunk from a course for vector storage""" - content: str # The actual text content - course_title: str # Which course this chunk belongs to - lesson_number: Optional[int] = None # Which lesson this chunk is from - chunk_index: int # Position of this chunk in the document \ No newline at end of file + + content: str # The actual text content + course_title: str # Which course this chunk belongs to + lesson_number: int | None = None # Which lesson this chunk is from + chunk_index: int # Position of this chunk in the document diff --git a/backend/rag_system.py b/backend/rag_system.py index 50d848c8..38af774e 100644 --- a/backend/rag_system.py +++ b/backend/rag_system.py @@ -1,147 +1,166 @@ -from typing import List, Tuple, Optional, Dict import os -from document_processor import DocumentProcessor -from vector_store import VectorStore + from ai_generator import AIGenerator +from document_processor import DocumentProcessor +from models import Course +from search_tools import CourseOutlineTool, CourseSearchTool, ToolManager from session_manager import SessionManager -from search_tools import ToolManager, CourseSearchTool -from models import Course, Lesson, 
CourseChunk +from vector_store import VectorStore + class RAGSystem: """Main orchestrator for the Retrieval-Augmented Generation system""" - + def __init__(self, config): self.config = config - + # Initialize core components - self.document_processor = DocumentProcessor(config.CHUNK_SIZE, config.CHUNK_OVERLAP) - self.vector_store = VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS) - self.ai_generator = AIGenerator(config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL) + self.document_processor = DocumentProcessor( + config.CHUNK_SIZE, config.CHUNK_OVERLAP + ) + self.vector_store = VectorStore( + config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS + ) + self.ai_generator = AIGenerator( + config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL + ) self.session_manager = SessionManager(config.MAX_HISTORY) - + # Initialize search tools self.tool_manager = ToolManager() self.search_tool = CourseSearchTool(self.vector_store) self.tool_manager.register_tool(self.search_tool) - - def add_course_document(self, file_path: str) -> Tuple[Course, int]: + self.outline_tool = CourseOutlineTool(self.vector_store) + self.tool_manager.register_tool(self.outline_tool) + + def add_course_document(self, file_path: str) -> tuple[Course, int]: """ Add a single course document to the knowledge base. 
- + Args: file_path: Path to the course document - + Returns: Tuple of (Course object, number of chunks created) """ try: # Process the document - course, course_chunks = self.document_processor.process_course_document(file_path) - + course, course_chunks = self.document_processor.process_course_document( + file_path + ) + # Add course metadata to vector store for semantic search self.vector_store.add_course_metadata(course) - + # Add course content chunks to vector store self.vector_store.add_course_content(course_chunks) - + return course, len(course_chunks) except Exception as e: print(f"Error processing course document {file_path}: {e}") return None, 0 - - def add_course_folder(self, folder_path: str, clear_existing: bool = False) -> Tuple[int, int]: + + def add_course_folder( + self, folder_path: str, clear_existing: bool = False + ) -> tuple[int, int]: """ Add all course documents from a folder. - + Args: folder_path: Path to folder containing course documents clear_existing: Whether to clear existing data first - + Returns: Tuple of (total courses added, total chunks created) """ total_courses = 0 total_chunks = 0 - + # Clear existing data if requested if clear_existing: print("Clearing existing data for fresh rebuild...") self.vector_store.clear_all_data() - + if not os.path.exists(folder_path): print(f"Folder {folder_path} does not exist") return 0, 0 - + # Get existing course titles to avoid re-processing existing_course_titles = set(self.vector_store.get_existing_course_titles()) - + # Process each file in the folder for file_name in os.listdir(folder_path): file_path = os.path.join(folder_path, file_name) - if os.path.isfile(file_path) and file_name.lower().endswith(('.pdf', '.docx', '.txt')): + if os.path.isfile(file_path) and file_name.lower().endswith( + (".pdf", ".docx", ".txt") + ): try: # Check if this course might already exist # We'll process the document to get the course ID, but only add if new - course, course_chunks = 
self.document_processor.process_course_document(file_path) - + course, course_chunks = ( + self.document_processor.process_course_document(file_path) + ) + if course and course.title not in existing_course_titles: # This is a new course - add it to the vector store self.vector_store.add_course_metadata(course) self.vector_store.add_course_content(course_chunks) total_courses += 1 total_chunks += len(course_chunks) - print(f"Added new course: {course.title} ({len(course_chunks)} chunks)") + print( + f"Added new course: {course.title} ({len(course_chunks)} chunks)" + ) existing_course_titles.add(course.title) elif course: print(f"Course already exists: {course.title} - skipping") except Exception as e: print(f"Error processing {file_name}: {e}") - + return total_courses, total_chunks - - def query(self, query: str, session_id: Optional[str] = None) -> Tuple[str, List[str]]: + + def query(self, query: str, session_id: str | None = None) -> tuple[str, list[str]]: """ Process a user query using the RAG system with tool-based search. 
- + Args: query: User's question session_id: Optional session ID for conversation context - + Returns: Tuple of (response, sources list - empty for tool-based approach) """ # Create prompt for the AI with clear instructions prompt = f"""Answer this question about course materials: {query}""" - + # Get conversation history if session exists history = None if session_id: history = self.session_manager.get_conversation_history(session_id) - + # Generate response using AI with tools response = self.ai_generator.generate_response( query=prompt, conversation_history=history, tools=self.tool_manager.get_tool_definitions(), - tool_manager=self.tool_manager + tool_manager=self.tool_manager, ) - + # Get sources from the search tool sources = self.tool_manager.get_last_sources() # Reset sources after retrieving them self.tool_manager.reset_sources() - + # Update conversation history if session_id: self.session_manager.add_exchange(session_id, query, response) - + # Return response with sources from tool searches return response, sources - - def get_course_analytics(self) -> Dict: + + def get_course_analytics(self) -> dict: """Get analytics about the course catalog""" return { "total_courses": self.vector_store.get_course_count(), - "course_titles": self.vector_store.get_existing_course_titles() - } \ No newline at end of file + "course_titles": self.vector_store.get_existing_course_titles(), + } diff --git a/backend/search_tools.py b/backend/search_tools.py index adfe8235..ae021e8d 100644 --- a/backend/search_tools.py +++ b/backend/search_tools.py @@ -1,16 +1,17 @@ -from typing import Dict, Any, Optional, Protocol from abc import ABC, abstractmethod -from vector_store import VectorStore, SearchResults +from typing import Any + +from vector_store import SearchResults, VectorStore class Tool(ABC): """Abstract base class for all tools""" - + @abstractmethod - def get_tool_definition(self) -> Dict[str, Any]: + def get_tool_definition(self) -> dict[str, Any]: """Return Anthropic 
tool definition for this tool""" pass - + @abstractmethod def execute(self, **kwargs) -> str: """Execute the tool with given parameters""" @@ -19,12 +20,12 @@ def execute(self, **kwargs) -> str: class CourseSearchTool(Tool): """Tool for searching course content with semantic course name matching""" - + def __init__(self, vector_store: VectorStore): self.store = vector_store self.last_sources = [] # Track sources from last search - - def get_tool_definition(self) -> Dict[str, Any]: + + def get_tool_definition(self) -> dict[str, Any]: """Return Anthropic tool definition for this tool""" return { "name": "search_course_content", @@ -33,46 +34,49 @@ def get_tool_definition(self) -> Dict[str, Any]: "type": "object", "properties": { "query": { - "type": "string", - "description": "What to search for in the course content" + "type": "string", + "description": "What to search for in the course content", }, "course_name": { "type": "string", - "description": "Course title (partial matches work, e.g. 'MCP', 'Introduction')" + "description": "Course title (partial matches work, e.g. 'MCP', 'Introduction')", }, "lesson_number": { "type": "integer", - "description": "Specific lesson number to search within (e.g. 1, 2, 3)" - } + "description": "Specific lesson number to search within (e.g. 1, 2, 3)", + }, }, - "required": ["query"] - } + "required": ["query"], + }, } - - def execute(self, query: str, course_name: Optional[str] = None, lesson_number: Optional[int] = None) -> str: + + def execute( + self, + query: str, + course_name: str | None = None, + lesson_number: int | None = None, + ) -> str: """ Execute the search tool with given parameters. 
- + Args: query: What to search for course_name: Optional course filter lesson_number: Optional lesson filter - + Returns: Formatted search results or error message """ - + # Use the vector store's unified search interface results = self.store.search( - query=query, - course_name=course_name, - lesson_number=lesson_number + query=query, course_name=course_name, lesson_number=lesson_number ) - + # Handle errors if results.error: return results.error - + # Handle empty results if results.is_empty(): filter_info = "" @@ -81,44 +85,102 @@ def execute(self, query: str, course_name: Optional[str] = None, lesson_number: if lesson_number: filter_info += f" in lesson {lesson_number}" return f"No relevant content found{filter_info}." - + # Format and return results return self._format_results(results) - + def _format_results(self, results: SearchResults) -> str: """Format search results with course and lesson context""" formatted = [] sources = [] # Track sources for the UI - - for doc, meta in zip(results.documents, results.metadata): - course_title = meta.get('course_title', 'unknown') - lesson_num = meta.get('lesson_number') - + + for doc, meta in zip(results.documents, results.metadata, strict=False): + course_title = meta.get("course_title", "unknown") + lesson_num = meta.get("lesson_number") + # Build context header header = f"[{course_title}" if lesson_num is not None: header += f" - Lesson {lesson_num}" header += "]" - - # Track source for the UI - source = course_title + + # Track source for the UI with link + source_text = course_title if lesson_num is not None: - source += f" - Lesson {lesson_num}" - sources.append(source) - + source_text += f" - Lesson {lesson_num}" + + # Get lesson link from vector store + lesson_link = None + if lesson_num is not None: + lesson_link = self.store.get_lesson_link(course_title, lesson_num) + + sources.append({"text": source_text, "link": lesson_link}) + formatted.append(f"{header}\n{doc}") - + # Store sources for retrieval 
self.last_sources = sources - + return "\n\n".join(formatted) + +class CourseOutlineTool(Tool): + """Tool for retrieving course outline/structure""" + + def __init__(self, vector_store: VectorStore): + self.store = vector_store + + def get_tool_definition(self) -> dict[str, Any]: + return { + "name": "get_course_outline", + "description": "Get the complete outline of a course including course title, course link, and list of all lessons with their numbers and titles. Use this when users ask about what a course covers, what lessons are in a course, or want an overview of course structure.", + "input_schema": { + "type": "object", + "properties": { + "course_name": { + "type": "string", + "description": "Course title or name (partial matches work)", + } + }, + "required": ["course_name"], + }, + } + + def execute(self, course_name: str) -> str: + metadata = self.store.get_course_metadata(course_name) + + if not metadata: + return f"No course found matching '{course_name}'" + + return self._format_outline(metadata) + + def _format_outline(self, metadata: dict[str, Any]) -> str: + lines = [ + f"Course: {metadata.get('title', 'Unknown')}", + f"Course Link: {metadata.get('course_link', 'Not available')}", + "", + "Lessons:", + ] + + lessons = metadata.get("lessons", []) + if lessons: + sorted_lessons = sorted(lessons, key=lambda x: x.get("lesson_number", 0)) + for lesson in sorted_lessons: + lines.append( + f"- Lesson {lesson.get('lesson_number', '?')}: {lesson.get('lesson_title', 'Untitled')}" + ) + else: + lines.append("- No lessons found") + + return "\n".join(lines) + + class ToolManager: """Manages available tools for the AI""" - + def __init__(self): self.tools = {} - + def register_tool(self, tool: Tool): """Register any tool that implements the Tool interface""" tool_def = tool.get_tool_definition() @@ -127,28 +189,27 @@ def register_tool(self, tool: Tool): raise ValueError("Tool must have a 'name' in its definition") self.tools[tool_name] = tool - def 
get_tool_definitions(self) -> list: """Get all tool definitions for Anthropic tool calling""" return [tool.get_tool_definition() for tool in self.tools.values()] - + def execute_tool(self, tool_name: str, **kwargs) -> str: """Execute a tool by name with given parameters""" if tool_name not in self.tools: return f"Tool '{tool_name}' not found" - + return self.tools[tool_name].execute(**kwargs) - + def get_last_sources(self) -> list: """Get sources from the last search operation""" # Check all tools for last_sources attribute for tool in self.tools.values(): - if hasattr(tool, 'last_sources') and tool.last_sources: + if hasattr(tool, "last_sources") and tool.last_sources: return tool.last_sources return [] def reset_sources(self): """Reset sources from all tools that track sources""" for tool in self.tools.values(): - if hasattr(tool, 'last_sources'): - tool.last_sources = [] \ No newline at end of file + if hasattr(tool, "last_sources"): + tool.last_sources = [] diff --git a/backend/session_manager.py b/backend/session_manager.py index a5a96b1a..cae2f3ca 100644 --- a/backend/session_manager.py +++ b/backend/session_manager.py @@ -1,61 +1,65 @@ -from typing import Dict, List, Optional from dataclasses import dataclass + @dataclass class Message: """Represents a single message in a conversation""" - role: str # "user" or "assistant" + + role: str # "user" or "assistant" content: str # The message content + class SessionManager: """Manages conversation sessions and message history""" - + def __init__(self, max_history: int = 5): self.max_history = max_history - self.sessions: Dict[str, List[Message]] = {} + self.sessions: dict[str, list[Message]] = {} self.session_counter = 0 - + def create_session(self) -> str: """Create a new conversation session""" self.session_counter += 1 session_id = f"session_{self.session_counter}" self.sessions[session_id] = [] return session_id - + def add_message(self, session_id: str, role: str, content: str): """Add a message to the 
conversation history""" if session_id not in self.sessions: self.sessions[session_id] = [] - + message = Message(role=role, content=content) self.sessions[session_id].append(message) - + # Keep conversation history within limits if len(self.sessions[session_id]) > self.max_history * 2: - self.sessions[session_id] = self.sessions[session_id][-self.max_history * 2:] - + self.sessions[session_id] = self.sessions[session_id][ + -self.max_history * 2 : + ] + def add_exchange(self, session_id: str, user_message: str, assistant_message: str): """Add a complete question-answer exchange""" self.add_message(session_id, "user", user_message) self.add_message(session_id, "assistant", assistant_message) - - def get_conversation_history(self, session_id: Optional[str]) -> Optional[str]: + + def get_conversation_history(self, session_id: str | None) -> str | None: """Get formatted conversation history for a session""" if not session_id or session_id not in self.sessions: return None - + messages = self.sessions[session_id] if not messages: return None - + # Format messages for context formatted_messages = [] for msg in messages: formatted_messages.append(f"{msg.role.title()}: {msg.content}") - + return "\n".join(formatted_messages) - + def clear_session(self, session_id: str): """Clear all messages from a session""" if session_id in self.sessions: - self.sessions[session_id] = [] \ No newline at end of file + self.sessions[session_id] = [] diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 00000000..96e9ea2e --- /dev/null +++ b/backend/tests/__init__.py @@ -0,0 +1 @@ +# Test package for RAG chatbot backend diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 00000000..6b847065 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,201 @@ +"""Shared fixtures and mocks for RAG chatbot tests""" + +import os +import sys +from unittest.mock import Mock + +import pytest + +# Add backend to path for 
imports +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from vector_store import SearchResults + + +# ============================================================================ +# Configuration Fixtures +# ============================================================================ + +@pytest.fixture +def mock_config(): + """Create a mock configuration object for testing""" + config = Mock() + config.MAX_RESULTS = 5 + config.ANTHROPIC_API_KEY = "test-api-key" + config.ANTHROPIC_MODEL = "claude-sonnet-4-20250514" + config.CHUNK_SIZE = 800 + config.CHUNK_OVERLAP = 100 + config.CHROMA_PATH = "/tmp/test_chroma" + config.EMBEDDING_MODEL = "all-MiniLM-L6-v2" + config.MAX_HISTORY = 2 + return config + + +# ============================================================================ +# RAG System Fixtures +# ============================================================================ + +@pytest.fixture +def mock_rag_system(): + """Create a mock RAGSystem for API testing""" + rag = Mock() + + # Mock query method + rag.query.return_value = ( + "This is a test answer about the course material.", + [{"text": "ML Fundamentals - Lesson 1", "link": "https://example.com/lesson1"}] + ) + + # Mock session manager + rag.session_manager = Mock() + rag.session_manager.create_session.return_value = "test-session-123" + rag.session_manager.clear_session.return_value = None + + # Mock get_course_analytics + rag.get_course_analytics.return_value = { + "total_courses": 3, + "course_titles": ["ML Fundamentals", "Python Basics", "Data Science 101"] + } + + return rag + + +@pytest.fixture +def mock_rag_system_empty(): + """Create a mock RAGSystem with no courses""" + rag = Mock() + + rag.query.return_value = ( + "I don't have any course materials to search.", + [] + ) + + rag.session_manager = Mock() + rag.session_manager.create_session.return_value = "empty-session-456" + + rag.get_course_analytics.return_value = { + "total_courses": 0, + "course_titles": [] + } + + 
return rag + + +@pytest.fixture +def mock_rag_system_error(): + """Create a mock RAGSystem that raises errors""" + rag = Mock() + + rag.query.side_effect = Exception("Database connection failed") + + rag.session_manager = Mock() + rag.session_manager.create_session.return_value = "error-session-789" + rag.session_manager.clear_session.side_effect = Exception("Session not found") + + rag.get_course_analytics.side_effect = Exception("Analytics unavailable") + + return rag + + +# ============================================================================ +# Session Manager Fixtures +# ============================================================================ + +@pytest.fixture +def mock_session_manager(): + """Create a mock SessionManager""" + manager = Mock() + manager.create_session.return_value = "session-abc-123" + manager.get_conversation_history.return_value = None + manager.add_exchange.return_value = None + manager.clear_session.return_value = None + return manager + + +@pytest.fixture +def mock_session_manager_with_history(): + """Create a mock SessionManager with existing conversation history""" + manager = Mock() + manager.create_session.return_value = "session-with-history" + manager.get_conversation_history.return_value = [ + {"role": "user", "content": "What is machine learning?"}, + {"role": "assistant", "content": "Machine learning is..."} + ] + manager.add_exchange.return_value = None + return manager + + +@pytest.fixture +def sample_search_results(): + """Create sample successful search results""" + return SearchResults( + documents=["Lesson content about machine learning basics"], + metadata=[ + {"course_title": "ML Fundamentals", "lesson_number": 1, "chunk_index": 0} + ], + distances=[0.15], + ) + + +@pytest.fixture +def empty_search_results(): + """Create empty search results""" + return SearchResults(documents=[], metadata=[], distances=[]) + + +@pytest.fixture +def error_search_results(): + """Create search results with error""" + return 
SearchResults.empty("No course found matching 'NonExistent'") + + +@pytest.fixture +def mock_vector_store(sample_search_results): + """Create a mock VectorStore with working configuration""" + store = Mock() + store.max_results = 5 + store.search = Mock(return_value=sample_search_results) + store.get_lesson_link = Mock(return_value="https://example.com/lesson1") + return store + + +@pytest.fixture +def mock_vector_store_zero_results(): + """Create a mock VectorStore simulating MAX_RESULTS=0 bug""" + store = Mock() + store.max_results = 0 + store.search = Mock( + return_value=SearchResults(documents=[], metadata=[], distances=[]) + ) + store.get_lesson_link = Mock(return_value=None) + return store + + +@pytest.fixture +def mock_anthropic_response_with_tool_use(): + """Create mock Anthropic response that requests tool use""" + response = Mock() + response.stop_reason = "tool_use" + + tool_use_block = Mock() + tool_use_block.type = "tool_use" + tool_use_block.name = "search_course_content" + tool_use_block.id = "tool_123" + tool_use_block.input = {"query": "machine learning"} + + response.content = [tool_use_block] + return response + + +@pytest.fixture +def mock_anthropic_response_text(): + """Create mock Anthropic response with text only""" + response = Mock() + response.stop_reason = "end_turn" + + text_block = Mock() + text_block.type = "text" + text_block.text = "Here is the answer about machine learning." 
+ + response.content = [text_block] + return response diff --git a/backend/tests/test_ai_generator.py b/backend/tests/test_ai_generator.py new file mode 100644 index 00000000..cc7e395f --- /dev/null +++ b/backend/tests/test_ai_generator.py @@ -0,0 +1,564 @@ +"""Tests for AIGenerator - Claude API integration with tool calling""" + +import os +import sys +from unittest.mock import Mock, patch + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from ai_generator import AIGenerator + + +class TestAIGeneratorResponse: + """Test AIGenerator.generate_response() method""" + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_direct_answer(self, mock_anthropic_class): + """Test direct response without tool use""" + mock_client = Mock() + mock_response = Mock() + mock_response.stop_reason = "end_turn" + mock_response.content = [Mock(type="text", text="Direct answer")] + mock_client.messages.create.return_value = mock_response + mock_anthropic_class.return_value = mock_client + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response("Hello") + + assert result == "Direct answer" + mock_client.messages.create.assert_called_once() + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_with_tool_use(self, mock_anthropic_class): + """Test response that uses tools""" + mock_client = Mock() + + # First response requests tool use + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "search_course_content" + tool_block.id = "tool_123" + tool_block.input = {"query": "machine learning"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block] + + # Second response after tool execution + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [Mock(type="text", text="Answer based on search")] + + mock_client.messages.create.side_effect = [first_response, final_response] 
+ mock_anthropic_class.return_value = mock_client + + # Mock tool manager + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "[Course Content] ML basics..." + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="What is ML?", + tools=[{"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + assert result == "Answer based on search" + mock_tool_manager.execute_tool.assert_called_once_with( + "search_course_content", query="machine learning" + ) + + @patch("ai_generator.anthropic.Anthropic") + def test_tool_receives_empty_results_from_bug(self, mock_anthropic_class): + """CRITICAL: Test behavior when tool returns empty due to MAX_RESULTS=0 + + When the search tool returns "No relevant content found" (due to + MAX_RESULTS=0 bug), the AI has no context and returns an unhelpful + response. + """ + mock_client = Mock() + + # First response requests tool use + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "search_course_content" + tool_block.id = "tool_123" + tool_block.input = {"query": "machine learning"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block] + + # Second response after empty tool result + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [ + Mock( + type="text", + text="I couldn't find any relevant content in the course materials.", + ) + ] + + mock_client.messages.create.side_effect = [first_response, final_response] + mock_anthropic_class.return_value = mock_client + + # Mock tool manager returning empty results (the bug) + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "No relevant content found." 
+ + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="What is ML?", + tools=[{"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + # The AI has no context and gives unhelpful response + assert "couldn't find" in result.lower() + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_with_conversation_history(self, mock_anthropic_class): + """Test response with conversation context""" + mock_client = Mock() + mock_response = Mock() + mock_response.stop_reason = "end_turn" + mock_response.content = [Mock(type="text", text="Context-aware answer")] + mock_client.messages.create.return_value = mock_response + mock_anthropic_class.return_value = mock_client + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="Follow up question", + conversation_history="User: Previous question\nAssistant: Previous answer", + ) + + # Verify history was included in system prompt + call_args = mock_client.messages.create.call_args + assert "Previous question" in call_args.kwargs["system"] + assert result == "Context-aware answer" + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_passes_tools_to_api(self, mock_anthropic_class): + """Test that tools are correctly passed to Claude API""" + mock_client = Mock() + mock_response = Mock() + mock_response.stop_reason = "end_turn" + mock_response.content = [Mock(type="text", text="Answer")] + mock_client.messages.create.return_value = mock_response + mock_anthropic_class.return_value = mock_client + + tools = [ + {"name": "search_course_content", "description": "Search courses"}, + {"name": "get_course_outline", "description": "Get outline"}, + ] + + generator = AIGenerator(api_key="test-key", model="test-model") + generator.generate_response(query="Test", tools=tools) + + call_args = mock_client.messages.create.call_args + assert call_args.kwargs["tools"] 
== tools + assert call_args.kwargs["tool_choice"] == {"type": "auto"} + + +class TestSequentialToolCalling: + """Test sequential tool calling behavior (multi-round tool use)""" + + @patch("ai_generator.anthropic.Anthropic") + def test_single_tool_round_sufficient(self, mock_anthropic_class): + """Test: 1 tool call → answer (2 API calls)""" + mock_client = Mock() + + # First call: tool use + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "search_course_content" + tool_block.id = "tool_1" + tool_block.input = {"query": "python"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block] + + # Second call: final answer + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [Mock(type="text", text="Python is a language")] + + mock_client.messages.create.side_effect = [first_response, final_response] + mock_anthropic_class.return_value = mock_client + + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "Python content here" + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="What is Python?", + tools=[{"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + assert result == "Python is a language" + assert mock_client.messages.create.call_count == 2 + assert mock_tool_manager.execute_tool.call_count == 1 + + @patch("ai_generator.anthropic.Anthropic") + def test_two_sequential_tool_rounds(self, mock_anthropic_class): + """Test: 2 tool calls → answer (3 API calls)""" + mock_client = Mock() + + # First call: first tool use + tool_block_1 = Mock() + tool_block_1.type = "tool_use" + tool_block_1.name = "get_course_outline" + tool_block_1.id = "tool_1" + tool_block_1.input = {"course_name": "MCP"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block_1] + + # Second call: second tool use + 
tool_block_2 = Mock() + tool_block_2.type = "tool_use" + tool_block_2.name = "search_course_content" + tool_block_2.id = "tool_2" + tool_block_2.input = {"query": "tool creation"} + + second_response = Mock() + second_response.stop_reason = "tool_use" + second_response.content = [tool_block_2] + + # Third call: final answer + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [ + Mock(type="text", text="Lesson 3 covers tool creation") + ] + + mock_client.messages.create.side_effect = [ + first_response, + second_response, + final_response, + ] + mock_anthropic_class.return_value = mock_client + + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.side_effect = [ + "Lesson 3: Tool Creation", + "Content about tool creation...", + ] + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="What topic is covered in lesson 3 of MCP?", + tools=[{"name": "get_course_outline"}, {"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + assert result == "Lesson 3 covers tool creation" + assert mock_client.messages.create.call_count == 3 + assert mock_tool_manager.execute_tool.call_count == 2 + + @patch("ai_generator.anthropic.Anthropic") + def test_max_rounds_enforced(self, mock_anthropic_class): + """Test: Stops at 2 rounds even if Claude wants more""" + mock_client = Mock() + + # Create tool blocks for each round + def create_tool_block(tool_id): + block = Mock() + block.type = "tool_use" + block.name = "search_course_content" + block.id = tool_id + block.input = {"query": "test"} + return block + + # All responses request more tools + responses = [] + for i in range(5): # Try to do 5 rounds + response = Mock() + response.stop_reason = "tool_use" + response.content = [create_tool_block(f"tool_{i}")] + responses.append(response) + + mock_client.messages.create.side_effect = responses + mock_anthropic_class.return_value = mock_client + + 
mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "Result" + + generator = AIGenerator(api_key="test-key", model="test-model") + generator.generate_response( + query="Complex query", + tools=[{"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + # Should stop at 3 API calls (initial + 2 rounds max) + assert mock_client.messages.create.call_count == 3 + assert mock_tool_manager.execute_tool.call_count == 2 + + @patch("ai_generator.anthropic.Anthropic") + def test_early_termination_no_tools(self, mock_anthropic_class): + """Test: Exits when Claude stops requesting tools""" + mock_client = Mock() + + # First call: direct answer (no tool use) + response = Mock() + response.stop_reason = "end_turn" + response.content = [Mock(type="text", text="Direct answer")] + + mock_client.messages.create.return_value = response + mock_anthropic_class.return_value = mock_client + + mock_tool_manager = Mock() + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="General question", + tools=[{"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + assert result == "Direct answer" + assert mock_client.messages.create.call_count == 1 + mock_tool_manager.execute_tool.assert_not_called() + + @patch("ai_generator.anthropic.Anthropic") + def test_tool_error_handled_gracefully(self, mock_anthropic_class): + """Test: Error returned as tool result, Claude continues""" + mock_client = Mock() + + # First call: tool use + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "search_course_content" + tool_block.id = "tool_1" + tool_block.input = {"query": "test"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block] + + # Second call: final answer + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [Mock(type="text", text="Sorry, encountered an error")] 
+ + mock_client.messages.create.side_effect = [first_response, final_response] + mock_anthropic_class.return_value = mock_client + + # Tool raises an exception + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.side_effect = Exception( + "Database connection failed" + ) + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="Test query", + tools=[{"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + # Should continue and return a response (not crash) + assert result == "Sorry, encountered an error" + assert mock_client.messages.create.call_count == 2 + + # Verify error was passed to Claude as tool result + second_call = mock_client.messages.create.call_args_list[1] + messages = second_call.kwargs["messages"] + tool_result_msg = messages[-1] + assert tool_result_msg["content"][0]["is_error"] is True + assert "Database connection failed" in tool_result_msg["content"][0]["content"] + + @patch("ai_generator.anthropic.Anthropic") + def test_tools_included_in_followup_calls(self, mock_anthropic_class): + """CRITICAL: Verifies tools param is included in round 2 API calls""" + mock_client = Mock() + + # First call: tool use + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "search_course_content" + tool_block.id = "tool_1" + tool_block.input = {"query": "test"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block] + + # Second call: final answer + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [Mock(type="text", text="Answer")] + + mock_client.messages.create.side_effect = [first_response, final_response] + mock_anthropic_class.return_value = mock_client + + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "Result" + + tools = [{"name": "search_course_content"}, {"name": "get_course_outline"}] + + generator = 
AIGenerator(api_key="test-key", model="test-model") + generator.generate_response( + query="Test", tools=tools, tool_manager=mock_tool_manager + ) + + # CRITICAL: Both API calls should include tools + first_call = mock_client.messages.create.call_args_list[0] + second_call = mock_client.messages.create.call_args_list[1] + + assert first_call.kwargs["tools"] == tools + assert second_call.kwargs["tools"] == tools # This was the bug! + + @patch("ai_generator.anthropic.Anthropic") + def test_message_history_preserved(self, mock_anthropic_class): + """Test: Full context passed through all rounds""" + mock_client = Mock() + + # First call: tool use + tool_block_1 = Mock() + tool_block_1.type = "tool_use" + tool_block_1.name = "get_course_outline" + tool_block_1.id = "tool_1" + tool_block_1.input = {"course_name": "Test"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block_1] + + # Second call: another tool use + tool_block_2 = Mock() + tool_block_2.type = "tool_use" + tool_block_2.name = "search_course_content" + tool_block_2.id = "tool_2" + tool_block_2.input = {"query": "details"} + + second_response = Mock() + second_response.stop_reason = "tool_use" + second_response.content = [tool_block_2] + + # Third call: final answer + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [Mock(type="text", text="Final answer")] + + mock_client.messages.create.side_effect = [ + first_response, + second_response, + final_response, + ] + mock_anthropic_class.return_value = mock_client + + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.side_effect = ["Outline result", "Search result"] + + generator = AIGenerator(api_key="test-key", model="test-model") + generator.generate_response( + query="Complex question", + tools=[{"name": "get_course_outline"}, {"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + # Check third API call has full history + 
third_call = mock_client.messages.create.call_args_list[2] + messages = third_call.kwargs["messages"] + + # Should have: user, assistant (tool_use), user (tool_result), assistant (tool_use), user (tool_result) + assert len(messages) == 5 + assert messages[0]["role"] == "user" + assert messages[1]["role"] == "assistant" + assert messages[2]["role"] == "user" + assert messages[3]["role"] == "assistant" + assert messages[4]["role"] == "user" + + +class TestAIGeneratorToolExecution: + """Test tool execution handling""" + + @patch("ai_generator.anthropic.Anthropic") + def test_handle_multiple_tool_calls_in_one_response(self, mock_anthropic_class): + """Test handling multiple tool calls in one response""" + mock_client = Mock() + + # Response with multiple tool calls + tool_block_1 = Mock() + tool_block_1.type = "tool_use" + tool_block_1.name = "search_course_content" + tool_block_1.id = "tool_1" + tool_block_1.input = {"query": "topic A"} + + tool_block_2 = Mock() + tool_block_2.type = "tool_use" + tool_block_2.name = "get_course_outline" + tool_block_2.id = "tool_2" + tool_block_2.input = {"course_name": "ML Course"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block_1, tool_block_2] + + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [Mock(type="text", text="Combined answer")] + + mock_client.messages.create.side_effect = [first_response, final_response] + mock_anthropic_class.return_value = mock_client + + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.side_effect = ["Result 1", "Result 2"] + + generator = AIGenerator(api_key="test-key", model="test-model") + result = generator.generate_response( + query="Complex question", + tools=[{"name": "search_course_content"}, {"name": "get_course_outline"}], + tool_manager=mock_tool_manager, + ) + + assert result == "Combined answer" + assert mock_tool_manager.execute_tool.call_count == 2 + + 
@patch("ai_generator.anthropic.Anthropic") + def test_tool_results_sent_back_to_api(self, mock_anthropic_class): + """Test that tool results are correctly sent back to Claude""" + mock_client = Mock() + + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "search_course_content" + tool_block.id = "tool_abc" + tool_block.input = {"query": "test"} + + first_response = Mock() + first_response.stop_reason = "tool_use" + first_response.content = [tool_block] + + final_response = Mock() + final_response.stop_reason = "end_turn" + final_response.content = [Mock(type="text", text="Final answer")] + + mock_client.messages.create.side_effect = [first_response, final_response] + mock_anthropic_class.return_value = mock_client + + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "Tool output here" + + generator = AIGenerator(api_key="test-key", model="test-model") + generator.generate_response( + query="Test", + tools=[{"name": "search_course_content"}], + tool_manager=mock_tool_manager, + ) + + # Check the second API call includes tool results + second_call_args = mock_client.messages.create.call_args_list[1] + messages = second_call_args.kwargs["messages"] + + # Should have: user message, assistant tool_use, user tool_result + assert len(messages) == 3 + assert messages[2]["role"] == "user" + assert messages[2]["content"][0]["type"] == "tool_result" + assert messages[2]["content"][0]["tool_use_id"] == "tool_abc" + assert messages[2]["content"][0]["content"] == "Tool output here" diff --git a/backend/tests/test_api.py b/backend/tests/test_api.py new file mode 100644 index 00000000..744994fa --- /dev/null +++ b/backend/tests/test_api.py @@ -0,0 +1,393 @@ +"""Tests for FastAPI endpoints + +This module defines a test app inline to avoid import issues with the main app, +which mounts static files that don't exist in the test environment. 
+""" + +import pytest +from unittest.mock import Mock, patch +from fastapi import FastAPI, HTTPException +from fastapi.testclient import TestClient +from pydantic import BaseModel +from typing import List, Optional +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + + +# ============================================================================ +# Test App Definition (mirrors app.py endpoints without static file mounts) +# ============================================================================ + +class QueryRequest(BaseModel): + """Request model for course queries""" + query: str + session_id: Optional[str] = None + + +class QueryResponse(BaseModel): + """Response model for course queries""" + answer: str + sources: List[str] + session_id: str + + +class CourseStats(BaseModel): + """Response model for course statistics""" + total_courses: int + course_titles: List[str] + + +def create_test_app(mock_rag_system): + """Create a test FastAPI app with injected mock RAGSystem""" + app = FastAPI(title="Test Course Materials RAG System") + + @app.post("/api/query", response_model=QueryResponse) + async def query_documents(request: QueryRequest): + """Process a query and return response with sources""" + try: + session_id = request.session_id + if not session_id: + session_id = mock_rag_system.session_manager.create_session() + + answer, sources = mock_rag_system.query(request.query, session_id) + + # Convert source dicts to strings if needed + source_strings = [] + for s in sources: + if isinstance(s, dict): + source_strings.append(f"{s.get('text', '')} - {s.get('link', '')}") + else: + source_strings.append(str(s)) + + return QueryResponse( + answer=answer, + sources=source_strings, + session_id=session_id + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @app.get("/api/courses", response_model=CourseStats) + async def get_course_stats(): + """Get course analytics and statistics""" + 
try: + analytics = mock_rag_system.get_course_analytics() + return CourseStats( + total_courses=analytics["total_courses"], + course_titles=analytics["course_titles"] + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @app.delete("/api/session/{session_id}") + async def clear_session(session_id: str): + """Clear a conversation session""" + try: + mock_rag_system.session_manager.clear_session(session_id) + return {"status": "success"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + return app + + +# ============================================================================ +# Query Endpoint Tests +# ============================================================================ + +class TestQueryEndpoint: + """Tests for POST /api/query endpoint""" + + def test_query_success(self, mock_rag_system): + """Test successful query returns answer and sources""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + response = client.post( + "/api/query", + json={"query": "What is machine learning?"} + ) + + assert response.status_code == 200 + data = response.json() + assert "answer" in data + assert "sources" in data + assert "session_id" in data + assert data["answer"] == "This is a test answer about the course material." 
+ + def test_query_creates_session_when_not_provided(self, mock_rag_system): + """Test that a new session is created when session_id is not provided""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + response = client.post( + "/api/query", + json={"query": "Test question"} + ) + + assert response.status_code == 200 + data = response.json() + assert data["session_id"] == "test-session-123" + mock_rag_system.session_manager.create_session.assert_called_once() + + def test_query_uses_existing_session(self, mock_rag_system): + """Test that existing session_id is used when provided""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + response = client.post( + "/api/query", + json={"query": "Follow up question", "session_id": "existing-session"} + ) + + assert response.status_code == 200 + data = response.json() + assert data["session_id"] == "existing-session" + mock_rag_system.session_manager.create_session.assert_not_called() + + def test_query_calls_rag_system(self, mock_rag_system): + """Test that query endpoint calls RAGSystem.query with correct args""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + client.post( + "/api/query", + json={"query": "What is Python?", "session_id": "my-session"} + ) + + mock_rag_system.query.assert_called_once_with("What is Python?", "my-session") + + def test_query_returns_sources(self, mock_rag_system): + """Test that sources are returned in the response""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + response = client.post( + "/api/query", + json={"query": "Tell me about lessons"} + ) + + data = response.json() + assert len(data["sources"]) == 1 + assert "ML Fundamentals" in data["sources"][0] + + def test_query_empty_query_validation(self, mock_rag_system): + """Test that empty query returns validation error""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + response = client.post( + "/api/query", + json={} + ) + 
+ assert response.status_code == 422 # Validation error + + def test_query_error_returns_500(self, mock_rag_system_error): + """Test that RAGSystem errors return 500 status""" + app = create_test_app(mock_rag_system_error) + client = TestClient(app) + + response = client.post( + "/api/query", + json={"query": "This will fail"} + ) + + assert response.status_code == 500 + assert "Database connection failed" in response.json()["detail"] + + def test_query_with_empty_sources(self, mock_rag_system_empty): + """Test query when no sources are found""" + app = create_test_app(mock_rag_system_empty) + client = TestClient(app) + + response = client.post( + "/api/query", + json={"query": "Unknown topic"} + ) + + assert response.status_code == 200 + data = response.json() + assert data["sources"] == [] + + +# ============================================================================ +# Courses Endpoint Tests +# ============================================================================ + +class TestCoursesEndpoint: + """Tests for GET /api/courses endpoint""" + + def test_get_courses_success(self, mock_rag_system): + """Test successful retrieval of course statistics""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + response = client.get("/api/courses") + + assert response.status_code == 200 + data = response.json() + assert data["total_courses"] == 3 + assert len(data["course_titles"]) == 3 + assert "ML Fundamentals" in data["course_titles"] + + def test_get_courses_empty(self, mock_rag_system_empty): + """Test courses endpoint when no courses exist""" + app = create_test_app(mock_rag_system_empty) + client = TestClient(app) + + response = client.get("/api/courses") + + assert response.status_code == 200 + data = response.json() + assert data["total_courses"] == 0 + assert data["course_titles"] == [] + + def test_get_courses_error(self, mock_rag_system_error): + """Test courses endpoint when error occurs""" + app = 
create_test_app(mock_rag_system_error) + client = TestClient(app) + + response = client.get("/api/courses") + + assert response.status_code == 500 + assert "Analytics unavailable" in response.json()["detail"] + + +# ============================================================================ +# Session Endpoint Tests +# ============================================================================ + +class TestSessionEndpoint: + """Tests for DELETE /api/session/{session_id} endpoint""" + + def test_clear_session_success(self, mock_rag_system): + """Test successful session clearing""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + response = client.delete("/api/session/test-session-123") + + assert response.status_code == 200 + assert response.json()["status"] == "success" + mock_rag_system.session_manager.clear_session.assert_called_once_with( + "test-session-123" + ) + + def test_clear_session_not_found(self, mock_rag_system_error): + """Test clearing non-existent session returns error""" + app = create_test_app(mock_rag_system_error) + client = TestClient(app) + + response = client.delete("/api/session/nonexistent") + + assert response.status_code == 500 + assert "Session not found" in response.json()["detail"] + + +# ============================================================================ +# Request/Response Model Tests +# ============================================================================ + +class TestRequestModels: + """Tests for Pydantic request/response models""" + + def test_query_request_with_all_fields(self): + """Test QueryRequest with all fields""" + request = QueryRequest(query="test", session_id="abc123") + assert request.query == "test" + assert request.session_id == "abc123" + + def test_query_request_with_optional_session(self): + """Test QueryRequest with optional session_id""" + request = QueryRequest(query="test") + assert request.query == "test" + assert request.session_id is None + + def 
test_query_response_model(self): + """Test QueryResponse model""" + response = QueryResponse( + answer="Test answer", + sources=["Source 1", "Source 2"], + session_id="session-123" + ) + assert response.answer == "Test answer" + assert len(response.sources) == 2 + assert response.session_id == "session-123" + + def test_course_stats_model(self): + """Test CourseStats model""" + stats = CourseStats( + total_courses=5, + course_titles=["Course A", "Course B"] + ) + assert stats.total_courses == 5 + assert len(stats.course_titles) == 2 + + +# ============================================================================ +# Integration-style Tests (with more realistic mock behavior) +# ============================================================================ + +class TestAPIIntegration: + """Integration-style tests for API workflows""" + + def test_query_then_clear_session_workflow(self, mock_rag_system): + """Test typical user workflow: query then clear session""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + # Make initial query + query_response = client.post( + "/api/query", + json={"query": "What is ML?"} + ) + session_id = query_response.json()["session_id"] + + # Clear the session + clear_response = client.delete(f"/api/session/{session_id}") + + assert query_response.status_code == 200 + assert clear_response.status_code == 200 + + def test_multiple_queries_same_session(self, mock_rag_system): + """Test multiple queries with the same session""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + # First query + response1 = client.post( + "/api/query", + json={"query": "First question"} + ) + session_id = response1.json()["session_id"] + + # Second query with same session + response2 = client.post( + "/api/query", + json={"query": "Follow up", "session_id": session_id} + ) + + assert response1.status_code == 200 + assert response2.status_code == 200 + assert response2.json()["session_id"] == session_id + + def 
test_get_courses_and_query(self, mock_rag_system): + """Test getting courses then querying""" + app = create_test_app(mock_rag_system) + client = TestClient(app) + + # Get available courses + courses_response = client.get("/api/courses") + courses = courses_response.json()["course_titles"] + + # Query about first course + query_response = client.post( + "/api/query", + json={"query": f"Tell me about {courses[0]}"} + ) + + assert courses_response.status_code == 200 + assert query_response.status_code == 200 diff --git a/backend/tests/test_config.py b/backend/tests/test_config.py new file mode 100644 index 00000000..7dd25165 --- /dev/null +++ b/backend/tests/test_config.py @@ -0,0 +1,74 @@ +"""Tests to validate configuration settings and catch misconfigurations""" + +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from config import config + + +class TestConfigValidation: + """Test that configuration values are valid""" + + def test_max_results_is_positive(self): + """CRITICAL: MAX_RESULTS must be > 0 for search to work + + This test will FAIL with current config (MAX_RESULTS=0). + The bug is in config.py line 21. + """ + assert config.MAX_RESULTS > 0, ( + f"BUG FOUND: MAX_RESULTS is {config.MAX_RESULTS}, but must be > 0. " + "Setting MAX_RESULTS=0 causes all searches to return empty results! 
" + "Fix: Change MAX_RESULTS: int = 0 to MAX_RESULTS: int = 5 in config.py" + ) + + def test_max_results_is_reasonable(self): + """MAX_RESULTS should be a reasonable value (1-20)""" + assert ( + 1 <= config.MAX_RESULTS <= 20 + ), f"MAX_RESULTS={config.MAX_RESULTS} is outside reasonable range 1-20" + + def test_chunk_size_is_positive(self): + """CHUNK_SIZE must be > 0""" + assert config.CHUNK_SIZE > 0, "CHUNK_SIZE must be positive" + + def test_chunk_overlap_less_than_size(self): + """CHUNK_OVERLAP must be less than CHUNK_SIZE""" + assert config.CHUNK_OVERLAP < config.CHUNK_SIZE, ( + f"CHUNK_OVERLAP ({config.CHUNK_OVERLAP}) must be less than " + f"CHUNK_SIZE ({config.CHUNK_SIZE})" + ) + + def test_anthropic_api_key_is_string(self): + """ANTHROPIC_API_KEY should be a string""" + assert isinstance(config.ANTHROPIC_API_KEY, str) + + def test_chroma_path_is_set(self): + """CHROMA_PATH must be set""" + assert config.CHROMA_PATH, "CHROMA_PATH must be set" + assert len(config.CHROMA_PATH) > 0 + + +class TestConfigDefaults: + """Test that default values match expected values""" + + def test_default_max_results_should_be_5(self): + """The expected default for MAX_RESULTS is 5 + + This test documents the expected value and will FAIL + because the current default is incorrectly set to 0. + """ + expected = 5 + assert config.MAX_RESULTS == expected, ( + f"MAX_RESULTS should default to {expected}, got {config.MAX_RESULTS}. 
" + "Fix config.py line 21: change 'MAX_RESULTS: int = 0' to 'MAX_RESULTS: int = 5'" + ) + + def test_default_chunk_size(self): + """CHUNK_SIZE should default to 800""" + assert config.CHUNK_SIZE == 800 + + def test_default_chunk_overlap(self): + """CHUNK_OVERLAP should default to 100""" + assert config.CHUNK_OVERLAP == 100 diff --git a/backend/tests/test_rag_system.py b/backend/tests/test_rag_system.py new file mode 100644 index 00000000..a8f69cc9 --- /dev/null +++ b/backend/tests/test_rag_system.py @@ -0,0 +1,254 @@ +"""Tests for RAGSystem - main orchestrator""" + +import os +import sys +from unittest.mock import Mock, patch + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +class TestRAGSystemQuery: + """Test RAGSystem.query() method""" + + @patch("rag_system.AIGenerator") + @patch("rag_system.VectorStore") + @patch("rag_system.DocumentProcessor") + @patch("rag_system.SessionManager") + def test_query_returns_response_and_sources( + self, mock_session, mock_doc_proc, mock_vector, mock_ai + ): + """Test successful query returns response and sources""" + from rag_system import RAGSystem + + # Setup mocks + mock_ai_instance = Mock() + mock_ai_instance.generate_response.return_value = "Test answer" + mock_ai.return_value = mock_ai_instance + + mock_vector_instance = Mock() + mock_vector.return_value = mock_vector_instance + + mock_session_instance = Mock() + mock_session_instance.get_conversation_history.return_value = None + mock_session.return_value = mock_session_instance + + # Create config with proper MAX_RESULTS + config = Mock() + config.MAX_RESULTS = 5 + config.ANTHROPIC_API_KEY = "test-key" + config.ANTHROPIC_MODEL = "test-model" + config.CHUNK_SIZE = 800 + config.CHUNK_OVERLAP = 100 + config.CHROMA_PATH = "/tmp/test" + config.EMBEDDING_MODEL = "test-embed" + config.MAX_HISTORY = 2 + + rag = RAGSystem(config) + + # Mock the tool manager + rag.tool_manager.get_last_sources = Mock( + return_value=[{"text": "Course - 
Lesson 1", "link": "https://example.com"}] + ) + rag.tool_manager.reset_sources = Mock() + + response, sources = rag.query("What is ML?") + + assert response == "Test answer" + assert len(sources) == 1 + mock_ai_instance.generate_response.assert_called_once() + + @patch("rag_system.AIGenerator") + @patch("rag_system.VectorStore") + @patch("rag_system.DocumentProcessor") + @patch("rag_system.SessionManager") + def test_query_with_session_uses_history( + self, mock_session, mock_doc_proc, mock_vector, mock_ai + ): + """Test that session history is passed to AI generator""" + from rag_system import RAGSystem + + mock_ai_instance = Mock() + mock_ai_instance.generate_response.return_value = "Answer" + mock_ai.return_value = mock_ai_instance + + mock_session_instance = Mock() + mock_session_instance.get_conversation_history.return_value = "Previous chat" + mock_session.return_value = mock_session_instance + + config = Mock() + config.MAX_RESULTS = 5 + config.ANTHROPIC_API_KEY = "test-key" + config.ANTHROPIC_MODEL = "test-model" + config.CHUNK_SIZE = 800 + config.CHUNK_OVERLAP = 100 + config.CHROMA_PATH = "/tmp/test" + config.EMBEDDING_MODEL = "test-embed" + config.MAX_HISTORY = 2 + + rag = RAGSystem(config) + rag.tool_manager.get_last_sources = Mock(return_value=[]) + rag.tool_manager.reset_sources = Mock() + + rag.query("Follow up", session_id="session_1") + + call_args = mock_ai_instance.generate_response.call_args + assert call_args.kwargs["conversation_history"] == "Previous chat" + + @patch("rag_system.AIGenerator") + @patch("rag_system.VectorStore") + @patch("rag_system.DocumentProcessor") + @patch("rag_system.SessionManager") + def test_query_updates_session_history( + self, mock_session, mock_doc_proc, mock_vector, mock_ai + ): + """Test that session history is updated after query""" + from rag_system import RAGSystem + + mock_ai_instance = Mock() + mock_ai_instance.generate_response.return_value = "The answer" + mock_ai.return_value = mock_ai_instance + + 
mock_session_instance = Mock() + mock_session_instance.get_conversation_history.return_value = None + mock_session.return_value = mock_session_instance + + config = Mock() + config.MAX_RESULTS = 5 + config.ANTHROPIC_API_KEY = "test-key" + config.ANTHROPIC_MODEL = "test-model" + config.CHUNK_SIZE = 800 + config.CHUNK_OVERLAP = 100 + config.CHROMA_PATH = "/tmp/test" + config.EMBEDDING_MODEL = "test-embed" + config.MAX_HISTORY = 2 + + rag = RAGSystem(config) + rag.tool_manager.get_last_sources = Mock(return_value=[]) + rag.tool_manager.reset_sources = Mock() + + rag.query("Test question", session_id="session_1") + + mock_session_instance.add_exchange.assert_called_once_with( + "session_1", "Test question", "The answer" + ) + + @patch("rag_system.AIGenerator") + @patch("rag_system.VectorStore") + @patch("rag_system.DocumentProcessor") + @patch("rag_system.SessionManager") + def test_query_resets_sources_after_retrieval( + self, mock_session, mock_doc_proc, mock_vector, mock_ai + ): + """Test that sources are reset after being retrieved""" + from rag_system import RAGSystem + + mock_ai_instance = Mock() + mock_ai_instance.generate_response.return_value = "Answer" + mock_ai.return_value = mock_ai_instance + + mock_session_instance = Mock() + mock_session_instance.get_conversation_history.return_value = None + mock_session.return_value = mock_session_instance + + config = Mock() + config.MAX_RESULTS = 5 + config.ANTHROPIC_API_KEY = "test-key" + config.ANTHROPIC_MODEL = "test-model" + config.CHUNK_SIZE = 800 + config.CHUNK_OVERLAP = 100 + config.CHROMA_PATH = "/tmp/test" + config.EMBEDDING_MODEL = "test-embed" + config.MAX_HISTORY = 2 + + rag = RAGSystem(config) + mock_reset = Mock() + rag.tool_manager.get_last_sources = Mock(return_value=[{"text": "src"}]) + rag.tool_manager.reset_sources = mock_reset + + rag.query("Test") + + mock_reset.assert_called_once() + + +class TestRAGSystemWithConfigBug: + """Tests that demonstrate the MAX_RESULTS=0 bug""" + + def 
test_diagnose_config_bug(self): + """Directly test for the configuration bug + + This test checks the actual config value and will FAIL + if MAX_RESULTS is 0, documenting the bug. + """ + from config import config + + if config.MAX_RESULTS == 0: + pytest.fail( + "BUG FOUND: MAX_RESULTS is 0 in config.py line 21.\n" + "This causes VectorStore.search() to request 0 results from ChromaDB.\n" + "All content searches return empty, making the chatbot unable to answer.\n" + "FIX: Change 'MAX_RESULTS: int = 0' to 'MAX_RESULTS: int = 5'" + ) + + def test_config_propagates_to_vector_store(self): + """Test that config MAX_RESULTS reaches VectorStore""" + from config import config + + # Document the propagation path + propagation = f""" + Config Propagation Path: + 1. config.py:21 - MAX_RESULTS = {config.MAX_RESULTS} + 2. rag_system.py:18 - VectorStore(..., config.MAX_RESULTS) + 3. vector_store.py:37 - self.max_results = max_results + 4. vector_store.py:90 - search_limit = self.max_results + 5. vector_store.py:95 - n_results=search_limit + + When MAX_RESULTS=0, ChromaDB returns 0 results for every search. 
+ """ + + if config.MAX_RESULTS == 0: + pytest.fail(f"Bug in config propagation:{propagation}") + + +class TestRAGSystemToolIntegration: + """Test tool integration in RAGSystem""" + + @patch("rag_system.AIGenerator") + @patch("rag_system.VectorStore") + @patch("rag_system.DocumentProcessor") + @patch("rag_system.SessionManager") + def test_query_passes_tools_to_ai_generator( + self, mock_session, mock_doc_proc, mock_vector, mock_ai + ): + """Test that tools are passed to AIGenerator""" + from rag_system import RAGSystem + + mock_ai_instance = Mock() + mock_ai_instance.generate_response.return_value = "Answer" + mock_ai.return_value = mock_ai_instance + + mock_session_instance = Mock() + mock_session_instance.get_conversation_history.return_value = None + mock_session.return_value = mock_session_instance + + config = Mock() + config.MAX_RESULTS = 5 + config.ANTHROPIC_API_KEY = "test-key" + config.ANTHROPIC_MODEL = "test-model" + config.CHUNK_SIZE = 800 + config.CHUNK_OVERLAP = 100 + config.CHROMA_PATH = "/tmp/test" + config.EMBEDDING_MODEL = "test-embed" + config.MAX_HISTORY = 2 + + rag = RAGSystem(config) + rag.tool_manager.get_last_sources = Mock(return_value=[]) + rag.tool_manager.reset_sources = Mock() + + rag.query("Test question") + + call_args = mock_ai_instance.generate_response.call_args + assert "tools" in call_args.kwargs + assert "tool_manager" in call_args.kwargs + assert len(call_args.kwargs["tools"]) == 2 # search + outline tools diff --git a/backend/tests/test_search_tools.py b/backend/tests/test_search_tools.py new file mode 100644 index 00000000..c587de71 --- /dev/null +++ b/backend/tests/test_search_tools.py @@ -0,0 +1,231 @@ +"""Tests for CourseSearchTool - the search tool used by AI""" + +import os +import sys +from unittest.mock import Mock + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from search_tools import CourseOutlineTool, CourseSearchTool, ToolManager +from vector_store import SearchResults + + +class 
TestCourseSearchToolExecute: + """Test CourseSearchTool.execute() method""" + + def test_execute_returns_formatted_results( + self, mock_vector_store, sample_search_results + ): + """Test successful search returns formatted content""" + mock_vector_store.search.return_value = sample_search_results + + tool = CourseSearchTool(mock_vector_store) + result = tool.execute(query="machine learning") + + assert "[ML Fundamentals - Lesson 1]" in result + assert "machine learning basics" in result + mock_vector_store.search.assert_called_once_with( + query="machine learning", course_name=None, lesson_number=None + ) + + def test_execute_with_empty_results_returns_message(self, mock_vector_store): + """Test empty results returns informative message""" + mock_vector_store.search.return_value = SearchResults( + documents=[], metadata=[], distances=[] + ) + + tool = CourseSearchTool(mock_vector_store) + result = tool.execute(query="nonexistent topic") + + assert "No relevant content found" in result + + def test_execute_with_error_returns_error_message(self, mock_vector_store): + """Test search error is propagated correctly""" + mock_vector_store.search.return_value = SearchResults.empty( + "No course found matching 'BadCourse'" + ) + + tool = CourseSearchTool(mock_vector_store) + result = tool.execute(query="test", course_name="BadCourse") + + assert "No course found matching 'BadCourse'" in result + + def test_execute_with_course_filter(self, mock_vector_store, sample_search_results): + """Test search with course name filter""" + mock_vector_store.search.return_value = sample_search_results + + tool = CourseSearchTool(mock_vector_store) + tool.execute(query="test", course_name="ML Course") + + mock_vector_store.search.assert_called_with( + query="test", course_name="ML Course", lesson_number=None + ) + + def test_execute_with_lesson_filter(self, mock_vector_store, sample_search_results): + """Test search with lesson number filter""" + mock_vector_store.search.return_value = 
sample_search_results + + tool = CourseSearchTool(mock_vector_store) + tool.execute(query="test", lesson_number=2) + + mock_vector_store.search.assert_called_with( + query="test", course_name=None, lesson_number=2 + ) + + def test_execute_with_both_filters(self, mock_vector_store, sample_search_results): + """Test search with both course and lesson filters""" + mock_vector_store.search.return_value = sample_search_results + + tool = CourseSearchTool(mock_vector_store) + tool.execute(query="test", course_name="ML Course", lesson_number=3) + + mock_vector_store.search.assert_called_with( + query="test", course_name="ML Course", lesson_number=3 + ) + + def test_execute_tracks_sources(self, mock_vector_store, sample_search_results): + """Test that sources are tracked for UI display""" + mock_vector_store.search.return_value = sample_search_results + mock_vector_store.get_lesson_link.return_value = "https://example.com/lesson" + + tool = CourseSearchTool(mock_vector_store) + tool.execute(query="test") + + assert len(tool.last_sources) == 1 + assert tool.last_sources[0]["text"] == "ML Fundamentals - Lesson 1" + assert tool.last_sources[0]["link"] == "https://example.com/lesson" + + def test_execute_with_max_results_zero_bug(self, mock_vector_store_zero_results): + """CRITICAL: Demonstrate the MAX_RESULTS=0 bug effect on tool + + When VectorStore is configured with max_results=0, all searches + return empty results, causing this tool to always return + "No relevant content found" regardless of the query. 
+ """ + tool = CourseSearchTool(mock_vector_store_zero_results) + result = tool.execute(query="machine learning") + + # With MAX_RESULTS=0, we always get "No relevant content found" + assert "No relevant content found" in result + + def test_execute_with_course_filter_empty_results(self, mock_vector_store): + """Test empty results with course filter shows filter info""" + mock_vector_store.search.return_value = SearchResults( + documents=[], metadata=[], distances=[] + ) + + tool = CourseSearchTool(mock_vector_store) + result = tool.execute(query="test", course_name="ML Course") + + assert "No relevant content found" in result + assert "ML Course" in result + + +class TestCourseOutlineTool: + """Test CourseOutlineTool functionality""" + + def test_execute_returns_formatted_outline(self): + """Test successful outline retrieval""" + mock_store = Mock() + mock_store.get_course_metadata.return_value = { + "title": "ML Fundamentals", + "course_link": "https://example.com/ml", + "lessons": [ + {"lesson_number": 1, "lesson_title": "Intro"}, + {"lesson_number": 2, "lesson_title": "Basics"}, + ], + } + + tool = CourseOutlineTool(mock_store) + result = tool.execute(course_name="ML") + + assert "ML Fundamentals" in result + assert "Lesson 1: Intro" in result + assert "Lesson 2: Basics" in result + + def test_execute_course_not_found(self): + """Test outline for non-existent course""" + mock_store = Mock() + mock_store.get_course_metadata.return_value = None + + tool = CourseOutlineTool(mock_store) + result = tool.execute(course_name="NonExistent") + + assert "No course found matching 'NonExistent'" in result + + +class TestToolManager: + """Test ToolManager functionality""" + + def test_register_and_execute_tool(self, mock_vector_store, sample_search_results): + """Test tool registration and execution""" + mock_vector_store.search.return_value = sample_search_results + + manager = ToolManager() + tool = CourseSearchTool(mock_vector_store) + manager.register_tool(tool) + + 
result = manager.execute_tool("search_course_content", query="test") + + assert "ML Fundamentals" in result + + def test_execute_unknown_tool(self): + """Test executing non-existent tool""" + manager = ToolManager() + result = manager.execute_tool("unknown_tool", query="test") + + assert "Tool 'unknown_tool' not found" in result + + def test_get_tool_definitions(self, mock_vector_store): + """Test getting tool definitions for Claude API""" + manager = ToolManager() + manager.register_tool(CourseSearchTool(mock_vector_store)) + + definitions = manager.get_tool_definitions() + + assert len(definitions) == 1 + assert definitions[0]["name"] == "search_course_content" + assert "input_schema" in definitions[0] + assert definitions[0]["input_schema"]["properties"]["query"] + + def test_get_last_sources(self, mock_vector_store, sample_search_results): + """Test retrieving sources after search""" + mock_vector_store.search.return_value = sample_search_results + mock_vector_store.get_lesson_link.return_value = "https://example.com" + + manager = ToolManager() + tool = CourseSearchTool(mock_vector_store) + manager.register_tool(tool) + + manager.execute_tool("search_course_content", query="test") + sources = manager.get_last_sources() + + assert len(sources) == 1 + + def test_reset_sources(self, mock_vector_store, sample_search_results): + """Test resetting sources after retrieval""" + mock_vector_store.search.return_value = sample_search_results + mock_vector_store.get_lesson_link.return_value = None + + manager = ToolManager() + tool = CourseSearchTool(mock_vector_store) + manager.register_tool(tool) + + manager.execute_tool("search_course_content", query="test") + manager.reset_sources() + + sources = manager.get_last_sources() + assert len(sources) == 0 + + def test_register_multiple_tools(self, mock_vector_store): + """Test registering multiple tools""" + manager = ToolManager() + manager.register_tool(CourseSearchTool(mock_vector_store)) + 
manager.register_tool(CourseOutlineTool(mock_vector_store)) + + definitions = manager.get_tool_definitions() + + assert len(definitions) == 2 + names = [d["name"] for d in definitions] + assert "search_course_content" in names + assert "get_course_outline" in names diff --git a/backend/vector_store.py b/backend/vector_store.py index 390abe71..28b5db75 100644 --- a/backend/vector_store.py +++ b/backend/vector_store.py @@ -1,77 +1,92 @@ +from dataclasses import dataclass +from typing import Any + import chromadb from chromadb.config import Settings -from typing import List, Dict, Any, Optional -from dataclasses import dataclass from models import Course, CourseChunk -from sentence_transformers import SentenceTransformer + @dataclass class SearchResults: """Container for search results with metadata""" - documents: List[str] - metadata: List[Dict[str, Any]] - distances: List[float] - error: Optional[str] = None - + + documents: list[str] + metadata: list[dict[str, Any]] + distances: list[float] + error: str | None = None + @classmethod - def from_chroma(cls, chroma_results: Dict) -> 'SearchResults': + def from_chroma(cls, chroma_results: dict) -> "SearchResults": """Create SearchResults from ChromaDB query results""" return cls( - documents=chroma_results['documents'][0] if chroma_results['documents'] else [], - metadata=chroma_results['metadatas'][0] if chroma_results['metadatas'] else [], - distances=chroma_results['distances'][0] if chroma_results['distances'] else [] + documents=( + chroma_results["documents"][0] if chroma_results["documents"] else [] + ), + metadata=( + chroma_results["metadatas"][0] if chroma_results["metadatas"] else [] + ), + distances=( + chroma_results["distances"][0] if chroma_results["distances"] else [] + ), ) - + @classmethod - def empty(cls, error_msg: str) -> 'SearchResults': + def empty(cls, error_msg: str) -> "SearchResults": """Create empty results with error message""" return cls(documents=[], metadata=[], distances=[], 
error=error_msg) - + def is_empty(self) -> bool: """Check if results are empty""" return len(self.documents) == 0 + class VectorStore: """Vector storage using ChromaDB for course content and metadata""" - + def __init__(self, chroma_path: str, embedding_model: str, max_results: int = 5): self.max_results = max_results # Initialize ChromaDB client self.client = chromadb.PersistentClient( - path=chroma_path, - settings=Settings(anonymized_telemetry=False) + path=chroma_path, settings=Settings(anonymized_telemetry=False) ) - + # Set up sentence transformer embedding function - self.embedding_function = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction( - model_name=embedding_model + self.embedding_function = ( + chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction( + model_name=embedding_model + ) ) - + # Create collections for different types of data - self.course_catalog = self._create_collection("course_catalog") # Course titles/instructors - self.course_content = self._create_collection("course_content") # Actual course material - + self.course_catalog = self._create_collection( + "course_catalog" + ) # Course titles/instructors + self.course_content = self._create_collection( + "course_content" + ) # Actual course material + def _create_collection(self, name: str): """Create or get a ChromaDB collection""" return self.client.get_or_create_collection( - name=name, - embedding_function=self.embedding_function + name=name, embedding_function=self.embedding_function ) - - def search(self, - query: str, - course_name: Optional[str] = None, - lesson_number: Optional[int] = None, - limit: Optional[int] = None) -> SearchResults: + + def search( + self, + query: str, + course_name: str | None = None, + lesson_number: int | None = None, + limit: int | None = None, + ) -> SearchResults: """ Main search interface that handles course resolution and content search. 
- + Args: query: What to search for in course content course_name: Optional course name/title to filter by lesson_number: Optional lesson number to filter by limit: Maximum results to return - + Returns: SearchResults object with documents and metadata """ @@ -81,104 +96,111 @@ def search(self, course_title = self._resolve_course_name(course_name) if not course_title: return SearchResults.empty(f"No course found matching '{course_name}'") - + # Step 2: Build filter for content search filter_dict = self._build_filter(course_title, lesson_number) - + # Step 3: Search course content # Use provided limit or fall back to configured max_results search_limit = limit if limit is not None else self.max_results - + try: results = self.course_content.query( - query_texts=[query], - n_results=search_limit, - where=filter_dict + query_texts=[query], n_results=search_limit, where=filter_dict ) return SearchResults.from_chroma(results) except Exception as e: return SearchResults.empty(f"Search error: {str(e)}") - - def _resolve_course_name(self, course_name: str) -> Optional[str]: + + def _resolve_course_name(self, course_name: str) -> str | None: """Use vector search to find best matching course by name""" try: - results = self.course_catalog.query( - query_texts=[course_name], - n_results=1 - ) - - if results['documents'][0] and results['metadatas'][0]: + results = self.course_catalog.query(query_texts=[course_name], n_results=1) + + if results["documents"][0] and results["metadatas"][0]: # Return the title (which is now the ID) - return results['metadatas'][0][0]['title'] + return results["metadatas"][0][0]["title"] except Exception as e: print(f"Error resolving course name: {e}") - + return None - - def _build_filter(self, course_title: Optional[str], lesson_number: Optional[int]) -> Optional[Dict]: + + def _build_filter( + self, course_title: str | None, lesson_number: int | None + ) -> dict | None: """Build ChromaDB filter from search parameters""" if not course_title and 
lesson_number is None: return None - + # Handle different filter combinations if course_title and lesson_number is not None: - return {"$and": [ - {"course_title": course_title}, - {"lesson_number": lesson_number} - ]} - + return { + "$and": [ + {"course_title": course_title}, + {"lesson_number": lesson_number}, + ] + } + if course_title: return {"course_title": course_title} - + return {"lesson_number": lesson_number} - + def add_course_metadata(self, course: Course): """Add course information to the catalog for semantic search""" import json course_text = course.title - + # Build lessons metadata and serialize as JSON string lessons_metadata = [] for lesson in course.lessons: - lessons_metadata.append({ - "lesson_number": lesson.lesson_number, - "lesson_title": lesson.title, - "lesson_link": lesson.lesson_link - }) - + lessons_metadata.append( + { + "lesson_number": lesson.lesson_number, + "lesson_title": lesson.title, + "lesson_link": lesson.lesson_link, + } + ) + self.course_catalog.add( documents=[course_text], - metadatas=[{ - "title": course.title, - "instructor": course.instructor, - "course_link": course.course_link, - "lessons_json": json.dumps(lessons_metadata), # Serialize as JSON string - "lesson_count": len(course.lessons) - }], - ids=[course.title] + metadatas=[ + { + "title": course.title, + "instructor": course.instructor, + "course_link": course.course_link, + "lessons_json": json.dumps( + lessons_metadata + ), # Serialize as JSON string + "lesson_count": len(course.lessons), + } + ], + ids=[course.title], ) - - def add_course_content(self, chunks: List[CourseChunk]): + + def add_course_content(self, chunks: list[CourseChunk]): """Add course content chunks to the vector store""" if not chunks: return - + documents = [chunk.content for chunk in chunks] - metadatas = [{ - "course_title": chunk.course_title, - "lesson_number": chunk.lesson_number, - "chunk_index": chunk.chunk_index - } for chunk in chunks] + metadatas = [ + { + "course_title": 
chunk.course_title, + "lesson_number": chunk.lesson_number, + "chunk_index": chunk.chunk_index, + } + for chunk in chunks + ] # Use title with chunk index for unique IDs - ids = [f"{chunk.course_title.replace(' ', '_')}_{chunk.chunk_index}" for chunk in chunks] - - self.course_content.add( - documents=documents, - metadatas=metadatas, - ids=ids - ) - + ids = [ + f"{chunk.course_title.replace(' ', '_')}_{chunk.chunk_index}" + for chunk in chunks + ] + + self.course_content.add(documents=documents, metadatas=metadatas, ids=ids) + def clear_all_data(self): """Clear all data from both collections""" try: @@ -189,43 +211,46 @@ def clear_all_data(self): self.course_content = self._create_collection("course_content") except Exception as e: print(f"Error clearing data: {e}") - - def get_existing_course_titles(self) -> List[str]: + + def get_existing_course_titles(self) -> list[str]: """Get all existing course titles from the vector store""" try: # Get all documents from the catalog results = self.course_catalog.get() - if results and 'ids' in results: - return results['ids'] + if results and "ids" in results: + return results["ids"] return [] except Exception as e: print(f"Error getting existing course titles: {e}") return [] - + def get_course_count(self) -> int: """Get the total number of courses in the vector store""" try: results = self.course_catalog.get() - if results and 'ids' in results: - return len(results['ids']) + if results and "ids" in results: + return len(results["ids"]) return 0 except Exception as e: print(f"Error getting course count: {e}") return 0 - - def get_all_courses_metadata(self) -> List[Dict[str, Any]]: + + def get_all_courses_metadata(self) -> list[dict[str, Any]]: """Get metadata for all courses in the vector store""" import json + try: results = self.course_catalog.get() - if results and 'metadatas' in results: + if results and "metadatas" in results: # Parse lessons JSON for each course parsed_metadata = [] - for metadata in 
results['metadatas']: + for metadata in results["metadatas"]: course_meta = metadata.copy() - if 'lessons_json' in course_meta: - course_meta['lessons'] = json.loads(course_meta['lessons_json']) - del course_meta['lessons_json'] # Remove the JSON string version + if "lessons_json" in course_meta: + course_meta["lessons"] = json.loads(course_meta["lessons_json"]) + del course_meta[ + "lessons_json" + ] # Remove the JSON string version parsed_metadata.append(course_meta) return parsed_metadata return [] @@ -233,35 +258,58 @@ def get_all_courses_metadata(self) -> List[Dict[str, Any]]: print(f"Error getting courses metadata: {e}") return [] - def get_course_link(self, course_title: str) -> Optional[str]: + def get_course_link(self, course_title: str) -> str | None: """Get course link for a given course title""" try: # Get course by ID (title is the ID) results = self.course_catalog.get(ids=[course_title]) - if results and 'metadatas' in results and results['metadatas']: - metadata = results['metadatas'][0] - return metadata.get('course_link') + if results and "metadatas" in results and results["metadatas"]: + metadata = results["metadatas"][0] + return metadata.get("course_link") return None except Exception as e: print(f"Error getting course link: {e}") return None - - def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str]: + + def get_lesson_link(self, course_title: str, lesson_number: int) -> str | None: """Get lesson link for a given course title and lesson number""" import json + try: # Get course by ID (title is the ID) results = self.course_catalog.get(ids=[course_title]) - if results and 'metadatas' in results and results['metadatas']: - metadata = results['metadatas'][0] - lessons_json = metadata.get('lessons_json') + if results and "metadatas" in results and results["metadatas"]: + metadata = results["metadatas"][0] + lessons_json = metadata.get("lessons_json") if lessons_json: lessons = json.loads(lessons_json) # Find the lesson 
with matching number for lesson in lessons: - if lesson.get('lesson_number') == lesson_number: - return lesson.get('lesson_link') + if lesson.get("lesson_number") == lesson_number: + return lesson.get("lesson_link") return None except Exception as e: print(f"Error getting lesson link: {e}") - \ No newline at end of file + return None + + def get_course_metadata(self, course_name: str) -> dict[str, Any] | None: + """Get full metadata for a single course by name (supports fuzzy matching).""" + import json + + # Use semantic search to resolve course name + resolved_title = self._resolve_course_name(course_name) + if not resolved_title: + return None + + try: + results = self.course_catalog.get(ids=[resolved_title]) + if results and "metadatas" in results and results["metadatas"]: + metadata = results["metadatas"][0].copy() + if "lessons_json" in metadata: + metadata["lessons"] = json.loads(metadata["lessons_json"]) + del metadata["lessons_json"] + return metadata + return None + except Exception as e: + print(f"Error getting course metadata: {e}") + return None diff --git a/frontend-changes.md b/frontend-changes.md new file mode 100644 index 00000000..1b4b9586 --- /dev/null +++ b/frontend-changes.md @@ -0,0 +1,143 @@ +# Frontend Changes: Dark/Light Theme Toggle + +## Overview +Implemented a dark/light mode toggle button that allows users to switch between themes. The button is positioned in the top-right corner, uses sun/moon icons, and includes smooth transition animations. + +## Files Modified + +### 1. `frontend/index.html` +**Changes:** +- Added theme toggle button with sun and moon SVG icons +- Button is placed at the top of the body, before the main container +- Includes proper accessibility attributes (`aria-label`, `title`) +- Updated CSS and JS version cache busters (v11 -> v12, v10 -> v11) + +**New HTML structure:** +```html + +``` + +### 2. 
`frontend/style.css` +**Changes:** + +#### Light Theme Color Palette +Created `[data-theme="light"]` selector with carefully chosen colors for accessibility: + +| Variable | Value | Purpose | Contrast Ratio | +|----------|-------|---------|----------------| +| `--background` | `#f8fafc` | Page background | Base | +| `--surface` | `#ffffff` | Cards, sidebar | - | +| `--surface-hover` | `#f1f5f9` | Hover states | - | +| `--text-primary` | `#0f172a` | Main text | ~15.8:1 (AAA) | +| `--text-secondary` | `#475569` | Secondary text | ~7.1:1 (AAA) | +| `--border-color` | `#cbd5e1` | Borders | Visible contrast | +| `--assistant-message` | `#e2e8f0` | Chat bubbles | Good readability | +| `--welcome-bg` | `#dbeafe` | Welcome message | Soft blue tint | +| `--code-bg` | `rgba(0,0,0,0.06)` | Code blocks | Subtle distinction | +| `--error-text` | `#dc2626` | Error messages | 4.5:1+ on light bg | +| `--success-text` | `#16a34a` | Success messages | 4.5:1+ on light bg | + +#### Theme Toggle Button Styles +- `.theme-toggle`: Fixed position button (top-right corner) + - 44x44px circular button (WCAG touch target) + - Surface background with border + - Hover: scale up to 1.05x + - Focus: visible focus ring (`--focus-ring: rgba(37, 99, 235, 0.3)`) + - Active: scale down for click feedback + +- Icon visibility toggling: + - Sun icon visible in dark mode (indicates "click to make it light") + - Moon icon visible in light mode (indicates "click to make it dark") + +- Icon rotation animation on toggle (`@keyframes iconRotate`) + +#### Updated Styles for Theme Compatibility +- Code blocks now use `var(--code-bg)` instead of hardcoded values +- Error messages use `var(--error-bg)` and `var(--error-text)` +- Success messages use `var(--success-bg)` and `var(--success-text)` + +### 3. 
`frontend/script.js` +**Changes:** + +#### New DOM Element +- Added `themeToggle` to the list of cached DOM elements + +#### New Event Listeners +- Click handler for theme toggle button +- Keyboard handler (Enter/Space) for accessibility + +#### New Functions + +**`initializeTheme()`** +- Reads saved theme from localStorage +- Defaults to dark theme if no preference saved +- Applies theme on page load + +**`setTheme(theme)`** +- Sets or removes `data-theme` attribute on `` element +- Saves preference to localStorage +- Updates accessibility labels dynamically + +**`toggleTheme()`** +- Toggles between dark and light themes +- Triggers icon rotation animation +- Updates localStorage + +## Features + +### Design +- Circular button matching existing design aesthetic +- Positioned in top-right corner (fixed position, always visible) +- Sun icon in dark mode, moon icon in light mode +- Smooth 0.3s transitions on all theme-related color changes + +### Accessibility (WCAG 2.1 Compliance) +- **Color Contrast**: All text meets WCAG AAA standards (7:1+ ratio) + - Primary text: ~15.8:1 contrast ratio + - Secondary text: ~7.1:1 contrast ratio + - Error/success text: 4.5:1+ contrast ratio +- **Keyboard Navigation**: Full keyboard support (Tab, Enter, Space) +- **ARIA Labels**: Dynamic labels update based on current theme +- **Focus Indicators**: Visible focus ring on keyboard focus +- **Touch Targets**: 44x44px button size (meets WCAG guidelines) + +### Animation +- Icon rotation animation (360 degrees) when toggling +- Scale animation on hover (1.05x) and click (0.95x) +- Smooth color transitions across all themed elements + +### Persistence +- Theme preference saved to localStorage +- Automatically restores on page reload +- Defaults to dark theme for new users + +## Color Comparison + +### Dark Theme (Default) +```css +--background: #0f172a; /* Very dark blue */ +--surface: #1e293b; /* Dark slate */ +--text-primary: #f1f5f9; /* Off-white */ +--text-secondary: #94a3b8; /* 
Muted gray */ +--border-color: #334155; /* Slate */ +``` + +### Light Theme +```css +--background: #f8fafc; /* Very light gray */ +--surface: #ffffff; /* Pure white */ +--text-primary: #0f172a; /* Very dark (high contrast) */ +--text-secondary: #475569; /* Medium slate (AAA compliant) */ +--border-color: #cbd5e1; /* Light gray */ +``` + +## Technical Notes + +- Theme is applied via `data-theme="light"` attribute on `` element +- Dark theme is default (no attribute needed) +- CSS custom properties (variables) enable instant theme switching +- No flash of wrong theme on load (theme applied before content renders) +- All colors use the Tailwind CSS color palette for consistency diff --git a/frontend/index.html b/frontend/index.html index f8e25a62..1d2ae642 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -7,9 +7,29 @@ Course Materials Assistant - + + + +

Course Materials Assistant

@@ -19,6 +39,11 @@

Course Materials Assistant

+ +
+ +
+
@@ -76,6 +101,6 @@

Course Materials Assistant

- + \ No newline at end of file diff --git a/frontend/script.js b/frontend/script.js index 562a8a36..7b21da77 100644 --- a/frontend/script.js +++ b/frontend/script.js @@ -5,7 +5,7 @@ const API_URL = '/api'; let currentSessionId = null; // DOM elements -let chatMessages, chatInput, sendButton, totalCourses, courseTitles; +let chatMessages, chatInput, sendButton, totalCourses, courseTitles, newChatBtn, themeToggle; // Initialize document.addEventListener('DOMContentLoaded', () => { @@ -15,8 +15,11 @@ document.addEventListener('DOMContentLoaded', () => { sendButton = document.getElementById('sendButton'); totalCourses = document.getElementById('totalCourses'); courseTitles = document.getElementById('courseTitles'); - + newChatBtn = document.getElementById('newChatBtn'); + themeToggle = document.getElementById('themeToggle'); + setupEventListeners(); + initializeTheme(); createNewSession(); loadCourseStats(); }); @@ -28,8 +31,19 @@ function setupEventListeners() { chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); }); - - + + // New chat button + newChatBtn.addEventListener('click', handleNewChat); + + // Theme toggle button + themeToggle.addEventListener('click', toggleTheme); + themeToggle.addEventListener('keydown', (e) => { + if (e.key === 'Enter' || e.key === ' ') { + e.preventDefault(); + toggleTheme(); + } + }); + // Suggested questions document.querySelectorAll('.suggested-item').forEach(button => { button.addEventListener('click', (e) => { @@ -122,10 +136,22 @@ function addMessage(content, type, sources = null, isWelcome = false) { let html = `
${displayContent}
`; if (sources && sources.length > 0) { + // Format sources as clickable link pills + const formattedSources = sources.map(source => { + if (typeof source === 'object' && source.text) { + if (source.link) { + return `${escapeHtml(source.text)}`; + } + return `${escapeHtml(source.text)}`; + } + // Backwards compatibility: plain string sources + return `${escapeHtml(source)}`; + }).join(''); + html += `
Sources -
${sources.join(', ')}
+
${formattedSources}
`; } @@ -152,6 +178,24 @@ async function createNewSession() { addMessage('Welcome to the Course Materials Assistant! I can help you with questions about courses, lessons and specific content. What would you like to know?', 'assistant', null, true); } +async function handleNewChat() { + const oldSessionId = currentSessionId; + + // Reset frontend immediately + createNewSession(); + + // Clean up old session on backend (async, non-blocking) + if (oldSessionId) { + try { + await fetch(`${API_URL}/session/${oldSessionId}`, { + method: 'DELETE' + }); + } catch (error) { + console.warn('Failed to clear old session:', error); + } + } +} + // Load course statistics async function loadCourseStats() { try { @@ -188,4 +232,41 @@ async function loadCourseStats() { courseTitles.innerHTML = 'Failed to load courses'; } } +} + +// Theme Functions +function initializeTheme() { + // Check for saved theme preference, default to dark + const savedTheme = localStorage.getItem('theme') || 'dark'; + setTheme(savedTheme); +} + +function setTheme(theme) { + if (theme === 'light') { + document.documentElement.setAttribute('data-theme', 'light'); + } else { + document.documentElement.removeAttribute('data-theme'); + } + localStorage.setItem('theme', theme); + + // Update aria-label for accessibility + const isLight = theme === 'light'; + themeToggle.setAttribute('aria-label', isLight ? 'Switch to dark mode' : 'Switch to light mode'); + themeToggle.setAttribute('title', isLight ? 'Switch to dark mode' : 'Switch to light mode'); +} + +function toggleTheme() { + // Add animation class + themeToggle.classList.add('animating'); + + // Determine current theme and toggle + const currentTheme = document.documentElement.getAttribute('data-theme'); + const newTheme = currentTheme === 'light' ? 
'dark' : 'light'; + + setTheme(newTheme); + + // Remove animation class after animation completes + setTimeout(() => { + themeToggle.classList.remove('animating'); + }, 300); } \ No newline at end of file diff --git a/frontend/style.css b/frontend/style.css index 825d0367..67d61db0 100644 --- a/frontend/style.css +++ b/frontend/style.css @@ -5,7 +5,7 @@ padding: 0; } -/* CSS Variables */ +/* CSS Variables - Dark Theme (default) */ :root { --primary-color: #2563eb; --primary-hover: #1d4ed8; @@ -22,6 +22,34 @@ --focus-ring: rgba(37, 99, 235, 0.2); --welcome-bg: #1e3a5f; --welcome-border: #2563eb; + --code-bg: rgba(0, 0, 0, 0.2); + --error-bg: rgba(239, 68, 68, 0.1); + --error-text: #f87171; + --success-bg: rgba(34, 197, 94, 0.1); + --success-text: #4ade80; +} + +/* Light Theme */ +[data-theme="light"] { + --primary-color: #2563eb; + --primary-hover: #1d4ed8; + --background: #f8fafc; + --surface: #ffffff; + --surface-hover: #f1f5f9; + --text-primary: #0f172a; + --text-secondary: #475569; + --border-color: #cbd5e1; + --user-message: #2563eb; + --assistant-message: #e2e8f0; + --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); + --focus-ring: rgba(37, 99, 235, 0.3); + --welcome-bg: #dbeafe; + --welcome-border: #2563eb; + --code-bg: rgba(0, 0, 0, 0.06); + --error-bg: rgba(239, 68, 68, 0.1); + --error-text: #dc2626; + --success-bg: rgba(34, 197, 94, 0.1); + --success-text: #16a34a; } /* Base Styles */ @@ -241,8 +269,38 @@ header h1 { } .sources-content { - padding: 0 0.5rem 0.25rem 1.5rem; - color: var(--text-secondary); + padding: 0.5rem 0.5rem 0.25rem 0.5rem; + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} + +.source-link { + display: inline-block; + padding: 0.4rem 0.75rem; + background: var(--primary-color); + color: white; + text-decoration: none; + border-radius: 16px; + font-size: 0.8rem; + font-weight: 500; + transition: all 0.2s ease; +} + +.source-link:hover { + background: var(--primary-hover); + transform: translateY(-1px); + box-shadow: 0 2px 8px rgba(37, 99, 
235, 0.4); +} + +.source-text { + display: inline-block; + padding: 0.4rem 0.75rem; + background: var(--surface-hover); + color: var(--text-primary); + border-radius: 16px; + font-size: 0.8rem; + font-weight: 500; } /* Markdown formatting styles */ @@ -277,7 +335,7 @@ header h1 { } .message-content code { - background-color: rgba(0, 0, 0, 0.2); + background-color: var(--code-bg); padding: 0.125rem 0.25rem; border-radius: 3px; font-family: 'Fira Code', 'Consolas', monospace; @@ -285,7 +343,7 @@ header h1 { } .message-content pre { - background-color: rgba(0, 0, 0, 0.2); + background-color: var(--code-bg); padding: 0.75rem; border-radius: 4px; overflow-x: auto; @@ -427,8 +485,8 @@ header h1 { /* Error Message */ .error-message { - background: rgba(239, 68, 68, 0.1); - color: #f87171; + background: var(--error-bg); + color: var(--error-text); padding: 0.75rem 1.25rem; border-radius: 8px; border: 1px solid rgba(239, 68, 68, 0.2); @@ -437,8 +495,8 @@ header h1 { /* Success Message */ .success-message { - background: rgba(34, 197, 94, 0.1); - color: #4ade80; + background: var(--success-bg); + color: var(--success-text); padding: 0.75rem 1.25rem; border-radius: 8px; border: 1px solid rgba(34, 197, 94, 0.2); @@ -491,6 +549,31 @@ details[open] .suggested-header::before { transform: rotate(90deg); } +/* New Chat Button */ +.new-chat-section { + margin-bottom: 1.5rem; +} + +.new-chat-btn { + font-size: 0.875rem; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.5px; + cursor: pointer; + padding: 0.5rem 0; + border: none; + background: none; + text-align: left; + transition: color 0.2s ease; +} + +.new-chat-btn:hover, +.new-chat-btn:focus { + color: var(--primary-color); + outline: none; +} + /* Course Stats in Sidebar */ .course-stats { display: flex; @@ -634,6 +717,84 @@ details[open] .suggested-header::before { transform: translateX(2px); } +/* Theme Toggle Button */ +.theme-toggle { + position: fixed; + top: 1rem; + right: 
1rem; + z-index: 1000; + background: var(--surface); + border: 1px solid var(--border-color); + border-radius: 50%; + width: 44px; + height: 44px; + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + transition: all 0.3s ease; + box-shadow: var(--shadow); +} + +.theme-toggle:hover { + background: var(--surface-hover); + transform: scale(1.05); +} + +.theme-toggle:focus { + outline: none; + box-shadow: 0 0 0 3px var(--focus-ring); +} + +.theme-toggle:active { + transform: scale(0.95); +} + +.theme-toggle svg { + width: 22px; + height: 22px; + color: var(--text-primary); + transition: all 0.3s ease; +} + +/* Sun icon (shown in dark mode) */ +.theme-toggle .icon-sun { + display: block; +} + +.theme-toggle .icon-moon { + display: none; +} + +/* Moon icon (shown in light mode) */ +[data-theme="light"] .theme-toggle .icon-sun { + display: none; +} + +[data-theme="light"] .theme-toggle .icon-moon { + display: block; +} + +/* Icon rotation animation on toggle */ +.theme-toggle.animating svg { + animation: iconRotate 0.3s ease-out; +} + +@keyframes iconRotate { + 0% { + transform: rotate(0deg) scale(1); + opacity: 1; + } + 50% { + transform: rotate(180deg) scale(0.8); + opacity: 0.5; + } + 100% { + transform: rotate(360deg) scale(1); + opacity: 1; + } +} + /* Responsive Design */ @media (max-width: 768px) { .main-content { diff --git a/pyproject.toml b/pyproject.toml index 3f05e2de..f022ee5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,3 +13,64 @@ dependencies = [ "python-multipart==0.0.20", "python-dotenv==1.1.1", ] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-asyncio>=0.23.0", + "httpx>=0.27.0", + "black>=24.0.0", + "ruff>=0.4.0", +] + +[tool.pytest.ini_options] +testpaths = ["backend/tests"] +python_files = ["test_*.py"] +python_functions = ["test_*"] +addopts = "-v --tb=short" +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +filterwarnings = [ + 
"ignore::DeprecationWarning", + "ignore::pytest.PytestUnraisableExceptionWarning", +] + +[tool.black] +line-length = 88 +target-version = ["py313"] +include = '\.pyi?$' +extend-exclude = ''' +/( + \.git + | \.venv + | __pycache__ + | chroma_db +)/ +''' + +[tool.ruff] +line-length = 88 +target-version = "py313" +exclude = [ + ".git", + ".venv", + "__pycache__", + "chroma_db", +] + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # Pyflakes + "I", # isort + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "UP", # pyupgrade +] +ignore = [ + "E501", # line too long (handled by black) +] + +[tool.ruff.lint.isort] +known-first-party = ["backend"] diff --git a/scripts/format.sh b/scripts/format.sh new file mode 100755 index 00000000..9d4b045d --- /dev/null +++ b/scripts/format.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Format code with black and fix imports with ruff + +set -e + +cd "$(dirname "$0")/.." + +echo "Running black formatter..." +uv run black backend/ main.py + +echo "Running ruff import sorting..." +uv run ruff check --fix --select I backend/ main.py + +echo "Formatting complete!" diff --git a/scripts/lint.sh b/scripts/lint.sh new file mode 100755 index 00000000..58658ace --- /dev/null +++ b/scripts/lint.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Run linting checks with ruff + +set -e + +cd "$(dirname "$0")/.." + +echo "Running ruff linter..." +uv run ruff check backend/ main.py + +echo "Linting complete!" diff --git a/scripts/quality.sh b/scripts/quality.sh new file mode 100755 index 00000000..09bc967f --- /dev/null +++ b/scripts/quality.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Run all code quality checks (format check + lint) + +set -e + +cd "$(dirname "$0")/.." + +echo "=== Code Quality Checks ===" +echo + +echo "1. Checking formatting with black..." +uv run black --check backend/ main.py +echo " Formatting OK!" +echo + +echo "2. Running ruff linter..." +uv run ruff check backend/ main.py +echo " Linting OK!" 
+echo + +echo "=== All quality checks passed! ===" diff --git a/uv.lock b/uv.lock index 9ae65c55..7f92c395 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.13" [[package]] @@ -110,6 +110,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/cf/45fb5261ece3e6b9817d3d82b2f343a505fd58674a92577923bc500bd1aa/bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b", size = 152799, upload-time = "2025-02-28T01:23:53.139Z" }, ] +[[package]] +name = "black" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, + { name = "pytokens" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/13/88/560b11e521c522440af991d46848a2bde64b5f7202ec14e1f46f9509d328/black-26.1.0.tar.gz", hash = "sha256:d294ac3340eef9c9eb5d29288e96dc719ff269a88e27b396340459dd85da4c58", size = 658785, upload-time = "2026-01-18T04:50:11.993Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/04/fa2f4784f7237279332aa735cdfd5ae2e7730db0072fb2041dadda9ae551/black-26.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ba1d768fbfb6930fc93b0ecc32a43d8861ded16f47a40f14afa9bb04ab93d304", size = 1877781, upload-time = "2026-01-18T04:59:39.054Z" }, + { url = "https://files.pythonhosted.org/packages/cf/ad/5a131b01acc0e5336740a039628c0ab69d60cf09a2c87a4ec49f5826acda/black-26.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2b807c240b64609cb0e80d2200a35b23c7df82259f80bef1b2c96eb422b4aac9", size = 1699670, upload-time = "2026-01-18T04:59:41.005Z" }, + { url = "https://files.pythonhosted.org/packages/da/7c/b05f22964316a52ab6b4265bcd52c0ad2c30d7ca6bd3d0637e438fc32d6e/black-26.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:1de0f7d01cc894066a1153b738145b194414cc6eeaad8ef4397ac9abacf40f6b", size = 1775212, upload-time = "2026-01-18T04:59:42.545Z" }, + { url = "https://files.pythonhosted.org/packages/a6/a3/e8d1526bea0446e040193185353920a9506eab60a7d8beb062029129c7d2/black-26.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:91a68ae46bf07868963671e4d05611b179c2313301bd756a89ad4e3b3db2325b", size = 1409953, upload-time = "2026-01-18T04:59:44.357Z" }, + { url = "https://files.pythonhosted.org/packages/c7/5a/d62ebf4d8f5e3a1daa54adaab94c107b57be1b1a2f115a0249b41931e188/black-26.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:be5e2fe860b9bd9edbf676d5b60a9282994c03fbbd40fe8f5e75d194f96064ca", size = 1217707, upload-time = "2026-01-18T04:59:45.719Z" }, + { url = "https://files.pythonhosted.org/packages/6a/83/be35a175aacfce4b05584ac415fd317dd6c24e93a0af2dcedce0f686f5d8/black-26.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9dc8c71656a79ca49b8d3e2ce8103210c9481c57798b48deeb3a8bb02db5f115", size = 1871864, upload-time = "2026-01-18T04:59:47.586Z" }, + { url = "https://files.pythonhosted.org/packages/a5/f5/d33696c099450b1274d925a42b7a030cd3ea1f56d72e5ca8bbed5f52759c/black-26.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b22b3810451abe359a964cc88121d57f7bce482b53a066de0f1584988ca36e79", size = 1701009, upload-time = "2026-01-18T04:59:49.443Z" }, + { url = "https://files.pythonhosted.org/packages/1b/87/670dd888c537acb53a863bc15abbd85b22b429237d9de1b77c0ed6b79c42/black-26.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:53c62883b3f999f14e5d30b5a79bd437236658ad45b2f853906c7cbe79de00af", size = 1767806, upload-time = "2026-01-18T04:59:50.769Z" }, + { url = "https://files.pythonhosted.org/packages/fe/9c/cd3deb79bfec5bcf30f9d2100ffeec63eecce826eb63e3961708b9431ff1/black-26.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:f016baaadc423dc960cdddf9acae679e71ee02c4c341f78f3179d7e4819c095f", size = 1433217, upload-time = 
"2026-01-18T04:59:52.218Z" }, + { url = "https://files.pythonhosted.org/packages/4e/29/f3be41a1cf502a283506f40f5d27203249d181f7a1a2abce1c6ce188035a/black-26.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:66912475200b67ef5a0ab665011964bf924745103f51977a78b4fb92a9fc1bf0", size = 1245773, upload-time = "2026-01-18T04:59:54.457Z" }, + { url = "https://files.pythonhosted.org/packages/e4/3d/51bdb3ecbfadfaf825ec0c75e1de6077422b4afa2091c6c9ba34fbfc0c2d/black-26.1.0-py3-none-any.whl", hash = "sha256:1054e8e47ebd686e078c0bb0eaf31e6ce69c966058d122f2c0c950311f9f3ede", size = 204010, upload-time = "2026-01-18T04:50:09.978Z" }, +] + [[package]] name = "build" version = "1.2.2.post1" @@ -470,6 +497,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -658,6 +694,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = 
"sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "networkx" version = "3.5" @@ -983,6 +1028,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pathspec" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841, upload-time = "2026-01-09T15:46:46.009Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" }, +] + [[package]] name = "pillow" version = "11.3.0" @@ -1038,6 +1092,24 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, ] +[[package]] +name = "platformdirs" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + [[package]] name = "posthog" version = "5.4.0" @@ -1207,6 +1279,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = 
"sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, ] +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1237,6 +1337,30 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" }, ] +[[package]] +name = "pytokens" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e5/16/4b9cfd90d55e66ffdb277d7ebe3bc25250c2311336ec3fc73b2673c794d5/pytokens-0.4.0.tar.gz", hash = "sha256:6b0b03e6ea7c9f9d47c5c61164b69ad30f4f0d70a5d9fe7eac4d19f24f77af2d", size = 15039, upload-time = "2026-01-19T07:59:50.623Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/63/627b7e71d557383da5a97f473ad50f8d9c2c1f55c7d3c2531a120c796f6e/pytokens-0.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73eff3bdd8ad08da679867992782568db0529b887bed4c85694f84cdf35eafc6", size = 159744, upload-time = "2026-01-19T07:59:16.88Z" }, + { url = "https://files.pythonhosted.org/packages/28/d7/16f434c37ec3824eba6bcb6e798e5381a8dc83af7a1eda0f95c16fe3ade5/pytokens-0.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d97cc1f91b1a8e8ebccf31c367f28225699bea26592df27141deade771ed0afb", size = 253207, upload-time = "2026-01-19T07:59:18.069Z" }, + { url = "https://files.pythonhosted.org/packages/ab/96/04102856b9527701ae57d74a6393d1aca5bad18a1b1ca48ccffb3c93b392/pytokens-0.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a2c8952c537cb73a1a74369501a83b7f9d208c3cf92c41dd88a17814e68d48ce", size = 267452, upload-time = "2026-01-19T07:59:19.328Z" }, + { url = "https://files.pythonhosted.org/packages/0e/ef/0936eb472b89ab2d2c2c24bb81c50417e803fa89c731930d9fb01176fe9f/pytokens-0.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5dbf56f3c748aed9310b310d5b8b14e2c96d3ad682ad5a943f381bdbbdddf753", size = 265965, 
upload-time = "2026-01-19T07:59:20.613Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f5/64f3d6f7df4a9e92ebda35ee85061f6260e16eac82df9396020eebbca775/pytokens-0.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:e131804513597f2dff2b18f9911d9b6276e21ef3699abeffc1c087c65a3d975e", size = 102813, upload-time = "2026-01-19T07:59:22.012Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f1/d07e6209f18ef378fc2ae9dee8d1dfe91fd2447c2e2dbfa32867b6dd30cf/pytokens-0.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0d7374c917197106d3c4761374718bc55ea2e9ac0fb94171588ef5840ee1f016", size = 159968, upload-time = "2026-01-19T07:59:23.07Z" }, + { url = "https://files.pythonhosted.org/packages/0a/73/0eb111400abd382a04f253b269819db9fcc748aa40748441cebdcb6d068f/pytokens-0.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cd3fa1caf9e47a72ee134a29ca6b5bea84712724bba165d6628baa190c6ea5b", size = 253373, upload-time = "2026-01-19T07:59:24.381Z" }, + { url = "https://files.pythonhosted.org/packages/bd/8d/9e4e2fdb5bcaba679e54afcc304e9f13f488eb4d626e6b613f9553e03dbd/pytokens-0.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c6986576b7b07fe9791854caa5347923005a80b079d45b63b0be70d50cce5f1", size = 267024, upload-time = "2026-01-19T07:59:25.74Z" }, + { url = "https://files.pythonhosted.org/packages/cb/b7/e0a370321af2deb772cff14ff337e1140d1eac2c29a8876bfee995f486f0/pytokens-0.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9940f7c2e2f54fb1cb5fe17d0803c54da7a2bf62222704eb4217433664a186a7", size = 270912, upload-time = "2026-01-19T07:59:27.072Z" }, + { url = "https://files.pythonhosted.org/packages/7c/54/4348f916c440d4c3e68b53b4ed0e66b292d119e799fa07afa159566dcc86/pytokens-0.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:54691cf8f299e7efabcc25adb4ce715d3cef1491e1c930eaf555182f898ef66a", size = 103836, upload-time = "2026-01-19T07:59:28.112Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/f8/a693c0cfa9c783a2a8c4500b7b2a8bab420f8ca4f2d496153226bf1c12e3/pytokens-0.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:94ff5db97a0d3cd7248a5b07ba2167bd3edc1db92f76c6db00137bbaf068ddf8", size = 167643, upload-time = "2026-01-19T07:59:29.292Z" }, + { url = "https://files.pythonhosted.org/packages/c0/dd/a64eb1e9f3ec277b69b33ef1b40ffbcc8f0a3bafcde120997efc7bdefebf/pytokens-0.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0dd6261cd9cc95fae1227b1b6ebee023a5fd4a4b6330b071c73a516f5f59b63", size = 289553, upload-time = "2026-01-19T07:59:30.537Z" }, + { url = "https://files.pythonhosted.org/packages/df/22/06c1079d93dbc3bca5d013e1795f3d8b9ed6c87290acd6913c1c526a6bb2/pytokens-0.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdca8159df407dbd669145af4171a0d967006e0be25f3b520896bc7068f02c4", size = 302490, upload-time = "2026-01-19T07:59:32.352Z" }, + { url = "https://files.pythonhosted.org/packages/8d/de/a6f5e43115b4fbf4b93aa87d6c83c79932cdb084f9711daae04549e1e4ad/pytokens-0.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4b5770abeb2a24347380a1164a558f0ebe06e98aedbd54c45f7929527a5fb26e", size = 305652, upload-time = "2026-01-19T07:59:33.685Z" }, + { url = "https://files.pythonhosted.org/packages/ab/3d/c136e057cb622e36e0c3ff7a8aaa19ff9720050c4078235691da885fe6ee/pytokens-0.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:74500d72c561dad14c037a9e86a657afd63e277dd5a3bb7570932ab7a3b12551", size = 115472, upload-time = "2026-01-19T07:59:34.734Z" }, + { url = "https://files.pythonhosted.org/packages/7c/3c/6941a82f4f130af6e1c68c076b6789069ef10c04559bd4733650f902fd3b/pytokens-0.4.0-py3-none-any.whl", hash = "sha256:0508d11b4de157ee12063901603be87fb0253e8f4cb9305eb168b1202ab92068", size = 13224, upload-time = "2026-01-19T07:59:49.822Z" }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -1405,6 +1529,32 
@@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] +[[package]] +name = "ruff" +version = "0.14.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, + { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = 
"sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, +] + [[package]] name = "safetensors" version = "0.5.3" @@ -1561,16 +1711,31 @@ dependencies = [ { name = "uvicorn" }, ] +[package.optional-dependencies] +dev = [ + { name = "black" }, + { name = "httpx" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "ruff" }, +] + [package.metadata] requires-dist = [ { name = "anthropic", specifier = "==0.58.2" }, + { name = "black", marker = "extra == 'dev'", specifier = ">=24.0.0" }, { name = "chromadb", specifier = "==1.0.15" }, { name = "fastapi", specifier = "==0.116.1" }, + { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.27.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" }, { name = "python-dotenv", specifier = "==1.1.1" }, { name = "python-multipart", specifier = "==0.0.20" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" }, { name = "sentence-transformers", specifier = "==5.0.0" }, { name = "uvicorn", specifier = "==0.35.0" }, ] +provides-extras = ["dev"] [[package]] name = "sympy"