"""
Utility for loading prompts and instructions from external files.

Prompts are read from ``<base_dir>/prompts`` as ``.txt`` (preferred) or
legacy ``.json`` files; instructions are read from
``<base_dir>/instructions`` as ``.json`` files.
"""

import os
import json
from pathlib import Path
from typing import Dict, Optional, List, Any
import logging

logger = logging.getLogger(__name__)


class PromptLoader:
    """Loads prompts and instructions from external text/JSON files."""

    def __init__(self, base_dir: Optional[Path] = None):
        """
        Initialize with base directory.

        Args:
            base_dir: Directory containing the ``prompts`` and ``instructions``
                sub-directories. When None, the parent of this module's
                directory is used.
        """
        if base_dir is None:
            # Default to the project root directory
            self.base_dir = Path(__file__).parent.parent
        else:
            self.base_dir = Path(base_dir)

        self.prompts_dir = self.base_dir / "prompts"
        self.instructions_dir = self.base_dir / "instructions"

        # Cache of parsed JSON objects, keyed by the file path as a string
        self._cache: Dict[str, Dict[str, Any]] = {}

    def load_prompt(self, prompt_name: str, **kwargs) -> str:
        """
        Load a prompt from the prompts directory.

        Supports both .txt (plain text) and .json formats. A .txt file is
        returned stripped of surrounding whitespace; a .json file contributes
        the value of its "prompt" key, which may be a string or a list of
        lines.

        Args:
            prompt_name: Name of the prompt file (without extension)
            **kwargs: Variables to substitute in the prompt via ``str.format``

        Returns:
            The loaded prompt with variables substituted. If substitution
            fails, the problem is logged and the unformatted prompt is
            returned.

        Raises:
            FileNotFoundError: If neither a .txt nor a .json file exists.
        """
        # Try .txt format first (preferred), then fall back to .json
        txt_path = self.prompts_dir / f"{prompt_name}.txt"
        json_path = self.prompts_dir / f"{prompt_name}.json"

        if txt_path.exists():
            logger.debug("Loading prompt from .txt file: %s", txt_path)
            prompt_text = txt_path.read_text(encoding="utf-8").strip()
        elif json_path.exists():
            # Legacy format
            logger.debug("Loading prompt from .json file: %s", json_path)
            data = self._load_json_file(json_path)
            prompt_data = data.get("prompt", "")
            if isinstance(prompt_data, list):
                # Join list elements with newlines; str() keeps a stray
                # non-string element from crashing the join.
                prompt_text = "\n".join(str(line) for line in prompt_data)
            else:
                prompt_text = prompt_data
        else:
            raise FileNotFoundError(
                f"Prompt file not found: {prompt_name} (checked .txt and .json)"
            )

        # Substitute variables if provided
        if kwargs:
            try:
                logger.debug(
                    "Formatting prompt %s with variables: %s",
                    prompt_name,
                    list(kwargs.keys()),
                )
                prompt_text = prompt_text.format(**kwargs)
                logger.debug("Successfully formatted prompt %s", prompt_name)
            except KeyError as e:
                # The template references a placeholder that was not supplied
                logger.warning("Missing variable %s in prompt %s", e, prompt_name)
            except Exception as e:
                # Deliberately best-effort: a malformed template must not
                # prevent the caller from getting the raw prompt back.
                logger.error("Error formatting prompt %s: %s", prompt_name, e)
                logger.error("Available variables: %s", list(kwargs.keys()))

        return prompt_text

    def load_instruction(self, instruction_name: str) -> str:
        """
        Load instructions from the instructions directory as a single string.

        Args:
            instruction_name: Name of the instruction file (without .json extension)

        Returns:
            The loaded instructions joined with newlines
        """
        return "\n".join(self.load_instructions_as_list(instruction_name))

    def load_instructions_as_list(self, instruction_name: str) -> List[str]:
        """
        Load instructions and return as a list of strings.

        The file's "instructions" key may hold a list of strings or a single
        string (treated as one instruction). Blank entries are dropped and
        non-string entries are skipped with a warning.

        Args:
            instruction_name: Name of the instruction file (without .json extension)

        Returns:
            List of non-blank instruction strings
        """
        instruction_path = self.instructions_dir / f"{instruction_name}.json"
        data = self._load_json_file(instruction_path)
        instructions = data.get("instructions", [])

        # A bare string would otherwise be iterated character by character
        if isinstance(instructions, str):
            instructions = [instructions]

        result: List[str] = []
        for item in instructions:
            if not isinstance(item, str):
                logger.warning(
                    "Skipping non-string instruction %r in %s",
                    item,
                    instruction_path,
                )
                continue
            # Filter out empty strings
            if item.strip():
                result.append(item)
        return result

    def _load_json_file(self, file_path: Path) -> Dict[str, Any]:
        """
        Load JSON file content with caching.

        Args:
            file_path: Path of the JSON file to read

        Returns:
            The parsed top-level JSON object

        Raises:
            FileNotFoundError: If the file does not exist.
            json.JSONDecodeError: If the file is not valid JSON.
            ValueError: If the top-level JSON value is not an object.
        """
        cache_key = str(file_path)

        # Check cache first
        if cache_key in self._cache:
            return self._cache[cache_key]

        try:
            if not file_path.exists():
                raise FileNotFoundError(f"File not found: {file_path}")

            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Callers use .get(); reject other shapes before caching so an
            # unusable value never ends up in the cache.
            if not isinstance(data, dict):
                raise ValueError(
                    f"Expected a JSON object at the top level of {file_path}, "
                    f"got {type(data).__name__}"
                )
        except json.JSONDecodeError as e:
            logger.error("Invalid JSON in file %s: %s", file_path, e)
            raise
        except Exception as e:
            logger.error("Error loading file %s: %s", file_path, e)
            raise

        self._cache[cache_key] = data
        logger.debug(
            "Loaded %s: %s with %d keys", file_path.name, type(data), len(data)
        )
        return data

    def clear_cache(self):
        """Clear the file cache."""
        self._cache.clear()
        logger.debug("Prompt loader cache cleared")

    @staticmethod
    def _list_names(directory: Path, patterns: List[str]) -> List[str]:
        """Return sorted, de-duplicated extension-less names of files under
        ``directory`` matching any glob in ``patterns``, using forward slashes."""
        if not directory.exists():
            return []

        names = {
            file_path.relative_to(directory).with_suffix("").as_posix()
            for pattern in patterns
            for file_path in directory.rglob(pattern)
            if file_path.is_file()
        }
        return sorted(names)

    def list_prompts(self) -> List[str]:
        """
        List all available prompt files.

        Returns:
            Sorted prompt names relative to the prompts directory, without
            extension and with forward slashes. Both .txt and .json prompts
            are included; a name present in both formats is listed once.
        """
        return self._list_names(self.prompts_dir, ["*.txt", "*.json"])

    def list_instructions(self) -> List[str]:
        """
        List all available instruction files.

        Returns:
            Sorted instruction names relative to the instructions directory,
            without the .json extension and with forward slashes.
        """
        return self._list_names(self.instructions_dir, ["*.json"])

    def get_info(self) -> dict:
        """Get information about the prompt loader."""
        return {
            "base_dir": str(self.base_dir),
            "prompts_dir": str(self.prompts_dir),
            "instructions_dir": str(self.instructions_dir),
            "prompts_dir_exists": self.prompts_dir.exists(),
            "instructions_dir_exists": self.instructions_dir.exists(),
            "available_prompts": self.list_prompts(),
            "available_instructions": self.list_instructions(),
            "cache_size": len(self._cache),
        }


# Global instance
prompt_loader = PromptLoader()