"""Configuration module: loads .env variables and YAML prompt templates.

Exposes the `Config` class, whose class attributes hold environment-derived
settings and whose classmethods assemble language-aware prompt strings from
`prompts.yaml`.
"""

import os

import yaml
from dotenv import load_dotenv, find_dotenv

dotenv_path = find_dotenv()
load_dotenv(dotenv_path)  # This loads the variables from .env

# Define the path to the prompts file relative to this config file
PROMPTS_FILE_PATH = os.path.join(os.path.dirname(__file__), 'prompts.yaml')


def load_prompts(file_path) -> dict:
    """Loads prompts from a YAML file.

    Returns the parsed top-level mapping, or an empty dict on any failure
    (missing file, YAML parse error, non-dict document, or unexpected error)
    so callers can always use dict lookups safely.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            prompts = yaml.safe_load(f)
            if not isinstance(prompts, dict):
                print(f"Warning: Prompts file '{file_path}' did not load as a dictionary.")
                return {}  # Return empty dict if not loaded correctly
            return prompts
    except FileNotFoundError:
        print(f"Error: Prompts file not found at '{file_path}'")
        return {}  # Return empty dict if file not found
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file '{file_path}': {e}")
        return {}  # Return empty dict on parsing error
    except Exception as e:
        # Deliberate best-effort: config loading must never crash the app.
        print(f"An unexpected error occurred while loading prompts from '{file_path}': {e}")
        return {}  # Return empty dict on other errors


class Config:
    """Central application configuration and prompt accessors."""

    # Load prompts from YAML file
    PROMPTS = load_prompts(PROMPTS_FILE_PATH)

    # Telegram Bot Token
    TELEGRAM_BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN')

    # For tracking conversation history
    USER_SESSIONS = {}

    # Optional: API keys for external services
    GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', None)
    VECTOR_STORE_PATH = os.environ.get('VECTOR_STORE_PATH', None)
    DATA_PATH = os.environ.get('DATA_PATH', None)
    INDEXED_DATA_PATH = os.environ.get('INDEXED_DATA_PATH', None)
    WEBHOOK_URL = os.environ.get('WEBHOOK_URL', None)
    PORT = int(os.environ.get('PORT', 5000))
    LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gemini-2.5-flash-preview-04-17')
    # LANGUAGE = os.environ.get('LANGUAGE', 'en')  # Removed global language setting
    EMBEDDING_MODEL_NAME = os.environ.get('EMBEDDING_MODEL_NAME', 'all-MiniLM-L6-v2')
    RERANKER_MODEL_NAME = os.environ.get('RERANKER_MODEL_NAME', 'cross-encoder/ms-marco-MiniLM-L-6-v2')
    SEMANTIC_CHUNKING = False
    TEMPERATURE = 0
    CONVERSATION_HISTORY_LIMIT = 10
    JUDGE_LLM_MODEL_NAME = "gemini-2.0-flash"
    K = 10  # Initial retrieval K for non-reranking flow (can be kept or removed if INITIAL_RETRIEVAL_K is always used)
    SEARCH_TYPE = "similarity"
    SCORE_THRESHOLD = 0.5

    # Reranking specific config
    INITIAL_RETRIEVAL_K = 40  # Number of docs to fetch initially for reranking
    RERANK_TOP_N = 10  # Number of docs to keep after reranking
    CHUNK_SIZE = 1000  # Size of chunks for semantic chunking
    CHUNK_OVERLAP = 100  # Overlap size for semantic chunking

    # --- Accessor methods for prompts (optional but good practice) ---
    @classmethod
    def get_bk_persona_language_instruction(cls, language_code: str) -> str:
        """Gets the language-specific instruction for the BK persona prompt."""
        # Use provided language_code
        lang = language_code.lower()  # Ensure lowercase for matching keys
        return cls.PROMPTS.get('language_instructions', {}).get('bk_persona', {}).get(lang, '')  # Default to empty string

    @classmethod
    def get_final_answer_language_instruction(cls, language_code: str) -> str:
        """Gets the language-specific instruction for the final answer system prompt."""
        lang = language_code.lower()
        return cls.PROMPTS.get('language_instructions', {}).get('final_answer_system', {}).get(lang, '')  # Default to empty string

    @classmethod
    def get_system_prompt(cls, language_code: str) -> str:
        """Gets the combined system prompt including base persona, guidance, and language instruction."""
        base_persona = cls.PROMPTS.get('system_prompt', {}).get('bk_persona', '')
        guidance = cls.PROMPTS.get('system_prompt', {}).get('question_guidance', '')
        # Fetch dynamic instruction based on arg
        lang_instruction = cls.get_bk_persona_language_instruction(language_code)
        # Combine, adding a newline before the instruction if it exists
        return f"{base_persona}\n{guidance}\n{lang_instruction}".strip()

    @classmethod
    def get_bk_persona_prompt(cls) -> str:
        """Gets the base BK persona system prompt (without language instruction - remains language-agnostic)."""
        # This now returns only the base English text
        return cls.PROMPTS.get('system_prompt', {}).get('bk_persona', '')

    @classmethod
    def get_question_guidance_prompt(cls) -> str:
        """Gets the question guidance prompt."""
        return cls.PROMPTS.get('system_prompt', {}).get('question_guidance', '')

    @classmethod
    def get_evaluate_context_prompt(cls) -> str:
        """Gets the evaluate context prompt."""
        return cls.PROMPTS.get('evaluate_context_prompt', '')

    @classmethod
    def get_reframe_question_prompt(cls) -> str:
        """Gets the reframe question prompt."""
        return cls.PROMPTS.get('reframe_question_prompt', '')

    @classmethod
    def get_final_answer_system_prompt_template(cls, language_code: str) -> str:
        """Gets the final answer system prompt template including language instruction."""
        base_template = cls.PROMPTS.get('final_answer_prompt_system', '')
        # Fetch dynamic instruction based on arg
        lang_instruction = cls.get_final_answer_language_instruction(language_code)
        # Insert the language instruction just before the critical/JSON-format
        # section of the template so it reads cleanly.
        insertion_point_str = "CRITICAL INSTRUCTION:"
        insertion_point = base_template.find(insertion_point_str)
        if lang_instruction and insertion_point != -1:
            # Insert instruction before the JSON format part
            return f"{base_template[:insertion_point]}{lang_instruction}\n{base_template[insertion_point:]}".strip()
        else:
            # If instruction is empty or insertion point not found, return base
            return base_template

    @classmethod
    def get_final_answer_human_prompt_template(cls) -> str:
        """Gets the final answer human prompt template."""
        return cls.PROMPTS.get('final_answer_prompt_human', '')

    @classmethod
    def get_judge_prompt_template(cls) -> str:
        """Gets the judge prompt template."""
        return cls.PROMPTS.get('judge_prompt_template', '')

    @classmethod
    def get_user_message(cls, message_key: str, default: str = "") -> str:
        """Gets a specific user-facing message string from prompts.yaml."""
        return cls.PROMPTS.get('user_messages', {}).get(message_key, default)


# Example usage (optional, for testing)
if __name__ == "__main__":
    print("\nSystem Prompt:")
    print(Config.get_system_prompt('en'))  # Example for English
    print("\nReframe Question Prompt:")
    print(Config.get_reframe_question_prompt())
    print("\nJudge Prompt Template:")
    print(Config.get_judge_prompt_template())
    print(f"\nTelegram Token: {Config.TELEGRAM_BOT_TOKEN}")