import os
import yaml
from dotenv import load_dotenv, find_dotenv

dotenv_path = find_dotenv()
load_dotenv(dotenv_path)  # This loads the variables from .env

# Define the path to the prompts file relative to this config file
PROMPTS_FILE_PATH = os.path.join(os.path.dirname(__file__), 'prompts.yaml')
def load_prompts(file_path):
    """Loads prompts from a YAML file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            prompts = yaml.safe_load(f)
            if not isinstance(prompts, dict):
                print(f"Warning: Prompts file '{file_path}' did not load as a dictionary.")
                return {}  # Return empty dict if not loaded correctly
            return prompts
    except FileNotFoundError:
        print(f"Error: Prompts file not found at '{file_path}'")
        return {}  # Return empty dict if file not found
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file '{file_path}': {e}")
        return {}  # Return empty dict on parsing error
    except Exception as e:
        print(f"An unexpected error occurred while loading prompts from '{file_path}': {e}")
        return {}  # Return empty dict on other errors
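
# Illustrative sketch of the prompts.yaml structure that the Config accessors
# below expect. The key names are taken from the accessor methods; every value
# shown is a placeholder, not the actual prompt text:
#
#   system_prompt:
#     bk_persona: "..."
#     question_guidance: "..."
#   language_instructions:
#     bk_persona:
#       en: "..."
#     final_answer_system:
#       en: "..."
#   evaluate_context_prompt: "..."
#   reframe_question_prompt: "..."
#   final_answer_prompt_system: "... CRITICAL INSTRUCTION: ..."
#   final_answer_prompt_human: "..."
#   judge_prompt_template: "..."
#   user_messages:
#     <message_key>: "..."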
class Config:
    # Load prompts from YAML file
    PROMPTS = load_prompts(PROMPTS_FILE_PATH)

    # Telegram Bot Token
    TELEGRAM_BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN')

    # For tracking conversation history
    USER_SESSIONS = {}

    # Optional: API keys, paths, and deployment settings for external services
    GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', None)
    VECTOR_STORE_PATH = os.environ.get('VECTOR_STORE_PATH', None)
    DATA_PATH = os.environ.get('DATA_PATH', None)
    INDEXED_DATA_PATH = os.environ.get('INDEXED_DATA_PATH', None)
    WEBHOOK_URL = os.environ.get('WEBHOOK_URL', None)
    PORT = int(os.environ.get('PORT', 5000))
    LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gemini-2.5-flash-preview-04-17')
    # LANGUAGE = os.environ.get('LANGUAGE', 'en')  # Removed global language setting
    EMBEDDING_MODEL_NAME = os.environ.get('EMBEDDING_MODEL_NAME', 'all-MiniLM-L6-v2')
    RERANKER_MODEL_NAME = os.environ.get('RERANKER_MODEL_NAME', 'cross-encoder/ms-marco-MiniLM-L-6-v2')
    SEMANTIC_CHUNKING = False
    TEMPERATURE = 0
    CONVERSATION_HISTORY_LIMIT = 10
    JUDGE_LLM_MODEL_NAME = "gemini-2.0-flash"
    K = 10  # Initial retrieval K for the non-reranking flow (can be removed if INITIAL_RETRIEVAL_K is always used)
    SEARCH_TYPE = "similarity"
    SCORE_THRESHOLD = 0.5

    # Reranking-specific config
    INITIAL_RETRIEVAL_K = 40  # Number of docs to fetch initially for reranking
    RERANK_TOP_N = 10         # Number of docs to keep after reranking
    CHUNK_SIZE = 1000         # Size of chunks for semantic chunking
    CHUNK_OVERLAP = 100       # Overlap size for semantic chunking
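
    # Illustrative .env sketch covering the variables read above. The variable
    # names match the os.environ.get() calls; the values are placeholders. PORT
    # and the model names fall back to the defaults above when unset:
    #
    #   TELEGRAM_BOT_TOKEN=123456:your-bot-token
    #   GEMINI_API_KEY=your-gemini-key
    #   VECTOR_STORE_PATH=./vector_store
    #   DATA_PATH=./data
    #   INDEXED_DATA_PATH=./indexed_data
    #   WEBHOOK_URL=https://your-deployment.example.com/webhook
    #   PORT=5000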
    # --- Accessor methods for prompts (optional but good practice) ---
    @classmethod
    def get_bk_persona_language_instruction(cls, language_code: str):
        """Gets the language-specific instruction for the BK persona prompt."""
        lang = language_code.lower()  # Ensure lowercase for matching keys
        return cls.PROMPTS.get('language_instructions', {}).get('bk_persona', {}).get(lang, '')  # Default to empty string

    @classmethod
    def get_final_answer_language_instruction(cls, language_code: str):
        """Gets the language-specific instruction for the final answer system prompt."""
        lang = language_code.lower()
        return cls.PROMPTS.get('language_instructions', {}).get('final_answer_system', {}).get(lang, '')  # Default to empty string

    @classmethod
    def get_system_prompt(cls, language_code: str):
        """Gets the combined system prompt: base persona, guidance, and language instruction."""
        base_persona = cls.PROMPTS.get('system_prompt', {}).get('bk_persona', '')
        guidance = cls.PROMPTS.get('system_prompt', {}).get('question_guidance', '')
        lang_instruction = cls.get_bk_persona_language_instruction(language_code)
        # Combine, adding a newline before the instruction if it exists
        return f"{base_persona}\n{guidance}\n{lang_instruction}".strip()

    @classmethod
    def get_bk_persona_prompt(cls):
        """Gets the base BK persona system prompt (language-agnostic, without the language instruction)."""
        return cls.PROMPTS.get('system_prompt', {}).get('bk_persona', '')

    @classmethod
    def get_question_guidance_prompt(cls):
        """Gets the question guidance prompt."""
        return cls.PROMPTS.get('system_prompt', {}).get('question_guidance', '')

    @classmethod
    def get_evaluate_context_prompt(cls):
        """Gets the evaluate context prompt."""
        return cls.PROMPTS.get('evaluate_context_prompt', '')

    @classmethod
    def get_reframe_question_prompt(cls):
        """Gets the reframe question prompt."""
        return cls.PROMPTS.get('reframe_question_prompt', '')

    @classmethod
    def get_final_answer_system_prompt_template(cls, language_code: str):
        """Gets the final answer system prompt template, with the language instruction inserted."""
        base_template = cls.PROMPTS.get('final_answer_prompt_system', '')
        lang_instruction = cls.get_final_answer_language_instruction(language_code)
        # Insert the language instruction just before the JSON-format section of
        # the template, marked by 'CRITICAL INSTRUCTION:'.
        insertion_point_str = "CRITICAL INSTRUCTION:"
        insertion_point = base_template.find(insertion_point_str)
        if lang_instruction and insertion_point != -1:
            return f"{base_template[:insertion_point]}{lang_instruction}\n{base_template[insertion_point:]}".strip()
        # If the instruction is empty or the marker is not found, return the base template
        return base_template
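
    # Illustrative before/after for the insertion above. The template text here
    # is a made-up placeholder; only the 'CRITICAL INSTRUCTION:' marker is
    # assumed from the code:
    #   base_template: "Answer faithfully.\nCRITICAL INSTRUCTION: Reply in JSON."
    #   result:        "Answer faithfully.\nRespond in English.\nCRITICAL INSTRUCTION: Reply in JSON."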
    @classmethod
    def get_final_answer_human_prompt_template(cls):
        """Gets the final answer human prompt template."""
        return cls.PROMPTS.get('final_answer_prompt_human', '')

    @classmethod
    def get_judge_prompt_template(cls):
        """Gets the judge prompt template."""
        return cls.PROMPTS.get('judge_prompt_template', '')

    @classmethod
    def get_user_message(cls, message_key: str, default: str = ""):
        """Gets a specific user-facing message string from prompts.yaml."""
        return cls.PROMPTS.get('user_messages', {}).get(message_key, default)

# Example usage (optional, for testing)
if __name__ == "__main__":
    print("\nSystem Prompt:")
    print(Config.get_system_prompt('en'))  # Example for English

    print("\nReframe Question Prompt:")
    print(Config.get_reframe_question_prompt())

    print("\nJudge Prompt Template:")
    print(Config.get_judge_prompt_template())

    print(f"\nTelegram Token: {Config.TELEGRAM_BOT_TOKEN}")