Spaces:
Building
Building
File size: 7,220 Bytes
3f61806 24ae72d 3f61806 24ae72d d4c609e 24ae72d d4c609e 24ae72d d4c609e 24ae72d d4c609e 3f61806 24ae72d 3f61806 24ae72d 3f61806 24ae72d 3f61806 872c2a9 b9ccd0b 3f61806 b9ccd0b 3f61806 b9ccd0b 24ae72d 3f61806 24ae72d b9ccd0b 24ae72d 7361b6f 24ae72d b9ccd0b 24ae72d b9ccd0b 24ae72d b9ccd0b 1d4e6f0 b9ccd0b 24ae72d b9ccd0b 24ae72d b9ccd0b 24ae72d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
import yaml
from dotenv import load_dotenv, find_dotenv
# Load environment variables from a .env file (if one exists anywhere up the
# directory tree) into os.environ BEFORE the Config class below reads them.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)  # This loads the variables from .env
# Define the path to the prompts file relative to this config file
# (so it resolves correctly regardless of the process's working directory).
PROMPTS_FILE_PATH = os.path.join(os.path.dirname(__file__), 'prompts.yaml')
def load_prompts(file_path):
    """Read the prompt dictionary from a YAML file.

    Best-effort loader: every failure mode (missing file, malformed YAML,
    non-dict top level, anything unexpected) is reported on stdout and
    collapsed to an empty dict so the caller never has to handle exceptions.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            loaded = yaml.safe_load(handle)
    except FileNotFoundError:
        print(f"Error: Prompts file not found at '{file_path}'")
        return {}
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file '{file_path}': {e}")
        return {}
    except Exception as e:
        print(f"An unexpected error occurred while loading prompts from '{file_path}': {e}")
        return {}
    # safe_load yields None for an empty file and scalars/lists for odd
    # content; only a mapping is usable downstream.
    if isinstance(loaded, dict):
        return loaded
    print(f"Warning: Prompts file '{file_path}' did not load as a dictionary.")
    return {}
class Config:
    """Application-wide configuration.

    Combines environment-driven settings (read once at class-definition time,
    after dotenv has populated os.environ) with prompt texts loaded from
    prompts.yaml, exposed through classmethod accessors that always fall back
    to '' / the given default when a key is absent.
    """

    # Prompt dictionary, read once when this class body executes.
    PROMPTS = load_prompts(PROMPTS_FILE_PATH)

    # Telegram Bot Token
    TELEGRAM_BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN')

    # For tracking conversation history (shared, in-process, per-worker).
    USER_SESSIONS = {}

    # Optional: API keys / paths for external services (None when unset).
    GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
    VECTOR_STORE_PATH = os.environ.get('VECTOR_STORE_PATH')
    DATA_PATH = os.environ.get('DATA_PATH')
    INDEXED_DATA_PATH = os.environ.get('INDEXED_DATA_PATH')
    WEBHOOK_URL = os.environ.get('WEBHOOK_URL')
    PORT = int(os.environ.get('PORT', 5000))
    LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gemini-2.5-flash-preview-04-17')
    EMBEDDING_MODEL_NAME = os.environ.get('EMBEDDING_MODEL_NAME', 'all-MiniLM-L6-v2')
    RERANKER_MODEL_NAME = os.environ.get('RERANKER_MODEL_NAME', 'cross-encoder/ms-marco-MiniLM-L-6-v2')

    # Retrieval / generation tuning knobs.
    SEMANTIC_CHUNKING = False
    TEMPERATURE = 0
    CONVERSATION_HISTORY_LIMIT = 10
    JUDGE_LLM_MODEL_NAME = "gemini-2.0-flash"
    K = 10  # Initial retrieval K for the non-reranking flow.
    SEARCH_TYPE = "similarity"
    SCORE_THRESHOLD = 0.5
    # Reranking specific config
    INITIAL_RETRIEVAL_K = 40  # Number of docs fetched initially for reranking.
    RERANK_TOP_N = 10  # Number of docs kept after reranking.
    CHUNK_SIZE = 1000  # Chunk size for semantic chunking.
    CHUNK_OVERLAP = 100  # Chunk overlap for semantic chunking.

    # --- Accessor methods for prompts ---

    @classmethod
    def get_bk_persona_language_instruction(cls, language_code: str):
        """Language-specific instruction for the BK persona prompt ('' if missing)."""
        table = cls.PROMPTS.get('language_instructions', {}).get('bk_persona', {})
        return table.get(language_code.lower(), '')

    @classmethod
    def get_final_answer_language_instruction(cls, language_code: str):
        """Language-specific instruction for the final-answer system prompt ('' if missing)."""
        table = cls.PROMPTS.get('language_instructions', {}).get('final_answer_system', {})
        return table.get(language_code.lower(), '')

    @classmethod
    def get_system_prompt(cls, language_code: str):
        """Combined system prompt: base persona + question guidance + language instruction."""
        system = cls.PROMPTS.get('system_prompt', {})
        pieces = [
            system.get('bk_persona', ''),
            system.get('question_guidance', ''),
            cls.get_bk_persona_language_instruction(language_code),
        ]
        return "\n".join(pieces).strip()

    @classmethod
    def get_bk_persona_prompt(cls):
        """Base BK persona system prompt (language-agnostic English text only)."""
        return cls.PROMPTS.get('system_prompt', {}).get('bk_persona', '')

    @classmethod
    def get_question_guidance_prompt(cls):
        """Question-guidance portion of the system prompt."""
        return cls.PROMPTS.get('system_prompt', {}).get('question_guidance', '')

    @classmethod
    def get_evaluate_context_prompt(cls):
        """Prompt used to evaluate retrieved context."""
        return cls.PROMPTS.get('evaluate_context_prompt', '')

    @classmethod
    def get_reframe_question_prompt(cls):
        """Prompt used to reframe the user's question."""
        return cls.PROMPTS.get('reframe_question_prompt', '')

    @classmethod
    def get_final_answer_system_prompt_template(cls, language_code: str):
        """Final-answer system prompt template with the language instruction
        spliced in just before the 'CRITICAL INSTRUCTION:' marker.

        If the instruction is empty or the marker is not found, the base
        template is returned untouched.
        """
        template = cls.PROMPTS.get('final_answer_prompt_system', '')
        instruction = cls.get_final_answer_language_instruction(language_code)
        marker_pos = template.find("CRITICAL INSTRUCTION:")
        if not instruction or marker_pos == -1:
            return template
        head, tail = template[:marker_pos], template[marker_pos:]
        return f"{head}{instruction}\n{tail}".strip()

    @classmethod
    def get_final_answer_human_prompt_template(cls):
        """Final-answer human prompt template."""
        return cls.PROMPTS.get('final_answer_prompt_human', '')

    @classmethod
    def get_judge_prompt_template(cls):
        """LLM-as-judge evaluation prompt template."""
        return cls.PROMPTS.get('judge_prompt_template', '')

    @classmethod
    def get_user_message(cls, message_key: str, default: str = ""):
        """User-facing message string from prompts.yaml, or *default* if absent."""
        return cls.PROMPTS.get('user_messages', {}).get(message_key, default)
# Manual smoke test: run this module directly to inspect the resolved prompts.
if __name__ == "__main__":
    samples = [
        ("System Prompt", Config.get_system_prompt('en')),  # English example
        ("Reframe Question Prompt", Config.get_reframe_question_prompt()),
        ("Judge Prompt Template", Config.get_judge_prompt_template()),
    ]
    for label, text in samples:
        print(f"\n{label}:")
        print(text)
    print(f"\nTelegram Token: {Config.TELEGRAM_BOT_TOKEN}")
|