File size: 7,220 Bytes
3f61806
24ae72d
3f61806
 
 
 
 
24ae72d
 
 
 
d4c609e
 
 
 
24ae72d
 
 
d4c609e
 
24ae72d
 
d4c609e
24ae72d
 
 
 
 
d4c609e
 
3f61806
24ae72d
 
3f61806
24ae72d
3f61806
24ae72d
3f61806
 
 
872c2a9
b9ccd0b
3f61806
 
b9ccd0b
 
 
 
3f61806
 
b9ccd0b
 
24ae72d
3f61806
24ae72d
 
b9ccd0b
24ae72d
7361b6f
 
24ae72d
 
 
b9ccd0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24ae72d
 
 
b9ccd0b
 
24ae72d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9ccd0b
 
 
 
 
 
 
 
1d4e6f0
b9ccd0b
 
 
 
 
 
 
24ae72d
 
 
 
 
 
 
 
 
 
b9ccd0b
 
 
 
 
 
24ae72d
 
 
b9ccd0b
24ae72d
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import yaml 
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the nearest .env file. By python-dotenv's
# default, values already present in the real environment are NOT overridden.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)  # This loads the variables from .env

# Define the path to the prompts file relative to this config file
PROMPTS_FILE_PATH = os.path.join(os.path.dirname(__file__), 'prompts.yaml') 

def load_prompts(file_path):
    """Read the YAML prompts file at *file_path* and return it as a dict.

    Every failure mode (missing file, malformed YAML, a non-dict top level,
    or anything unexpected) is reported via ``print`` and collapses to an
    empty dict, so callers never need to guard the load themselves.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            loaded = yaml.safe_load(handle)
    except FileNotFoundError:
        print(f"Error: Prompts file not found at '{file_path}'")
        return {}
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file '{file_path}': {e}")
        return {}
    except Exception as e:
        print(f"An unexpected error occurred while loading prompts from '{file_path}': {e}")
        return {}
    if isinstance(loaded, dict):
        return loaded
    print(f"Warning: Prompts file '{file_path}' did not load as a dictionary.")
    return {}


class Config:
    """Central application configuration.

    Values come from three places:
      * ``prompts.yaml`` (loaded once at import time into ``PROMPTS``),
      * environment variables (loaded from ``.env`` by this module),
      * hard-coded defaults below.
    """

    # Prompt texts loaded from prompts.yaml ({} if the file is missing/invalid).
    PROMPTS = load_prompts(PROMPTS_FILE_PATH)
    # Telegram Bot Token
    TELEGRAM_BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN')
    # For tracking conversation history (in-memory only).
    USER_SESSIONS = {}
    # Optional: API keys / paths for external services
    GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', None)
    VECTOR_STORE_PATH = os.environ.get('VECTOR_STORE_PATH', None)
    DATA_PATH = os.environ.get('DATA_PATH', None)
    INDEXED_DATA_PATH = os.environ.get('INDEXED_DATA_PATH', None)
    WEBHOOK_URL = os.environ.get('WEBHOOK_URL', None)
    PORT = int(os.environ.get('PORT', 5000))
    LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gemini-2.5-flash-preview-04-17')
    EMBEDDING_MODEL_NAME = os.environ.get('EMBEDDING_MODEL_NAME', 'all-MiniLM-L6-v2')
    RERANKER_MODEL_NAME = os.environ.get('RERANKER_MODEL_NAME', 'cross-encoder/ms-marco-MiniLM-L-6-v2')
    SEMANTIC_CHUNKING = False
    TEMPERATURE = 0
    CONVERSATION_HISTORY_LIMIT = 10
    JUDGE_LLM_MODEL_NAME = "gemini-2.0-flash"
    K = 10  # Initial retrieval K for non-reranking flow (can be kept or removed if INITIAL_RETRIEVAL_K is always used)
    SEARCH_TYPE = "similarity"
    SCORE_THRESHOLD = 0.5
    # Reranking specific config
    INITIAL_RETRIEVAL_K = 40  # Number of docs to fetch initially for reranking
    RERANK_TOP_N = 10  # Number of docs to keep after reranking
    CHUNK_SIZE = 1000  # Size of chunks for semantic chunking
    CHUNK_OVERLAP = 100  # Overlap size for semantic chunking

    # --- Accessor methods for prompts ---
    @classmethod
    def _language_instruction(cls, section: str, language_code: str) -> str:
        """Look up the instruction for *section* under 'language_instructions'.

        Returns '' when the section or language key is absent, or when
        ``language_code`` is falsy (e.g. None). Matching is case-insensitive
        on the language code.
        """
        lang = (language_code or '').lower()  # keys are stored lowercase
        return cls.PROMPTS.get('language_instructions', {}).get(section, {}).get(lang, '')

    @classmethod
    def get_bk_persona_language_instruction(cls, language_code: str) -> str:
        """Gets the language-specific instruction for the BK persona prompt."""
        return cls._language_instruction('bk_persona', language_code)

    @classmethod
    def get_final_answer_language_instruction(cls, language_code: str) -> str:
        """Gets the language-specific instruction for the final answer system prompt."""
        return cls._language_instruction('final_answer_system', language_code)

    @classmethod
    def get_system_prompt(cls, language_code: str) -> str:
        """Gets the combined system prompt: base persona + guidance + language instruction."""
        base_persona = cls.PROMPTS.get('system_prompt', {}).get('bk_persona', '')
        guidance = cls.PROMPTS.get('system_prompt', {}).get('question_guidance', '')
        lang_instruction = cls.get_bk_persona_language_instruction(language_code)
        # Join with newlines; strip() removes leading/trailing blanks when parts are empty.
        return f"{base_persona}\n{guidance}\n{lang_instruction}".strip()

    @classmethod
    def get_bk_persona_prompt(cls) -> str:
        """Gets the base BK persona system prompt (language-agnostic, no language instruction)."""
        return cls.PROMPTS.get('system_prompt', {}).get('bk_persona', '')

    @classmethod
    def get_question_guidance_prompt(cls) -> str:
        """Gets the question guidance prompt ('' if absent)."""
        return cls.PROMPTS.get('system_prompt', {}).get('question_guidance', '')

    @classmethod
    def get_evaluate_context_prompt(cls) -> str:
        """Gets the 'evaluate_context_prompt' text ('' if absent)."""
        return cls.PROMPTS.get('evaluate_context_prompt', '')

    @classmethod
    def get_reframe_question_prompt(cls) -> str:
        """Gets the 'reframe_question_prompt' text ('' if absent)."""
        return cls.PROMPTS.get('reframe_question_prompt', '')

    @classmethod
    def get_final_answer_system_prompt_template(cls, language_code: str) -> str:
        """Gets the final answer system prompt template with the language instruction spliced in.

        The language instruction (if any) is inserted immediately before the
        'CRITICAL INSTRUCTION:' marker in the template. If the instruction is
        empty or the marker is not found, the base template is returned as-is.
        """
        base_template = cls.PROMPTS.get('final_answer_prompt_system', '')
        lang_instruction = cls.get_final_answer_language_instruction(language_code)
        marker = "CRITICAL INSTRUCTION:"
        insert_at = base_template.find(marker)
        if not lang_instruction or insert_at == -1:
            return base_template
        return f"{base_template[:insert_at]}{lang_instruction}\n{base_template[insert_at:]}".strip()

    @classmethod
    def get_final_answer_human_prompt_template(cls) -> str:
        """Gets the 'final_answer_prompt_human' template ('' if absent)."""
        return cls.PROMPTS.get('final_answer_prompt_human', '')

    @classmethod
    def get_judge_prompt_template(cls) -> str:
        """Gets the 'judge_prompt_template' text ('' if absent)."""
        return cls.PROMPTS.get('judge_prompt_template', '')

    @classmethod
    def get_user_message(cls, message_key: str, default: str = "") -> str:
        """Gets a specific user-facing message string from prompts.yaml's 'user_messages'."""
        return cls.PROMPTS.get('user_messages', {}).get(message_key, default)

# Example usage (optional, for testing)
def _print_config_samples() -> None:
    """Dump a few loaded prompts and settings so the config can be eyeballed quickly."""
    print("\nSystem Prompt:")
    print(Config.get_system_prompt('en'))  # Example for English
    print("\nReframe Question Prompt:")
    print(Config.get_reframe_question_prompt())
    print("\nJudge Prompt Template:")
    print(Config.get_judge_prompt_template())
    print(f"\nTelegram Token: {Config.TELEGRAM_BOT_TOKEN}")


if __name__ == "__main__":
    _print_config_samples()