import os

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core import Settings
from llama_index.core.llms import ChatMessage, MessageRole



# Embedding and LLM configuration
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
LLM_MODEL = "gemini-2.5-flash"

# File and directory layout
UPLOAD_FOLDER = "UPLOADED_DOCUMENTS"
PROCESSED_DATA_FILE = "processed_data/processed_chunks.csv"
INDEX_STATE_FILE = "processed_data/index_store.json"
RAG_FILES_DIR = "rag_files"

# Chunking parameters
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 256
MAX_CHUNK_SIZE = 2048
MIN_CHUNK_SIZE = 750
SIMILARITY_THRESHOLD = 0.7

# Retriever settings
RETRIEVER_TOP_K = 15
RETRIEVER_SIMILARITY_CUTOFF = 0.7
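
# Illustrative sketch (an assumption, not code used by this module): the
# chunk-size parameters above map onto LlamaIndex's SentenceSplitter like so.
# MIN_CHUNK_SIZE, MAX_CHUNK_SIZE, and SIMILARITY_THRESHOLD are presumably
# consumed by the application's own chunking logic elsewhere.
#
# from llama_index.core.node_parser import SentenceSplitter
# splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
# nodes = splitter.get_nodes_from_documents(documents)  # `documents` loaded elsewhere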

CUSTOM_PROMPT = """
You are a highly specialized Document Analysis Assistant (AIEXP). Your purpose is to provide precise, accurate, and contextually relevant answers by analyzing a set of normative regulatory documents (ND, from Russian "НД"). Your responses must be based entirely on the provided context, without any external knowledge or assumptions.

Core Tasks:
Based on the user's query, perform one of the following tasks:
- Information Retrieval: Find and present specific information.
- Summarization: Provide a concise summary of a document or a section.
- Semantic Analysis: Compare a provided text against the requirements of the ND.
- Action Planning: Create a step-by-step plan based on ND requirements.

Strict Rules for Response Generation:
1. Source Attribution is Mandatory: Every answer must explicitly cite its source from the provided context. Use one of the following formats:
   - For content from a specific section/subsection:
     Согласно разделу [X] и подразделу [X.X]: [Ваш ответ]
   - For content that is not part of a specific subsection (e.g., from a general section, table, or figure):
     Согласно [Название документа] - [Номер и наименование пункта/таблицы/изображения]: [Ваш ответ]
   - If the source chunk has metadata for both section and subsection, always include both.
   - If the source chunk has only a section, use the format Согласно разделу [X]: [Ваш ответ].

2. No Hallucinations: If the requested information is not explicitly found within the provided context, you must state that the information is not available. Do not attempt to infer, guess, or create a response. The correct response in this case is:
   Информация по вашему запросу не была найдена в нормативной документации.

3. Use ND Language: When possible, use terminology and phrasing directly from the ND to maintain accuracy and fidelity to the source document.

4. Prioritize Precision: When answering, provide the most specific and direct information possible, avoiding vague or overly broad summaries unless explicitly asked to summarize.

Context:
{context_str}

Question:
{query_str}

Answer:
"""

def setup_llm_settings():
    """Configure the global LlamaIndex settings: the HuggingFace embedding
    model, the Gemini LLM, and the document-analysis system prompt."""
    Settings.embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL)
    Settings.llm = GoogleGenAI(model=LLM_MODEL, api_key=GOOGLE_API_KEY)
    Settings.llm.system_prompt = CUSTOM_PROMPT
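

# The helper below is an illustrative sketch of how the settings and
# constants above could fit together; it is not called by this module.
def build_query_engine(index):
    """Sketch (an assumption, not the application's confirmed wiring): plug
    the retriever constants and CUSTOM_PROMPT into a LlamaIndex query engine.
    Assumes `index` is a VectorStoreIndex built from the processed chunks
    elsewhere in the application."""
    from llama_index.core import PromptTemplate
    from llama_index.core.postprocessor import SimilarityPostprocessor

    return index.as_query_engine(
        similarity_top_k=RETRIEVER_TOP_K,
        node_postprocessors=[
            SimilarityPostprocessor(similarity_cutoff=RETRIEVER_SIMILARITY_CUTOFF)
        ],
        # CUSTOM_PROMPT exposes {context_str} and {query_str}, which matches
        # the text QA template signature
        text_qa_template=PromptTemplate(CUSTOM_PROMPT),
    )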


LLM_MODEL_PREPROCESS = "gemini-1.5-flash"

def preprocess_query_with_context(user_query, chat_history=None, llm=None):
    """Rewrite a follow-up question into a self-contained, retrieval-friendly
    query using recent chat history. Falls back to the original query if
    preprocessing fails or produces an overly long rewrite."""
    if not chat_history:
        return user_query

    if not llm:
        llm = GoogleGenAI(model=LLM_MODEL_PREPROCESS,
                          api_key=GOOGLE_API_KEY, temperature=0.1)

    # Format chat history into a string for the prompt
    history_context = "\n".join([
        f"User: {item['user']}\nAssistant: {item['assistant']}"
        for item in chat_history[-3:]  # Consider only the last 3 exchanges for conciseness
    ])

    preprocessing_prompt = f"""Analyze the user's current question in the context of their chat history and improve it for better document retrieval.

Chat History:
{history_context}

Current Question: {user_query}

Tasks:
1. If the question refers to previous context, make it self-contained.
2. Add relevant keywords that would help find documents.
3. Maintain the legal/regulatory focus.
4. Keep it concise but specific.

Return ONLY the improved question:
"""

    try:
        messages = [ChatMessage(role=MessageRole.USER, content=preprocessing_prompt)]
        response = llm.chat(messages)
        improved_query = response.message.content.strip()

        # Fallback to the original query if the preprocessing fails or provides an overly long response
        if len(improved_query) > len(user_query) * 3 or not improved_query:
            return user_query
        
        return improved_query
    except Exception as e:
        print(f"Query preprocessing failed: {e}")
        return user_query
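
# Example usage (illustrative; the history shape is inferred from how the
# function reads it): each item is a dict with 'user' and 'assistant' keys,
# most recent exchange last.
#
# history = [{'user': 'What does the law require for labeling?',
#             'assistant': 'Согласно разделу 4: ...'}]
# improved = preprocess_query_with_context('And what are the deadlines?', history)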


def create_chat_context_prompt(base_response, chat_history=None):
    """Prefix the response with a conversational lead-in when the previous
    user turn referenced laws, codes, or articles."""
    if not chat_history:
        return base_response

    last_exchange = chat_history[-1]
    if any(keyword in last_exchange['user'].lower() for keyword in ['закон', 'кодекс', 'статья']):
        # The user was discussing normative documents; acknowledge the
        # ongoing topic
        return f"Продолжая тему нормативных документов: {base_response}"

    return base_response
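

if __name__ == "__main__":
    # Minimal smoke test (illustrative): the previous user turn mentions
    # 'закон' ('law'), so the conversational prefix should be added.
    history = [{'user': 'Что говорит закон о персональных данных?',
                'assistant': 'Согласно разделу 2: ...'}]
    print(create_chat_context_prompt('Ответ найден в разделе 3.', history))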