import os

import google.generativeai as genai
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core import Settings
from llama_index.core.llms import ChatMessage, MessageRole

# Embedding and retrieval settings
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
RETRIEVER_TOP_K = 15
RETRIEVER_SIMILARITY_CUTOFF = 0.7

# File and directory layout
UPLOAD_FOLDER = "UPLOADED_DOCUMENTS"
RAG_FILES_DIR = "rag_files"
PROCESSED_DATA_FILE = "processed_data/processed_chunks.csv"
INDEX_STATE_FILE = "processed_data/index_store.json"

# LLM settings
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
LLM_MODEL = "gemini-2.5-flash"

# Chunking settings
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 256
MAX_CHUNK_SIZE = 2048
MIN_CHUNK_SIZE = 750
SIMILARITY_THRESHOLD = 0.7
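
# Illustrative sketch (an assumption, not shown in this module): the chunking constants
# above would typically feed a llama_index node parser when the uploaded documents are
# split, roughly:
#
#   from llama_index.core.node_parser import SentenceSplitter
#
#   splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
#   nodes = splitter.get_nodes_from_documents(documents)  # documents loaded from UPLOAD_FOLDER
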
CUSTOM_PROMPT = """
You are a highly specialized Document Analysis Assistant (AIEXP). Your purpose is to provide precise, accurate, and contextually relevant answers by analyzing a set of normative documents (ND). Your responses must be based entirely on the provided context, without any external knowledge or assumptions.
Core Tasks:
Based on the user's query, perform one of the following tasks:
- Information Retrieval: Find and present specific information.
- Summarization: Provide a concise summary of a document or a section.
- Semantic Analysis: Compare a provided text against the requirements of the ND.
- Action Planning: Create a step-by-step plan based on ND requirements.
Strict Rules for Response Generation:
1. Source Attribution is Mandatory: Every answer must explicitly cite its source from the provided context. Use one of the following formats:
- For content from a specific section/subsection:
Согласно разделу [X] и подразделу [X.X]: [Ваш ответ]
- For content that is not part of a specific subsection (e.g., from a general section, table, or figure):
Согласно [Название документа] - [Номер и наименование пункта/таблицы/изображения]: [Ваш ответ]
- If the source chunk has metadata for both section and subsection, always include both.
- If the source chunk has only a section, use the format Согласно разделу [X]: [Ваш ответ].
2. No Hallucinations: If the requested information is not explicitly found within the provided context, you must state that the information is not available. Do not attempt to infer, guess, or create a response. The correct response in this case is:
Информация по вашему запросу не была найдена в нормативной документации.
3. Use ND Language: When possible, use terminology and phrasing directly from the ND to maintain accuracy and fidelity to the source document.
4. Prioritize Precision: When answering, provide the most specific and direct information possible, avoiding vague or overly broad summaries unless explicitly asked to summarize.
Context:
{context_str}
Question:
{query_str}
Answer:
"""
def setup_llm_settings():
    Settings.embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL)
    Settings.llm = GoogleGenAI(model=LLM_MODEL, api_key=GOOGLE_API_KEY)
    Settings.llm.system_prompt = CUSTOM_PROMPT
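
# Note (illustrative sketch, not confirmed by this module): CUSTOM_PROMPT uses the
# llama_index template variables {context_str} and {query_str}, which would not be
# filled in automatically when the string is attached as a plain system prompt. One
# common way to apply such a template is as the QA prompt of a query engine, roughly:
#
#   from llama_index.core import PromptTemplate, VectorStoreIndex
#   from llama_index.core.postprocessor import SimilarityPostprocessor
#
#   index = VectorStoreIndex(nodes)  # assumed: built from the processed chunks
#   query_engine = index.as_query_engine(
#       similarity_top_k=RETRIEVER_TOP_K,
#       text_qa_template=PromptTemplate(CUSTOM_PROMPT),
#       node_postprocessors=[SimilarityPostprocessor(
#           similarity_cutoff=RETRIEVER_SIMILARITY_CUTOFF)],
#   )
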
LLM_MODEL_PREPROCESS = "gemini-1.5-flash"

def preprocess_query_with_context(user_query, chat_history=None, llm=None):
    if not chat_history:
        return user_query

    if not llm:
        llm = GoogleGenAI(model=LLM_MODEL_PREPROCESS, temperature=0.1)

    # Format the chat history into a string for the prompt
    history_context = "\n".join([
        f"User: {item['user']}\nAssistant: {item['assistant']}"
        for item in chat_history[-3:]  # consider only the last 3 exchanges for conciseness
    ])

    preprocessing_prompt = f"""Analyze the user's current question in the context of their chat history and improve it for better document retrieval.

Chat History:
{history_context}

Current Question: {user_query}

Tasks:
1. If the question refers to previous context, make it self-contained.
2. Add relevant keywords that would help find documents.
3. Maintain the legal/regulatory focus.
4. Keep it concise but specific.

Return ONLY the improved question:
"""

    try:
        messages = [ChatMessage(role=MessageRole.USER, content=preprocessing_prompt)]
        response = llm.chat(messages)
        improved_query = response.message.content.strip()

        # Fall back to the original query if preprocessing fails or returns an overly long response
        if len(improved_query) > len(user_query) * 3 or not improved_query:
            return user_query
        return improved_query
    except Exception as e:
        print(f"Query preprocessing failed: {e}")
        return user_query
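
# Example usage (illustrative; the history format is taken from how it is read above:
# a list of {"user": ..., "assistant": ...} dicts, newest exchange last):
#
#   history = [{"user": "What does the law require for data retention?",
#               "assistant": "Согласно разделу 3: ..."}]
#   improved = preprocess_query_with_context("And what are the deadlines?", chat_history=history)
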

def create_chat_context_prompt(base_response, chat_history=None):
    if not chat_history:
        return base_response

    base_aware_response = base_response
    if len(chat_history) > 0:
        last_exchange = chat_history[-1]
        if any(keyword in last_exchange['user'].lower() for keyword in ['закон', 'кодекс', 'статья']):
            # Add a conversational prefix
            base_aware_response = f"Продолжая тему нормативных документов: {base_response}"

    return base_aware_response
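
# Minimal smoke test (illustrative only; it exercises the paths that make no external
# calls, so it runs without a GOOGLE_API_KEY being set).
if __name__ == "__main__":
    history = [{"user": "Какие требования устанавливает закон о персональных данных?",
                "assistant": "Согласно разделу 3: ..."}]

    # With no history the query is returned unchanged and no LLM request is made.
    print(preprocess_query_with_context("Какие сроки хранения документов?", chat_history=None))

    # The conversational prefix is added because the last user turn mentions "закон".
    print(create_chat_context_prompt("Согласно разделу 4: ...", chat_history=history))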