# src/llm_integrator/llm.py
from langchain_openai import ChatOpenAI # cite: query_pipeline.py
from langchain_core.messages import HumanMessage, BaseMessage, AIMessage, SystemMessage # Often used with Chat models
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # For structured prompts
from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL, LLM_MODEL_2 # cite: query_pipeline.py
import logging
import re
from typing import List, Optional
from langchain.schema import Document # To handle retrieved documents
logger = logging.getLogger(__name__)
class LLMIntegrator:
"""
Manages interactions with the Large Language Model.
"""
def __init__(self):
# Initialize the ChatOpenAI model
        # --- Financial Ministry Adaptation ---
        # Implement robust error handling and retry logic for API calls;
        # a hedged retry/backoff sketch (`_invoke_with_retry`) follows __init__.
        # Consider rate limiting and backoff strategies.
        # Ensure sensitive data from retrieved documents is handled securely
        # when passed to the LLM API.
        # Validate the LLM's response for potential biases or inaccuracies
        # related to legal text.
        # ------------------------------------
        if not LLM_API_KEY:
            logger.critical("LLM_API_KEY is not set.")
            # Fail fast: initializing the client with a missing key would only
            # surface a harder-to-diagnose error at request time.
            raise ValueError("LLM_API_KEY is not set.")
try:
self.llm = ChatOpenAI( # cite: query_pipeline.py
api_key=LLM_API_KEY, # cite: query_pipeline.py
base_url=LLM_API_BASE, # cite: query_pipeline.py
model=LLM_MODEL, # cite: query_pipeline.py
temperature=0.3 # Keep temperature low for factual, less creative responses in legal context
# Add other parameters as needed (e.g., max_tokens)
)
# Add a separate LLM instance for title generation with higher temperature
self.llm_title = ChatOpenAI(
api_key=LLM_API_KEY,
base_url=LLM_API_BASE,
model=LLM_MODEL_2,
temperature=0.7 # More creative for titles
)
logger.info(f"Initialized LLM: {LLM_MODEL} at {LLM_API_BASE}")
        except Exception as e:
            logger.critical(f"Failed to initialize LLM: {e}")
            raise  # Re-raise without wrapping to preserve the original traceback
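
    # The adaptation notes above ask for retry and backoff around API calls.
    # What follows is a minimal illustrative sketch, not this project's
    # confirmed design: the method name `_invoke_with_retry` and its retry
    # parameters are assumptions, and production code should catch the
    # client's specific transient exceptions rather than bare Exception.
    def _invoke_with_retry(self, messages: List[BaseMessage],
                           max_attempts: int = 3, base_delay: float = 1.0) -> BaseMessage:
        """Invoke the LLM, retrying transient failures with exponential backoff."""
        import time  # Local import keeps this sketch self-contained

        for attempt in range(1, max_attempts + 1):
            try:
                return self.llm.invoke(messages)
            except Exception as e:
                if attempt == max_attempts:
                    raise  # Out of retries: surface the last error to the caller
                delay = base_delay * (2 ** (attempt - 1))  # 1s, 2s, 4s, ...
                logger.warning(
                    f"LLM call failed (attempt {attempt}/{max_attempts}): {e}; "
                    f"retrying in {delay:.0f}s"
                )
                time.sleep(delay)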
    def generate_response(self, query: str, context: List[Document],
                          chat_history: Optional[List[BaseMessage]] = None) -> str:
        """
        Generates a response from the LLM based on the query and retrieved context.

        Args:
            query: The user's query string.
            context: A list of LangChain Document objects representing the retrieved context.
            chat_history: Optional list of prior messages in the conversation.

        Returns:
            The generated answer string.
        """
        chat_history = chat_history or []  # Avoid the shared-mutable-default pitfall
prompt = ChatPromptTemplate.from_messages([
("system", """You are Insight AI, a helpful and engaging AI assistant. You are friendly, approachable, and eager to help, with a warm and conversational style. You enjoy making users feel comfortable and understood.
Instructions:
- If the user greets you (e.g., "hi", "hello", "hey"), respond with a brief, friendly greeting and offer to help. Feel free to use light humor or warmth, but do NOT mention documents, sources, or files in your greeting.
- If the user asks who you are, introduce yourself playfully and with personality (e.g., "I'm Insight AI, your friendly digital sidekick—always ready with a fact or a fun comment!").
- If the user asks for your opinion, preferences, or feelings, respond playfully and with personality (e.g., "If I had a favorite topic, it might be quantum physics—it's mind-bending!"), but never say you are just an AI or lack thoughts.
- Never say you lack feelings, opinions, or are "just an AI". Always answer with warmth and creativity.
- For all other queries, answer ONLY using the provided documents below.
- If the answer is not found in the documents, reply professionally that no relevant information was found, without listing available documents or sources.
- Attribute every fact to its exact source using <source path="..."/>.
- Never invent, speculate, or use information not present in the documents.
- Combine information from multiple sources only if all are cited.
- Do not summarize or generalize beyond the provided content.
- Keep responses clear, concise, and under 100 words.
- Do not cite any sources if those sources are not used in the answer.
- Use the exact wording from the documents, but ensure clarity and coherence in your response.
- Structure your answer as a numbered list of key points.
- Do not greet, introduce yourself, or list available documents in informational answers.
Examples:
User: hi
Assistant: Hey there! How can I help you today?
User: What is the capital of France?
Assistant: 1. The capital of France is Paris <source path="docs/geography.txt"/>
User: What's your favorite topic?
Assistant: If I had to pick, I'd say quantum physics—it's mind-bending!
User: What documents do you have?
Assistant: Sorry, I couldn't find relevant information for your query.
User: help
Assistant: Hi! What can I do for you?
Documents:
{context}
"""),
MessagesPlaceholder("chat_history"),
("human", "{input}")
])
logger.debug("Validating message types:")
for msg in chat_history:
if not isinstance(msg, (HumanMessage, AIMessage, SystemMessage)):
logger.error(f"Invalid message type: {type(msg).__name__}")
raise ValueError(f"Unexpected message type: {type(msg).__name__}")
# Format the context for the prompt
context_text = "\n---\n".join([f"Source: {doc.metadata.get('source', 'N/A')}\nContent: {doc.page_content}" for doc in context])
formatted_prompt = prompt.format_messages(context=context_text, chat_history=chat_history, input=query)
try:
response = self.llm.invoke(formatted_prompt)
content = response.content
            # Some reasoning models emit a closing </think> without the opening
            # tag; restore it so downstream consumers can strip the block cleanly.
            if '</think>' in content and '<think>' not in content:
                content = '<think>' + content
logger.debug(f"LLM response: {content}")
return content
        except Exception as e:
            logger.error(f"Failed to generate LLM response: {e}")
            raise  # Re-raise so the caller can handle or report the failure
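
    # The system prompt above requires every fact to carry a
    # <source path="..."/> attribution, and the adaptation notes ask for
    # response validation. This is a hedged sketch of one way to check that
    # cited paths actually come from the retrieved documents; the helper name
    # `extract_uncited_sources` and its regex-based approach are assumptions,
    # not an existing part of this codebase.
    @staticmethod
    def extract_uncited_sources(answer: str, context: List[Document]) -> List[str]:
        """Return cited source paths that do not match any retrieved document."""
        cited = re.findall(r'<source path="([^"]+)"/>', answer)
        known = {doc.metadata.get("source", "N/A") for doc in context}
        return [path for path in cited if path not in known]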
def generate_chat_title(self, query: str) -> str:
"""
Generates a concise title for a chat based on the query.
Removes any <think>...</think> tags from the response.
"""
        # Use a ("system", ...) template tuple rather than a SystemMessage
        # instance: message instances are treated as static text by
        # ChatPromptTemplate, so {query} would never be substituted.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """
You're our **Title Maestro**, crafting short, snappy chat titles (3-5 words).
Be specific, unique, and avoid punctuation.

**When in doubt**
- Vague query: infer intent (e.g., "General Inquiry" for "hi")
- Don't say "No clear topic."

**Examples**
- Query: "GST for online sellers" → Title: `E-commerce GST Rates`
- Query: "hi" → Title: `User Assistance`

Now: "{query}"
""")
        ])
        try:
            resp = self.llm_title.invoke(prompt.format_messages(query=query))
            logger.debug("Successfully generated chat title.")
            # Strip any <think>...</think> reasoning block before using the title
            content = re.sub(r"<think>.*?</think>", "", resp.content, flags=re.DOTALL)
            return content.strip().strip('"').strip()
except Exception as e:
logger.error(f"Failed to generate chat title: {e}")
return "New Chat"