Commit 213465c · Parent: 0aa7bc9
Chandima Prabhath committed
Update LLM model settings and enhance response handling in LLMIntegrator

Files changed:
- config/settings.py +3 -2
- src/llm_integrator/llm.py +34 -39
config/settings.py CHANGED
@@ -21,10 +21,11 @@ CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "10")) # Default chunk overlap
 # --- LLM Settings (OpenAI Compatible) ---
 LLM_API_KEY = os.getenv("LLM_API_KEY")
 LLM_API_BASE = os.getenv("LLM_API_BASE", "https://llm.chutes.ai/v1") # Default API base
-LLM_MODEL = os.getenv("LLM_MODEL", "…
+LLM_MODEL = os.getenv("LLM_MODEL", "microsoft/MAI-DS-R1-FP8") # Default LLM model
+LLM_MODEL_2 = os.getenv("LLM_MODEL_2", "Qwen/Qwen3-8B")
 
 # --- Retrieval Settings ---
-TOP_K = int(os.getenv("TOP_K", "…
+TOP_K = int(os.getenv("TOP_K", "10")) # Default number of documents to retrieve
 CHROMADB_COLLECTION_NAME = os.getenv("CHROMADB_COLLECTION_NAME", "my_rulings_collection") # Unique collection name
 
 # --- Security Settings (Placeholders - Implement according to government standards) ---
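These settings follow the usual os.getenv pattern: each value can be overridden through the environment, otherwise the default applies. A minimal sketch of the override path (variable names come from the diff above; the config package layout is assumed):

import os

# Override the defaults before the settings module is imported.
os.environ["LLM_MODEL_2"] = "Qwen/Qwen3-8B"  # title-generation model
os.environ["TOP_K"] = "5"                    # retrieve 5 documents instead of 10

from config import settings

print(settings.LLM_MODEL_2)  # -> Qwen/Qwen3-8B
print(settings.TOP_K)        # -> 5 (parsed to int by the settings module)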
src/llm_integrator/llm.py CHANGED
@@ -2,7 +2,7 @@
 from langchain_openai import ChatOpenAI # cite: query_pipeline.py
 from langchain_core.messages import HumanMessage, BaseMessage, AIMessage, SystemMessage # Often used with Chat models
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # For structured prompts
-from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL # cite: query_pipeline.py
+from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL, LLM_MODEL_2 # cite: query_pipeline.py
 import logging
 from typing import List
 from langchain.schema import Document # To handle retrieved documents
@@ -38,8 +38,8 @@ class LLMIntegrator:
             self.llm_title = ChatOpenAI(
                 api_key=LLM_API_KEY,
                 base_url=LLM_API_BASE,
-                model=…
-                temperature=0.…
+                model=LLM_MODEL_2,
+                temperature=0.7 # More creative for titles
             )
             logger.info(f"Initialized LLM: {LLM_MODEL} at {LLM_API_BASE}")
         except Exception as e:
@@ -81,9 +81,6 @@ Examples:
 User: hi
 Assistant: Hey there! How can I help you today?
 
-User: Who are you?
-Assistant: I'm Insight AI, your friendly digital sidekick—always ready with a fact or a fun comment!
-
 User: What is the capital of France?
 Assistant: 1. The capital of France is Paris <source path="docs/geography.txt"/>
 
@@ -114,55 +111,53 @@ Documents:
         formatted_prompt = prompt.format_messages(context=context_text, chat_history=chat_history, input=query)
 
         try:
-            # Invoke the LLM with the formatted prompt
             response = self.llm.invoke(formatted_prompt)
-            …
+            content = response.content
+
+            # ---- NEW: ensure full think-tag wrapping ----
+            if '</think>' in content and '<think>' not in content:
+                content = '<think>' + content
+            # ------------------------------------------------
+
+            logger.debug(f"LLM response: {content}")
+            return content
         except Exception as e:
             logger.error(f"Failed to generate LLM response: {e}")
-            # …
+            # raise error
+            raise e
 
     def generate_chat_title(self, query: str) -> str:
         """
         Generates a concise title for a chat based on the query.
-
-        Args:
-            query: The user's query string.
-
-        Returns:
-            A short title string.
+        Removes any <think>...</think> tags from the response.
         """
         prompt = ChatPromptTemplate.from_messages([
-            (…
-            Examples:
-            Query: Tax implications for foreign investments
-            Title: Foreign Investment Taxes
-            …
-            Title: …
-            Query: help
-            Title: User Assistance
-
-            Query: {query}""")
+            SystemMessage(
+                content="""
+You’re our **Title Maestro**—crafting short, snappy chat titles (3–5 words).
+Be specific, unique, and avoid punctuation.
+
+**When in doubt**
+- Vague query → infer intent (e.g., “General Inquiry” for “hi”)
+- Don’t say “No clear topic.”
+
+**Examples**
+- Query: “GST for online sellers” → Title: `E-commerce GST Rates`
+- Query: “hi” → Title: `User Assistance`
+
+Now: “{query}”
+"""
+            )
         ])
 
         try:
-            …
-            response = self.llm_title.invoke(prompt.format_messages(query=query))
+            resp = self.llm_title.invoke(prompt.format_messages(query=query))
             logger.debug("Successfully generated chat title.")
-            …
+            # Remove <think>...</think> tags if present
+            import re
+            content = resp.content
+            content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
+            return content.strip('"').strip()
         except Exception as e:
             logger.error(f"Failed to generate chat title: {e}")
-            # Provide a fallback title
             return "New Chat"
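The new generate_response logic handles a quirk of reasoning models: they sometimes emit a dangling </think> with no opening tag, so the commit prepends <think> to keep the block well-formed, while generate_chat_title strips the block entirely. A standalone sketch of both behaviors (the helper names are illustrative, not part of the commit):

import re

def normalize_think_tags(content: str) -> str:
    # Mirrors the guard added in generate_response: if the model emitted a
    # closing </think> without the opening tag, prepend <think>.
    if "</think>" in content and "<think>" not in content:
        content = "<think>" + content
    return content

def strip_think_tags(content: str) -> str:
    # Mirrors the title cleanup: drop any <think>...</think> block, then
    # trim surrounding quotes and whitespace.
    content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
    return content.strip('"').strip()

assert normalize_think_tags("pondering...</think>Paris") == "<think>pondering...</think>Paris"
assert strip_think_tags('<think>pondering</think>"User Assistance"') == "User Assistance"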
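With both models wired up, title generation is isolated from answer generation: a failure in the cheaper Qwen/Qwen3-8B call degrades to a fixed fallback instead of breaking the chat. A usage sketch, assuming a no-argument LLMIntegrator constructor as the excerpt suggests and the import path implied by the file header:

from src.llm_integrator.llm import LLMIntegrator  # path assumed from the diff header

integrator = LLMIntegrator()  # requires LLM_API_KEY / LLM_API_BASE in the environment
title = integrator.generate_chat_title("Tax implications for foreign investments")
# On success: a short, think-tag-free title such as "Foreign Investment Taxes";
# on any error the method logs it and returns the fallback "New Chat".
print(title)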