Chandima Prabhath committed
Commit 213465c · 1 Parent(s): 0aa7bc9

Update LLM model settings and enhance response handling in LLMIntegrator

Files changed (2):
  1. config/settings.py +3 -2
  2. src/llm_integrator/llm.py +34 -39
config/settings.py CHANGED
@@ -21,10 +21,11 @@ CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "10")) # Default chunk overlap
  # --- LLM Settings (OpenAI Compatible) ---
  LLM_API_KEY = os.getenv("LLM_API_KEY")
  LLM_API_BASE = os.getenv("LLM_API_BASE", "https://llm.chutes.ai/v1") # Default API base
- LLM_MODEL = os.getenv("LLM_MODEL", "chutesai/Llama-4-Scout-17B-16E-Instruct") # Default LLM model
+ LLM_MODEL = os.getenv("LLM_MODEL", "microsoft/MAI-DS-R1-FP8") # Default LLM model
+ LLM_MODEL_2 = os.getenv("LLM_MODEL_2", "Qwen/Qwen3-8B")

  # --- Retrieval Settings ---
- TOP_K = int(os.getenv("TOP_K", "5")) # Default number of documents to retrieve
+ TOP_K = int(os.getenv("TOP_K", "10")) # Default number of documents to retrieve
  CHROMADB_COLLECTION_NAME = os.getenv("CHROMADB_COLLECTION_NAME", "my_rulings_collection") # Unique collection name

  # --- Security Settings (Placeholders - Implement according to government standards) ---
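
Every setting above follows the same `os.getenv(name, default)` pattern, so each value can be overridden per environment without touching the code. A minimal sketch of how the new defaults resolve (the standalone script and its filename are illustrative, not part of the repo; values are copied from the diff):

```python
import os

# Same env-with-fallback pattern as config/settings.py.
LLM_MODEL = os.getenv("LLM_MODEL", "microsoft/MAI-DS-R1-FP8")  # primary answer model
LLM_MODEL_2 = os.getenv("LLM_MODEL_2", "Qwen/Qwen3-8B")        # smaller model, used for titles
TOP_K = int(os.getenv("TOP_K", "10"))                          # retrieval depth (was 5)

if __name__ == "__main__":
    # Override at launch, e.g.:
    #   TOP_K=3 LLM_MODEL_2=some/other-model python check_settings.py
    print(f"model={LLM_MODEL} title_model={LLM_MODEL_2} top_k={TOP_K}")
```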
src/llm_integrator/llm.py CHANGED
@@ -2,7 +2,7 @@
  from langchain_openai import ChatOpenAI # cite: query_pipeline.py
  from langchain_core.messages import HumanMessage, BaseMessage, AIMessage, SystemMessage # Often used with Chat models
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # For structured prompts
- from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL # cite: query_pipeline.py
+ from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL, LLM_MODEL_2 # cite: query_pipeline.py
  import logging
  from typing import List
  from langchain.schema import Document # To handle retrieved documents
@@ -38,8 +38,8 @@ class LLMIntegrator:
              self.llm_title = ChatOpenAI(
                  api_key=LLM_API_KEY,
                  base_url=LLM_API_BASE,
-                 model=LLM_MODEL,
-                 temperature=0.9 # More creative for titles
+                 model=LLM_MODEL_2,
+                 temperature=0.7 # More creative for titles
              )
              logger.info(f"Initialized LLM: {LLM_MODEL} at {LLM_API_BASE}")
          except Exception as e:
@@ -81,9 +81,6 @@ Examples:
  User: hi
  Assistant: Hey there! How can I help you today?

- User: Who are you?
- Assistant: I'm Insight AI, your friendly digital sidekick—always ready with a fact or a fun comment!
-
  User: What is the capital of France?
  Assistant: 1. The capital of France is Paris <source path="docs/geography.txt"/>

@@ -114,55 +111,53 @@ Documents:
          formatted_prompt = prompt.format_messages(context=context_text, chat_history=chat_history, input=query)

          try:
-             # Invoke the LLM with the formatted prompt
              response = self.llm.invoke(formatted_prompt)
-             logger.debug("Successfully generated LLM response.")
-             return response.content # Get the string content of the AI message
+             content = response.content
+
+             # ---- NEW: ensure full think-tag wrapping ----
+             if '</think>' in content and '<think>' not in content:
+                 content = '<think>' + content
+             # ------------------------------------------------
+
+             logger.debug(f"LLM response: {content}")
+             return content
          except Exception as e:
              logger.error(f"Failed to generate LLM response: {e}")
-             # Depending on requirements, implement retry or return a specific error message
-             return "An error occurred while generating the response." # Provide a user-friendly error
+             # Re-raise so the caller can handle the failure
+             raise e

      def generate_chat_title(self, query: str) -> str:
          """
          Generates a concise title for a chat based on the query.
-
-         Args:
-             query: The user's query string.
-
-         Returns:
-             A short title string.
+         Removes any <think>...</think> tags from the response.
          """
          prompt = ChatPromptTemplate.from_messages([
-             ("system", """Generate a clear, specific, unique and concise 3-5 word title for the following user query.
- If the query is vague, generic, or a greeting (e.g., "hi", "hello", "help"), infer a likely intent or use a default like "General Inquiry" or "User Assistance".
- Never reply with "No clear topic provided". Do not use markdown, quotes, or punctuation.
-
- Examples:
- Query: Tax implications for foreign investments
- Title: Foreign Investment Taxes
+             SystemMessage(
+                 content="""
+ You’re our **Title Maestro**—crafting short, snappy chat titles (3–5 words).
+ Be specific, unique, and avoid punctuation.

- Query: GST rates for e-commerce
- Title: E-commerce GST Rates
+ **When in doubt**
+ - Vague query → infer intent (e.g., “General Inquiry” for “hi”)
+ - Don’t say “No clear topic.”

- Query: How to file quarterly TDS returns
- Title: Quarterly TDS Filing
+ **Examples**
+ - Query: “GST for online sellers” → Title: `E-commerce GST Rates`
+ - Query: “hi” → Title: `User Assistance`

- Query: hi
- Title: General Inquiry
-
- Query: help
- Title: User Assistance
-
- Query: {query}""")
+ Now: “{query}”
+ """
+             )
          ])

          try:
-             # Use the higher-temperature LLM for title generation
-             response = self.llm_title.invoke(prompt.format_messages(query=query))
+             resp = self.llm_title.invoke(prompt.format_messages(query=query))
              logger.debug("Successfully generated chat title.")
-             return response.content.strip('"').replace("Title:", "").strip()
+             # Remove <think>...</think> tags if present
+             import re
+             content = resp.content
+             content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
+             return content.strip('"').strip()
          except Exception as e:
              logger.error(f"Failed to generate chat title: {e}")
-             # Provide a fallback title
              return "New Chat"