Update app.py
app.py CHANGED
@@ -154,45 +154,52 @@ async def generate(request: Request):
     Generates a response from the LLM without retaining chat context.
     Expects a JSON body with 'prompt'.
     """
-    logger.info("➡️ /generate endpoint received a request.")
+    logger.info("➡️ /generate endpoint received a request.")
     data = await request.json()
-    prompt = data.get("prompt", "").strip()
+    user_input = data.get("prompt", "").strip()  # Renamed to user_input for clarity
 
-    if not prompt:
+    if not user_input:
         logger.warning("Prompt cannot be empty in /generate request.")
         return {"error": "Prompt cannot be empty"}, 400
 
     # Define the system prompt - sent with every request
     system_prompt_content = (
-        "You are a highly efficient and objective data analysis API. "
-        "Your sole function is to process the
-        "**Crucially, do NOT include any conversational text, greetings, introductions
-        "Respond directly with the content. "
-        "
-        "Focus exclusively on data insights, statistics, trends, influencing factors, and actionable recommendations. "
-        "Be concise, professional, and factual. "
-        "If a request cannot be fulfilled due to data limitations or model capabilities, respond with: 'STATUS: FAILED_ANALYSIS; REASON: Unable to process this specific analytical request due to limitations.' No other text should be included."
+        "You are a highly efficient and objective data analysis API. You are the 'assistant'. "
+        "Your sole function is to process the user's data and instructions, then output ONLY the requested analysis in the specified format. "
+        "**Crucially, do NOT include any conversational text, greetings, introductions, conclusions, or any remarks about being an AI.** "
+        "Respond directly with the content. Adhere strictly to all formatting requirements. "
+        "If a request cannot be fulfilled, respond ONLY with: 'STATUS: FAILED_ANALYSIS; REASON: Unable to process this specific analytical request due to limitations.'"
     )
 
+    # === FIX: Wrap user input in a clear instruction to prevent role confusion ===
+    # This frames the user's text as 'data' for the model to analyze.
+    user_content_template = f"""Please analyze the following data based on the instructions within it.
+Provide only the direct output as requested. Do not add any extra conversational text.
+
+--- DATA ---
+{user_input}
+"""
 
     # Construct messages for the current request only
     messages_for_llm = [
         {"role": "system", "content": system_prompt_content},
-        {"role": "user", "content": prompt}
+        {"role": "user", "content": user_content_template}  # Use the new template
     ]
 
     # Calculate tokens in the user's prompt
-    prompt_tokens = count_tokens_in_text(prompt)
+    prompt_tokens = count_tokens_in_text(user_input)
 
-    logger.info(f"🧾
+    logger.info(f"🧾 Original user input: {user_input}")
     logger.info(f"Tokens in prompt: {prompt_tokens}")
 
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=800,
-
-
+            max_tokens=800,
+            # === FIX: Lower temperature for more factual, less creative output ===
+            temperature=0.2,
+            # === FIX: Use the CORRECT stop token for the chatml format ===
+            stop=["<|im_end|>"]
         )
         ai_response_content = response["choices"][0]["message"]["content"].strip()
 
@@ -201,9 +208,9 @@ async def generate(request: Request):
         logger.info("✅ Response generated successfully.")
         return {
             "response": ai_response_content,
-            "prompt_tokens": prompt_tokens,
+            "prompt_tokens": prompt_tokens,
             "response_token_count": response_token_count
         }
     except Exception as e:
-        logger.error(f"❌ Error during generation: {e}", exc_info=True)
+        logger.error(f"❌ Error during generation: {e}", exc_info=True)
         return {"error": f"Failed to generate response: {e}. Please try again."}, 500
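For reference, this is how a client might exercise the updated /generate endpoint and read the fields it returns. It is a sketch: the host, port, and example prompt are assumptions, while the JSON keys (prompt, response, prompt_tokens, response_token_count) come from the code shown in the diff above.

import requests

# Hypothetical client call: host and port are assumptions; adjust to where app.py is served.
resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "Summarize the attached sales figures by quarter."},
    timeout=120,
)
body = resp.json()
print(body["response"])              # the analysis text, or the FAILED_ANALYSIS status line
print(body["prompt_tokens"])         # tokens counted in the user's prompt
print(body["response_token_count"])  # tokens in the generated reply

Since the endpoint returns an error key instead of these fields when the prompt is empty or generation fails, a robust client would check for that key before indexing the response.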