Priyanshukr-1 committed
Commit 9ee58dc · verified · 1 Parent(s): be3fe73

Update app.py

Files changed (1)
  1. app.py +26 -19
app.py CHANGED
@@ -154,45 +154,52 @@ async def generate(request: Request):
     Generates a response from the LLM without retaining chat context.
     Expects a JSON body with 'prompt'.
     """
-    logger.info("➡️ /generate endpoint received a request.") # Log at the very beginning
+    logger.info("➡️ /generate endpoint received a request.")
     data = await request.json()
-    prompt = data.get("prompt", "").strip()
+    user_input = data.get("prompt", "").strip() # Renamed to user_input for clarity
 
-    if not prompt:
+    if not user_input:
         logger.warning("Prompt cannot be empty in /generate request.")
         return {"error": "Prompt cannot be empty"}, 400
 
     # Define the system prompt - sent with every request
     system_prompt_content = (
-        "You are a highly efficient and objective data analysis API. "
-        "Your sole function is to process the provided data and instructions, then output ONLY the requested analysis in the specified format. "
-        "**Crucially, do NOT include any conversational text, greetings, introductions (e.g., 'Here is the report', 'Below is the analysis'), conclusions, or any remarks about being an AI.** "
-        "Respond directly with the content. "
-        "Adhere strictly to all formatting requirements given in the user's prompt (e.g., 'summary:{}', 'introduction:{}', numbered lists, bullet points). "
-        "Focus exclusively on data insights, statistics, trends, influencing factors, and actionable recommendations. "
-        "Be concise, professional, and factual. "
-        "If a request cannot be fulfilled due to data limitations or model capabilities, respond with: 'STATUS: FAILED_ANALYSIS; REASON: Unable to process this specific analytical request due to limitations.' No other text should be included."
+        "You are a highly efficient and objective data analysis API. You are the 'assistant'. "
+        "Your sole function is to process the user's data and instructions, then output ONLY the requested analysis in the specified format. "
+        "**Crucially, do NOT include any conversational text, greetings, introductions, conclusions, or any remarks about being an AI.** "
+        "Respond directly with the content. Adhere strictly to all formatting requirements. "
+        "If a request cannot be fulfilled, respond ONLY with: 'STATUS: FAILED_ANALYSIS; REASON: Unable to process this specific analytical request due to limitations.'"
     )
 
+    # === FIX: Wrap user input in a clear instruction to prevent role confusion ===
+    # This frames the user's text as 'data' for the model to analyze.
+    user_content_template = f"""Please analyze the following data based on the instructions within it.
+Provide only the direct output as requested. Do not add any extra conversational text.
+
+--- DATA ---
+{user_input}
+"""
 
     # Construct messages for the current request only
     messages_for_llm = [
         {"role": "system", "content": system_prompt_content},
-        {"role": "user", "content": prompt}
+        {"role": "user", "content": user_content_template} # Use the new template
     ]
 
     # Calculate tokens in the user's prompt
-    prompt_tokens = count_tokens_in_text(prompt)
+    prompt_tokens = count_tokens_in_text(user_input)
 
-    logger.info(f"🧾 Prompt received: {prompt}")
+    logger.info(f"🧾 Original user input: {user_input}")
     logger.info(f"Tokens in prompt: {prompt_tokens}")
 
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=800, # Keep response length short for maximum speed
-            temperature=0.7, # Adjust temperature for creativity vs. coherence (0.0-1.0)
-            stop=["</s>"] # Stop sequence for TinyLlama Chat
+            max_tokens=800,
+            # === FIX: Lower temperature for more factual, less creative output ===
+            temperature=0.2,
+            # === FIX: Use the CORRECT stop token for the chatml format ===
+            stop=["<|im_end|>"]
         )
         ai_response_content = response["choices"][0]["message"]["content"].strip()
 
@@ -201,9 +208,9 @@ async def generate(request: Request):
         logger.info("✅ Response generated successfully.")
         return {
             "response": ai_response_content,
-            "prompt_tokens": prompt_tokens, # Return tokens in the prompt
+            "prompt_tokens": prompt_tokens,
             "response_token_count": response_token_count
         }
     except Exception as e:
-        logger.error(f"❌ Error during generation: {e}", exc_info=True) # Log exception details
+        logger.error(f"❌ Error during generation: {e}", exc_info=True)
         return {"error": f"Failed to generate response: {e}. Please try again."}, 500