Priyanshukr-1 committed
Commit 347340b · verified · 1 Parent(s): 630d5e2

Update app.py

Files changed (1)
  1. app.py +6 -2
app.py CHANGED
@@ -188,15 +188,19 @@ async def generate(request: Request):
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=300,   # Keep response length short for maximum speed
+            max_tokens=1024,  # Allow longer responses (raised from 300)
             temperature=0.7,  # Adjust temperature for creativity vs. coherence (0.0-1.0)
             stop=["</s>"]     # Stop sequence for TinyLlama Chat
         )
         ai_response_content = response["choices"][0]["message"]["content"].strip()
+
+        response_token_count = count_tokens_in_text(ai_response_content)
+
         logger.info("✅ Response generated successfully.")
         return {
             "response": ai_response_content,
-            "prompt_tokens": prompt_tokens  # Return tokens in the prompt
+            "prompt_tokens": prompt_tokens,  # Return tokens in the prompt
+            "response_token_count": response_token_count
         }
     except Exception as e:
         logger.error(f"❌ Error during generation: {e}", exc_info=True)  # Log exception details