Update app.py
app.py CHANGED
@@ -188,15 +188,19 @@ async def generate(request: Request):
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=
+            max_tokens=1024, # Keep response length short for maximum speed
             temperature=0.7, # Adjust temperature for creativity vs. coherence (0.0-1.0)
             stop=["</s>"] # Stop sequence for TinyLlama Chat
         )
         ai_response_content = response["choices"][0]["message"]["content"].strip()
+
+        response_token_count = count_tokens_in_text(ai_response_content)
+
         logger.info("✅ Response generated successfully.")
         return {
             "response": ai_response_content,
-            "prompt_tokens": prompt_tokens # Return tokens in the prompt
+            "prompt_tokens": prompt_tokens, # Return tokens in the prompt
+            "response_token_count": response_token_count
         }
     except Exception as e:
         logger.error(f"❌ Error during generation: {e}", exc_info=True) # Log exception details
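The added code calls a helper, count_tokens_in_text, that is defined elsewhere in app.py and is not part of this hunk. A minimal sketch of what such a helper could look like, assuming it simply reuses the tokenizer of the already-loaded llama_cpp.Llama instance (llm); the name, signature, and add_bos choice here are assumptions, not the actual file contents:

def count_tokens_in_text(text: str) -> int:
    # Hypothetical sketch: the real implementation lives outside this hunk.
    # Llama.tokenize expects bytes; add_bos=False counts only the text itself,
    # without a leading beginning-of-sequence token.
    return len(llm.tokenize(text.encode("utf-8"), add_bos=False))

With this change the endpoint reports token usage for both sides of the exchange, so a response body would look roughly like the following (values illustrative):

{
    "response": "...generated text...",
    "prompt_tokens": 57,
    "response_token_count": 112
}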