Update app.py
app.py CHANGED
@@ -190,7 +190,7 @@ async def generate(request: Request):
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=
+            max_tokens=800, # Keep response length short for maximum speed
             temperature=0.7, # Adjust temperature for creativity vs. coherence (0.0-1.0)
             stop=["</s>"] # Stop sequence for TinyLlama Chat
         )
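
For context, here is a minimal sketch of how this call is typically made with llama-cpp-python. The model path, n_ctx, and example messages below are assumptions for illustration; they are not taken from this repository's app.py.

# Minimal sketch (assumed model path, n_ctx, and messages; only the parameters
# shown in the diff above come from app.py).
from llama_cpp import Llama

# Load a GGUF model; TinyLlama Chat is assumed here to match the "</s>" stop sequence.
llm = Llama(model_path="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", n_ctx=2048)

messages_for_llm = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize what a GGUF file is in one sentence."},
]

response = llm.create_chat_completion(
    messages=messages_for_llm,
    max_tokens=800,    # Cap the completion length; lower values return faster
    temperature=0.7,   # Adjust temperature for creativity vs. coherence (0.0-1.0)
    stop=["</s>"],     # Stop sequence for TinyLlama Chat
)

print(response["choices"][0]["message"]["content"])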