Update app.py
Browse files
app.py
CHANGED
@@ -166,7 +166,7 @@ async def generate(request: Request):
|
|
166 |
try:
|
167 |
response = llm.create_chat_completion(
|
168 |
messages=messages_for_llm,
|
169 |
-
max_tokens=
|
170 |
temperature=0.7, # Adjust temperature for creativity vs. coherence (0.0-1.0)
|
171 |
stop=["</s>"] # Stop sequence for TinyLlama Chat
|
172 |
)
|
|
|
166 |
try:
|
167 |
response = llm.create_chat_completion(
|
168 |
messages=messages_for_llm,
|
169 |
+
max_tokens=1024, # Cap response length in tokens; lower this value for faster replies
|
170 |
temperature=0.7, # Adjust temperature for creativity vs. coherence (0.0-1.0)
|
171 |
stop=["</s>"] # Stop sequence for TinyLlama Chat
|
172 |
)
|