Update app.py
app.py CHANGED
@@ -157,6 +157,7 @@ async def generate(request: Request):
     logger.info("➡️ /generate endpoint received a request.")  # Log at the very beginning
     data = await request.json()
     prompt = data.get("prompt", "").strip()
+    max_gen_token = data.get("max_tokens", 800).strip()

     if not prompt:
         logger.warning("Prompt cannot be empty in /generate request.")
@@ -190,7 +191,7 @@ async def generate(request: Request):
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=
+            max_tokens=max_gen_token,  # Keep response length short for maximum speed
             temperature=0.7,  # Adjust temperature for creativity vs. coherence (0.0-1.0)
             stop=["</s>"]  # Stop sequence for TinyLlama Chat
         )