Priyanshukr-1 committed
Commit fd281f1 · verified · 1 Parent(s): f0fa136

Update app.py

Files changed (1):
  1. app.py +2 -1
app.py CHANGED
@@ -157,6 +157,7 @@ async def generate(request: Request):
     logger.info("➡️ /generate endpoint received a request.") # Log at the very beginning
     data = await request.json()
     prompt = data.get("prompt", "").strip()
+    max_gen_token = data.get("max_tokens", 800).strip()
 
     if not prompt:
         logger.warning("Prompt cannot be empty in /generate request.")
@@ -190,7 +191,7 @@ async def generate(request: Request):
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=800, # Keep response length short for maximum speed
+            max_tokens=max_gen_token, # Keep response length short for maximum speed
             temperature=0.7, # Adjust temperature for creativity vs. coherence (0.0-1.0)
             stop=["</s>"] # Stop sequence for TinyLlama Chat
         )
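
A note on the added line: data.get("max_tokens", 800) returns the integer default 800 when the client omits the key, and Python ints have no .strip() method, so such a request raises AttributeError; when the key is present, .strip() only succeeds if the client sent a string, which then reaches create_chat_completion as a string rather than the int it expects. The retained "keep response length short" comment also no longer matches a client-configurable value. Below is a minimal sketch of safer parsing, assuming the same request shape as this commit; the parse_max_tokens helper and the 2048 cap are illustrative and not part of the repo.

# Illustrative sketch, not part of this commit: coerce the client-supplied
# max_tokens to a bounded int before handing it to the model.
MAX_TOKENS_CAP = 2048  # hypothetical upper bound, chosen for illustration

def parse_max_tokens(data: dict, default: int = 800) -> int:
    raw = data.get("max_tokens", default)
    try:
        value = int(raw)  # accepts ints and numeric strings alike
    except (TypeError, ValueError):
        return default  # fall back on malformed input such as None or "fast"
    return max(1, min(value, MAX_TOKENS_CAP))  # clamp to a sane range

Inside the endpoint this would replace the new line with max_gen_token = parse_max_tokens(data), keeping the 800-token default while rejecting values the model cannot honor.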