bibibi12345 committed on
Commit
ca6fa09
·
verified ·
1 Parent(s): 9b67c01

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +19 -5
app/main.py CHANGED
@@ -1079,8 +1079,9 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1079
 
1080
 
1081
  if request.stream:
1082
- # Check if fake streaming is enabled
1083
- if config.FAKE_STREAMING:
 
1084
  return await fake_stream_generator(model_name, prompt, current_gen_config, request)
1085
 
1086
  # Regular streaming call
@@ -1219,7 +1220,13 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1219
  # This assumes the generator correctly terminates after yielding the error.
1220
  # Re-evaluate if this causes issues. The goal is to avoid double responses.
1221
  # It seems returning the StreamingResponse object itself is the correct FastAPI pattern.
1222
- return result # Return the StreamingResponse object which contains the failing generator
 
 
 
 
 
 
1223
 
1224
 
1225
  else:
@@ -1259,7 +1266,12 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1259
  return JSONResponse(status_code=500, content=error_response)
1260
  else:
1261
  # Let the StreamingResponse handle yielding the error
1262
- return result # Return the StreamingResponse object containing the failing generator
 
 
 
 
 
1263
 
1264
 
1265
  except Exception as e:
@@ -1366,7 +1378,9 @@ async def fake_stream_generator(model_name, prompt, current_gen_config, request)
1366
  keep_alive_sent += 1
1367
 
1368
  # Wait before sending the next keep-alive message
1369
- await asyncio.sleep(config.FAKE_STREAMING_INTERVAL)
 
 
1370
 
1371
  try:
1372
  # Get the response from the completed task
 
1079
 
1080
 
1081
  if request.stream:
1082
+ # Check if fake streaming is enabled (directly from environment variable)
1083
+ fake_streaming = os.environ.get("FAKE_STREAMING", "false").lower() == "true"
1084
+ if fake_streaming:
1085
  return await fake_stream_generator(model_name, prompt, current_gen_config, request)
1086
 
1087
  # Regular streaming call
 
1220
  # This assumes the generator correctly terminates after yielding the error.
1221
  # Re-evaluate if this causes issues. The goal is to avoid double responses.
1222
  # It seems returning the StreamingResponse object itself is the correct FastAPI pattern.
1223
+ # For streaming requests, we need to return a new StreamingResponse with an error
1224
+ # since we can't access the previous StreamingResponse objects
1225
+ async def error_stream():
1226
+ yield f"data: {json.dumps(error_response)}\n\n"
1227
+ yield "data: [DONE]\n\n"
1228
+
1229
+ return StreamingResponse(error_stream(), media_type="text/event-stream")
1230
 
1231
 
1232
  else:
 
1266
  return JSONResponse(status_code=500, content=error_response)
1267
  else:
1268
  # Let the StreamingResponse handle yielding the error
1269
+ # For streaming requests, create a new error stream
1270
+ async def error_stream():
1271
+ yield f"data: {json.dumps(error_response)}\n\n"
1272
+ yield "data: [DONE]\n\n"
1273
+
1274
+ return StreamingResponse(error_stream(), media_type="text/event-stream")
1275
 
1276
 
1277
  except Exception as e:
 
1378
  keep_alive_sent += 1
1379
 
1380
  # Wait before sending the next keep-alive message
1381
+ # Get interval from environment variable directly
1382
+ fake_streaming_interval = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1.0"))
1383
+ await asyncio.sleep(fake_streaming_interval)
1384
 
1385
  try:
1386
  # Get the response from the completed task