Tim Luka Horstmann committed on
Commit
ee37147
·
1 Parent(s): 7ee4aae

Adjusted rate limiting

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +16 -3
__pycache__/app.cpython-313.pyc ADDED
Binary file (24.4 kB). View file
 
app.py CHANGED
@@ -36,7 +36,20 @@ app.state.limiter = limiter
36
  async def custom_rate_limit_handler(request: Request, exc: RateLimitExceeded):
37
  client_ip = get_remote_address(request)
38
  logger.warning(f"Rate limit exceeded for IP {client_ip} on endpoint {request.url.path}")
39
- return await _rate_limit_exceeded_handler(request, exc)
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  app.add_exception_handler(RateLimitExceeded, custom_rate_limit_handler)
42
 
@@ -346,14 +359,14 @@ def get_ram_usage():
346
  }
347
 
348
  @app.post("/api/predict")
349
- @limiter.limit("5/minute") # Allow 10 chat requests per minute per IP
350
  async def predict(request: Request, query_request: QueryRequest):
351
  query = query_request.query
352
  history = query_request.history
353
  return StreamingResponse(stream_response(query, history), media_type="text/event-stream")
354
 
355
  @app.post("/api/tts")
356
- @limiter.limit("5/minute") # Allow 5 TTS requests per minute per IP (more restrictive as TTS is more expensive)
357
  async def text_to_speech(request: Request, tts_request: TTSRequest):
358
  """Convert text to speech using ElevenLabs API"""
359
  if not elevenlabs_client:
 
36
  async def custom_rate_limit_handler(request: Request, exc: RateLimitExceeded):
37
  client_ip = get_remote_address(request)
38
  logger.warning(f"Rate limit exceeded for IP {client_ip} on endpoint {request.url.path}")
39
+
40
+ # Return a proper JSON response for rate limiting
41
+ return Response(
42
+ content=json.dumps({
43
+ "error": "rate_limit_exceeded",
44
+ "message": "Too many requests. Please wait a moment before trying again.",
45
+ "retry_after": 60 # seconds
46
+ }),
47
+ status_code=429,
48
+ headers={
49
+ "Content-Type": "application/json",
50
+ "Retry-After": "60"
51
+ }
52
+ )
53
 
54
  app.add_exception_handler(RateLimitExceeded, custom_rate_limit_handler)
55
 
 
359
  }
360
 
361
  @app.post("/api/predict")
362
+ @limiter.limit("15/minute") # Allow 15 chat requests per minute per IP
363
  async def predict(request: Request, query_request: QueryRequest):
364
  query = query_request.query
365
  history = query_request.history
366
  return StreamingResponse(stream_response(query, history), media_type="text/event-stream")
367
 
368
  @app.post("/api/tts")
369
+ @limiter.limit("10/minute") # Allow 10 TTS requests per minute per IP
370
  async def text_to_speech(request: Request, tts_request: TTSRequest):
371
  """Convert text to speech using ElevenLabs API"""
372
  if not elevenlabs_client: