Tim Luka Horstmann
committed on
Commit
·
ee37147
1
Parent(s):
7ee4aae
Adjusted rate limiting
Browse files
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +16 -3
__pycache__/app.cpython-313.pyc
ADDED
Binary file (24.4 kB). View file
|
|
app.py
CHANGED
@@ -36,7 +36,20 @@ app.state.limiter = limiter
|
|
36 |
async def custom_rate_limit_handler(request: Request, exc: RateLimitExceeded):
|
37 |
client_ip = get_remote_address(request)
|
38 |
logger.warning(f"Rate limit exceeded for IP {client_ip} on endpoint {request.url.path}")
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
app.add_exception_handler(RateLimitExceeded, custom_rate_limit_handler)
|
42 |
|
@@ -346,14 +359,14 @@ def get_ram_usage():
|
|
346 |
}
|
347 |
|
348 |
@app.post("/api/predict")
|
349 |
-
@limiter.limit("
|
350 |
async def predict(request: Request, query_request: QueryRequest):
|
351 |
query = query_request.query
|
352 |
history = query_request.history
|
353 |
return StreamingResponse(stream_response(query, history), media_type="text/event-stream")
|
354 |
|
355 |
@app.post("/api/tts")
|
356 |
-
@limiter.limit("
|
357 |
async def text_to_speech(request: Request, tts_request: TTSRequest):
|
358 |
"""Convert text to speech using ElevenLabs API"""
|
359 |
if not elevenlabs_client:
|
|
|
36 |
async def custom_rate_limit_handler(request: Request, exc: RateLimitExceeded):
    """Log a rate-limit violation and reply with a JSON 429 response.

    Args:
        request: The incoming request; its client address and URL path are
            written to the warning log.
        exc: The rate-limit exception being handled. It is not inspected.

    Returns:
        A ``Response`` with status 429 whose JSON body carries an error code,
        a user-facing message and a ``retry_after`` value, and whose
        ``Retry-After`` header carries the same wait time.
    """
    offending_ip = get_remote_address(request)
    logger.warning(f"Rate limit exceeded for IP {offending_ip} on endpoint {request.url.path}")

    # One source for the wait time so the JSON body and the header agree.
    retry_after_seconds = 60

    payload = {
        "error": "rate_limit_exceeded",
        "message": "Too many requests. Please wait a moment before trying again.",
        "retry_after": retry_after_seconds,  # seconds
    }
    response_headers = {
        "Content-Type": "application/json",
        "Retry-After": str(retry_after_seconds),
    }

    # Return a proper JSON response for rate limiting
    return Response(
        content=json.dumps(payload),
        status_code=429,
        headers=response_headers,
    )
|
53 |
|
54 |
# Use custom_rate_limit_handler for RateLimitExceeded errors so clients
# receive its JSON 429 reply.
app.add_exception_handler(RateLimitExceeded, custom_rate_limit_handler)
|
55 |
|
|
|
359 |
}
|
360 |
|
361 |
@app.post("/api/predict")
@limiter.limit("15/minute")  # Allow 15 chat requests per minute per IP
async def predict(request: Request, query_request: QueryRequest):
    """Stream the answer to a chat query as a ``text/event-stream`` body.

    Args:
        request: The incoming request. Not read in the body; presumably
            required by the rate-limit decorator (confirm against slowapi).
        query_request: Parsed request payload; its ``query`` and ``history``
            fields are forwarded to ``stream_response``.

    Returns:
        A ``StreamingResponse`` wrapping the output of ``stream_response``.
    """
    return StreamingResponse(
        stream_response(query_request.query, query_request.history),
        media_type="text/event-stream",
    )
|
367 |
|
368 |
@app.post("/api/tts")
|
369 |
+
@limiter.limit("10/minute") # Allow 10 TTS requests per minute per IP
|
370 |
async def text_to_speech(request: Request, tts_request: TTSRequest):
|
371 |
"""Convert text to speech using ElevenLabs API"""
|
372 |
if not elevenlabs_client:
|