Tim Luka Horstmann
committed on
Commit
·
703cd97
1
Parent(s):
a79e01b
Update stay alive
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ import json
|
|
3 |
import time
|
4 |
import numpy as np
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
-
from fastapi import FastAPI, HTTPException
|
7 |
from fastapi.responses import StreamingResponse
|
8 |
from pydantic import BaseModel
|
9 |
from llama_cpp import Llama
|
@@ -239,4 +239,27 @@ async def warm_up_model():
|
|
239 |
logger.info("Model warm-up completed.")
|
240 |
# Log initial RAM usage
|
241 |
ram_stats = get_ram_usage()
|
242 |
-
logger.info(f"Initial RAM usage after startup: {ram_stats}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import time
|
4 |
import numpy as np
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
+
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
7 |
from fastapi.responses import StreamingResponse
|
8 |
from pydantic import BaseModel
|
9 |
from llama_cpp import Llama
|
|
|
239 |
logger.info("Model warm-up completed.")
|
240 |
# Log initial RAM usage
|
241 |
ram_stats = get_ram_usage()
|
242 |
+
logger.info(f"Initial RAM usage after startup: {ram_stats}")
|
243 |
+
|
244 |
+
# Add a background task to keep the model warm
|
245 |
+
# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_background_tasks = set()


@app.on_event("startup")
async def setup_periodic_tasks():
    """Schedule the periodic model warm-up loop when the application starts."""
    task = asyncio.create_task(keep_model_warm())
    # Hold the task until it is done; drop the reference afterwards so the
    # set does not grow if this hook is ever run more than once.
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    logger.info("Periodic model warm-up task scheduled")
|
249 |
+
|
250 |
+
async def keep_model_warm(interval_seconds: float = 13 * 60):
    """Keep the model warm by periodically streaming a dummy query.

    Loops forever. Each pass drains ``stream_response`` for a fixed throwaway
    prompt with an empty history, discards every item it yields, and then
    sleeps before the next pass. A failing pass is logged (with traceback)
    and does not stop the loop.

    Args:
        interval_seconds: Seconds to wait between warm-up passes. Defaults to
            13 minutes.
    """
    dummy_query = "Say only the word 'ok.'"
    while True:
        try:
            logger.info("Performing periodic model warm-up")
            # A fresh, empty history for every pass.
            dummy_history = []
            # Process a dummy query through the generator to keep it warm;
            # the generated output itself is irrelevant.
            async for _ in stream_response(dummy_query, dummy_history):
                pass
            logger.info("Periodic warm-up completed")
        except asyncio.CancelledError:
            # On Python < 3.8 CancelledError subclasses Exception; re-raise so
            # the broad handler below can never swallow task cancellation.
            raise
        except Exception as e:
            # Best-effort task: record the failure and try again next cycle.
            logger.exception("Error in periodic warm-up: %s", e)

        # Wait before the next warm-up
        await asyncio.sleep(interval_seconds)
|