Tim Luka Horstmann committed on
Commit
703cd97
·
1 Parent(s): a79e01b

Update stay alive

Browse files
Files changed (1) hide show
  1. app.py +25 -2
app.py CHANGED
@@ -3,7 +3,7 @@ import json
3
  import time
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
- from fastapi import FastAPI, HTTPException
7
  from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
  from llama_cpp import Llama
@@ -239,4 +239,27 @@ async def warm_up_model():
239
  logger.info("Model warm-up completed.")
240
  # Log initial RAM usage
241
  ram_stats = get_ram_usage()
242
- logger.info(f"Initial RAM usage after startup: {ram_stats}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import time
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
7
  from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
  from llama_cpp import Llama
 
239
  logger.info("Model warm-up completed.")
240
  # Log initial RAM usage
241
  ram_stats = get_ram_usage()
242
+ logger.info(f"Initial RAM usage after startup: {ram_stats}")
243
+
# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references to tasks, so an otherwise unreferenced task may be garbage
# collected before it finishes.
_background_tasks = set()


# Add a background task to keep the model warm
@app.on_event("startup")
async def setup_periodic_tasks():
    """Schedule the periodic model warm-up loop when the application starts.

    The task is created on the running event loop and is not awaited, so this
    startup hook returns as soon as the task has been scheduled.
    """
    warm_up_task = asyncio.create_task(keep_model_warm())
    # Hold a reference while the task runs; drop it once the task is done.
    _background_tasks.add(warm_up_task)
    warm_up_task.add_done_callback(_background_tasks.discard)
    logger.info("Periodic model warm-up task scheduled")
249
+
async def keep_model_warm(interval_seconds: float = 13 * 60):
    """Keep the model warm by periodically running a dummy query.

    Loops forever. Each round streams a fixed dummy prompt (with an empty
    history) through ``stream_response``, discards everything it yields, and
    then sleeps before the next round.

    Args:
        interval_seconds: Pause between warm-up rounds, in seconds. Defaults
            to 13 minutes, the value that was previously hard-coded.
            NOTE(review): presumably chosen to stay below a host idle
            timeout -- confirm.

    A failing round is logged with its traceback and does not stop the loop.
    On Python 3.8+ task cancellation is not an ``Exception`` subclass, so it
    is not swallowed here and ends the loop.
    """
    while True:
        try:
            logger.info("Performing periodic model warm-up")
            dummy_query = "Say only the word 'ok.'"
            dummy_history = []
            # Process a dummy query through the generator to keep it warm;
            # the streamed chunks themselves are not needed.
            async for _ in stream_response(dummy_query, dummy_history):
                pass
            logger.info("Periodic warm-up completed")
        except Exception as e:
            # Best-effort: record the failure (with traceback) and retry on
            # the next round instead of killing the background task.
            logger.exception("Error in periodic warm-up: %s", e)

        # Wait before the next warm-up, whether this round succeeded or not.
        await asyncio.sleep(interval_seconds)