Spaces:
Configuration error
Configuration error
Fedir Zadniprovskyi
committed on
Commit
·
b20cbad
1
Parent(s):
3a14175
feat: ollama-like ps endpoints
Browse files
faster_whisper_server/main.py
CHANGED
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
| 3 |
import asyncio
|
| 4 |
from collections import OrderedDict
|
| 5 |
from contextlib import asynccontextmanager
|
|
|
|
| 6 |
from io import BytesIO
|
| 7 |
import time
|
| 8 |
from typing import TYPE_CHECKING, Annotated, Literal
|
|
@@ -107,6 +108,29 @@ def health() -> Response:
|
|
| 107 |
return Response(status_code=200, content="OK")
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
@app.get("/v1/models")
|
| 111 |
def get_models() -> ModelListResponse:
|
| 112 |
models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
|
|
|
|
| 3 |
import asyncio
|
| 4 |
from collections import OrderedDict
|
| 5 |
from contextlib import asynccontextmanager
|
| 6 |
+
import gc
|
| 7 |
from io import BytesIO
|
| 8 |
import time
|
| 9 |
from typing import TYPE_CHECKING, Annotated, Literal
|
|
|
|
| 108 |
return Response(status_code=200, content="OK")
|
| 109 |
|
| 110 |
|
| 111 |
+
# List the models currently held in memory.
# Responds with a JSON object of the form {"models": [<name>, ...]}, where the
# names are the keys of the module-level ``loaded_models`` mapping.
@app.get("/api/ps", tags=["experimental"], summary="Get a list of loaded models.")
def get_running_models() -> dict[str, list[str]]:
    model_names = [*loaded_models]
    return {"models": model_names}
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# Load ``model_name`` into memory by delegating to ``load_model``.
# Responds 201 once ``load_model`` returns, or 409 with the body
# "Model already loaded" when the name is already a key of ``loaded_models``.
# The ``:path`` converter lets the name contain slashes.
@app.post("/api/ps/{model_name:path}", tags=["experimental"], summary="Load a model into memory.")
def load_model_route(model_name: str) -> Response:
    if model_name not in loaded_models:
        load_model(model_name)
        return Response(status_code=201)
    return Response(status_code=409, content="Model already loaded")
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
# Unload ``model_name`` from memory.
# Responds 204 when the model was registered in ``loaded_models`` and has been
# removed, or 404 when no such model is loaded.
# The ``:path`` converter lets the name contain slashes.
@app.delete("/api/ps/{model_name:path}", tags=["experimental"], summary="Unload a model from memory.")
def stop_running_model(model_name: str) -> Response:
    # pop() performs the lookup and the removal in a single step, so a
    # concurrent request removing the same entry cannot make a later
    # ``del loaded_models[model_name]`` raise KeyError.
    model = loaded_models.pop(model_name, None)
    if model is None:
        return Response(status_code=404)
    # Drop this frame's reference before collecting; while the local name is
    # still bound, gc.collect() cannot reclaim the model object.
    del model
    gc.collect()
    return Response(status_code=204)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
@app.get("/v1/models")
|
| 135 |
def get_models() -> ModelListResponse:
|
| 136 |
models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
|