Spaces:
Running
Running
commit app, Dockerfile and requirements.txt
Browse files
- Dockerfile +23 -0
- app/__pycache__/asr_worker.cpython-312.pyc +0 -0
- app/__pycache__/main.cpython-312.pyc +0 -0
- app/asr_worker.py +35 -0
- app/main.py +34 -0
- app/static/index.html +41 -0
- requirements.txt +3 -0
Dockerfile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Hugging Face Spaces: FastAPI + ASR (CPU-only)
FROM python:3.10-slim

# Install system deps. --no-install-recommends keeps optional packages out of
# the image; the apt lists are removed in the same layer so they are not stored.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg libsndfile1 git curl build-essential && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /code

# Install Python dependencies before copying the code, so this (slow) layer is
# reused from cache whenever only application files change.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy code and model files.
# NOTE(review): ./models is not in this commit's file list; the build fails at
# this step unless the directory already exists in the repository - confirm.
COPY ./app /code/app
COPY ./models /code/models

# Expose default port for HF Spaces
EXPOSE 7860

# Entrypoint for FastAPI app
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/__pycache__/asr_worker.cpython-312.pyc
ADDED
Binary file (2.33 kB). View file
|
|
app/__pycache__/main.cpython-312.pyc
ADDED
Binary file (2.16 kB). View file
|
|
app/asr_worker.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import sherpa_onnx
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
MODEL_DIR = Path("models/zipformer_bilingual")
|
6 |
+
|
7 |
+
def create_recognizer():
    """Build the streaming transducer recognizer from the files under MODEL_DIR.

    Returns:
        The object returned by ``sherpa_onnx.OnlineRecognizer.from_transducer``,
        configured for CPU inference with greedy-search decoding.
    """
    # Collected in one mapping so the model files and the runtime settings
    # can be read at a glance.
    options = {
        "tokens": str(MODEL_DIR / "tokens.txt"),
        "encoder": str(MODEL_DIR / "encoder-epoch-99-avg-1.onnx"),
        "decoder": str(MODEL_DIR / "decoder-epoch-99-avg-1.onnx"),
        "joiner": str(MODEL_DIR / "joiner-epoch-99-avg-1.onnx"),
        "provider": "cpu",
        "num_threads": 1,
        "sample_rate": 16000,
        "feature_dim": 80,
        "decoding_method": "greedy_search",
    }
    return sherpa_onnx.OnlineRecognizer.from_transducer(**options)
|
19 |
+
|
20 |
+
def stream_audio(raw_pcm_bytes, stream, recognizer, sample_rate=16000):
    """Feed one chunk of raw PCM into *stream* and return the current result.

    Args:
        raw_pcm_bytes: Bytes interpreted as native-endian float32 samples.
        stream: Recognizer stream that receives the samples through
            ``accept_waveform``.
        recognizer: Object providing ``is_ready``, ``decode_streams`` and
            ``get_result`` for *stream*.
        sample_rate: Sample rate, in Hz, reported to the stream together
            with the samples. Defaults to 16000.

    Returns:
        A ``(result, rms)`` tuple: whatever ``recognizer.get_result`` returns
        for the stream, and the root-mean-square level of this chunk as a
        float (``0.0`` for an empty chunk).

    Raises:
        ValueError: If the byte length is not a multiple of the float32 item
            size (raised by ``np.frombuffer``).
    """
    audio = np.frombuffer(raw_pcm_bytes, dtype=np.float32)

    if audio.size:
        rms = float(np.sqrt(np.mean(audio ** 2)))
        stream.accept_waveform(sample_rate, audio)
    else:
        # The mean of an empty array is NaN, which is not valid JSON once the
        # caller serialises it; report silence instead.
        rms = 0.0

    # Drain everything that is ready. Decoding a single step per chunk would
    # let undecoded audio pile up whenever a chunk spans several steps.
    while recognizer.is_ready(stream):
        recognizer.decode_streams([stream])

    return recognizer.get_result(stream), rms
|
28 |
+
|
29 |
+
def finalize_stream(stream, recognizer, sample_rate=16000, tail_seconds=0.66):
    """Flush *stream* and return the final recognition result.

    A block of silence is appended, the stream is marked as finished, and
    decoding runs until the recognizer reports nothing further is ready.

    Args:
        stream: Recognizer stream to close; must provide ``accept_waveform``
            and ``input_finished``.
        recognizer: Object providing ``is_ready``, ``decode_streams`` and
            ``get_result`` for *stream*.
        sample_rate: Sample rate, in Hz, used for the padding. Defaults to
            16000.
        tail_seconds: Length of the trailing silence in seconds. Defaults to
            0.66.

    Returns:
        Whatever ``recognizer.get_result`` returns for the finished stream.
    """
    # Trailing silence - presumably so the last spoken frames get enough
    # right-hand context to be decoded; confirm against the model's docs.
    tail = np.zeros(int(tail_seconds * sample_rate), dtype=np.float32)
    stream.accept_waveform(sample_rate, tail)
    stream.input_finished()

    while recognizer.is_ready(stream):
        recognizer.decode_streams([stream])

    return recognizer.get_result(stream)
|
app/main.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, WebSocket
|
2 |
+
from fastapi.staticfiles import StaticFiles
|
3 |
+
from fastapi.responses import HTMLResponse
|
4 |
+
from app.asr_worker import create_recognizer, stream_audio, finalize_stream
|
5 |
+
|
6 |
+
app = FastAPI()
|
7 |
+
|
8 |
+
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
9 |
+
|
10 |
+
recognizer = create_recognizer()
|
11 |
+
|
12 |
+
@app.get("/")
async def root():
    """Serve the single-page client stored at ``app/static/index.html``.

    Returns:
        An ``HTMLResponse`` whose body is the file's text.
    """
    # The page contains non-ASCII text (an emoji), so decode it explicitly as
    # UTF-8 rather than relying on the platform's default encoding.
    with open("app/static/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())
|
16 |
+
|
17 |
+
|
18 |
+
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Stream recognition results for audio chunks received over a WebSocket.

    Each binary message is passed to ``stream_audio``; the reply is a JSON
    object with ``partial`` (the current result) and ``volume`` (the chunk's
    RMS scaled by 5 and capped at 1.0). If processing fails, the stream is
    finalized and a ``final`` message is attempted before closing.

    Args:
        websocket: The incoming WebSocket connection.
    """
    # Imported here so this handler does not depend on edits to the
    # module-level import block.
    from fastapi import WebSocketDisconnect

    await websocket.accept()
    stream = recognizer.create_stream()

    try:
        while True:
            data = await websocket.receive_bytes()
            result, rms = stream_audio(data, stream, recognizer)
            await websocket.send_json({
                "partial": result,
                "volume": min(rms * 5.0, 1.0)
            })
    except WebSocketDisconnect:
        # The client has gone away: nobody is left to receive a final result,
        # and sending on the closed socket would only raise a second error.
        return
    except Exception:
        final = finalize_stream(stream, recognizer)
        try:
            await websocket.send_json({"final": final})
            await websocket.close()
        except (RuntimeError, WebSocketDisconnect):
            # Best effort only: the connection may already be unusable.
            pass
|
app/static/index.html
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>FastAPI Real-Time ASR</title>
</head>
<body>
  <h1>🎤 Speak into your mic...</h1>
  <div>Volume: <progress id="vol" max="1" value="0"></progress></div>
  <p>Partial: <span id="partial"></span></p>
  <p>Final: <b id="final"></b></p>
  <script>
    // Follow the page's scheme: browsers block an insecure ws:// connection
    // opened from a page that was loaded over https://.
    const scheme = location.protocol === "https:" ? "wss://" : "ws://";
    const ws = new WebSocket(scheme + location.host + "/ws");
    const vol = document.getElementById("vol");
    const partial = document.getElementById("partial");
    const finalText = document.getElementById("final");

    navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
      // The server treats every chunk as 16 kHz audio, so capture at that
      // rate instead of the device default (commonly 44.1 or 48 kHz).
      const context = new AudioContext({ sampleRate: 16000 });
      const source = context.createMediaStreamSource(stream);
      const processor = context.createScriptProcessor(4096, 1, 1);
      source.connect(processor);
      processor.connect(context.destination);

      processor.onaudioprocess = e => {
        // Audio callbacks can fire before the socket has opened or after it
        // has closed; send() throws in those states.
        if (ws.readyState !== WebSocket.OPEN) {
          return;
        }
        const input = e.inputBuffer.getChannelData(0);
        ws.send(new Float32Array(input).buffer);
      };
    }).catch(err => {
      // Surface a denied permission or an unsupported audio setup.
      console.error("Microphone capture failed:", err);
    });

    ws.onmessage = e => {
      const msg = JSON.parse(e.data);
      // Test for the key, not the value: an empty transcript is a valid
      // message and must still update the volume meter.
      if ("partial" in msg) {
        partial.textContent = msg.partial;
        vol.value = msg.volume;
      } else if ("final" in msg) {
        finalText.textContent = msg.final;
      }
    };
  </script>
</body>
</html>
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a7c3c10c8ec533e73405e503c3004146a36153ae701934132aecbe689e9e666
|
3 |
+
size 44
|