qfuxa committed on
Commit
e443c93
·
1 Parent(s): 183bbc5

remove whisper_fastapi_online_server.py

Browse files
Files changed (2) hide show
  1. README.md +5 -5
  2. whisper_fastapi_online_server.py +0 -83
README.md CHANGED
@@ -15,14 +15,14 @@
15
 
16
  ## 🚀 Overview
17
 
18
- This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with an example frontend that you can customize for your own needs. Everything runs locally on your machine ✨
19
 
20
  ### 🔄 Architecture
21
 
22
  WhisperLiveKit consists of two main components:
23
 
24
  - **Backend (Server)**: FastAPI WebSocket server that processes audio and provides real-time transcription
25
- - **Frontend Example**: Basic HTML & JavaScript implementation that demonstrates how to capture and stream audio
26
 
27
  > **Note**: We recommend installing this library on the server/backend. For the frontend, you can use and adapt the provided HTML template from [whisperlivekit/web/live_transcription.html](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/web/live_transcription.html) for your specific use case.
28
 
@@ -33,13 +33,13 @@ WhisperLiveKit consists of two main components:
33
  - **🔒 Fully Local** - All processing happens on your machine - no data sent to external servers
34
  - **📱 Multi-User Support** - Handle multiple users simultaneously with a single backend/server
35
 
36
- ### βš™οΈ Differences from [Whisper Streaming](https://github.com/ufal/whisper_streaming)
37
 
 
38
  - **Multi-User Support** – Handles multiple users simultaneously by decoupling backend and online ASR
 
39
  - **MLX Whisper Backend** – Optimized for Apple Silicon for faster local processing
40
  - **Buffering Preview** – Displays unvalidated transcription segments
41
- - **Confidence Validation** – Immediately validate high-confidence tokens for faster inference
42
- - **Apple Silicon Optimized** - MLX backend for faster local processing on Mac
43
 
44
  ## 📖 Quick Start
45
 
 
15
 
16
  ## 🚀 Overview
17
 
18
+ This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional and simple frontend that you can customize for your own needs. Everything runs locally on your machine ✨
19
 
20
  ### 🔄 Architecture
21
 
22
  WhisperLiveKit consists of two main components:
23
 
24
  - **Backend (Server)**: FastAPI WebSocket server that processes audio and provides real-time transcription
25
+ - **Frontend Example**: Basic HTML & JavaScript implementation to capture and stream audio
26
 
27
  > **Note**: We recommend installing this library on the server/backend. For the frontend, you can use and adapt the provided HTML template from [whisperlivekit/web/live_transcription.html](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/web/live_transcription.html) for your specific use case.
28
 
 
33
  - **🔒 Fully Local** - All processing happens on your machine - no data sent to external servers
34
  - **📱 Multi-User Support** - Handle multiple users simultaneously with a single backend/server
35
 
36
+ ### βš™οΈ Core ifferences from [Whisper Streaming](https://github.com/ufal/whisper_streaming)
37
 
38
+ - **Automatic Silence Chunking** – Automatically chunks when no audio is detected to limit buffer size
39
  - **Multi-User Support** – Handles multiple users simultaneously by decoupling backend and online ASR
40
+ - **Confidence Validation** – Immediately validate high-confidence tokens for faster inference
41
  - **MLX Whisper Backend** – Optimized for Apple Silicon for faster local processing
42
  - **Buffering Preview** – Displays unvalidated transcription segments
 
 
43
 
44
  ## 📖 Quick Start
45
 
whisper_fastapi_online_server.py DELETED
@@ -1,83 +0,0 @@
1
- from contextlib import asynccontextmanager
2
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect
3
- from fastapi.responses import HTMLResponse
4
- from fastapi.middleware.cors import CORSMiddleware
5
-
6
- from whisperlivekit import WhisperLiveKit, parse_args
7
- from whisperlivekit.audio_processor import AudioProcessor
8
-
9
- import asyncio
10
- import logging
11
- import os
12
-
13
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
14
- logging.getLogger().setLevel(logging.WARNING)
15
- logger = logging.getLogger(__name__)
16
- logger.setLevel(logging.DEBUG)
17
-
18
- kit = None
19
-
20
- @asynccontextmanager
21
- async def lifespan(app: FastAPI):
22
- global kit
23
- kit = WhisperLiveKit()
24
- yield
25
-
26
- app = FastAPI(lifespan=lifespan)
27
- app.add_middleware(
28
- CORSMiddleware,
29
- allow_origins=["*"],
30
- allow_credentials=True,
31
- allow_methods=["*"],
32
- allow_headers=["*"],
33
- )
34
-
35
-
36
- @app.get("/")
37
- async def get():
38
- return HTMLResponse(kit.web_interface())
39
-
40
-
41
- async def handle_websocket_results(websocket, results_generator):
42
- """Consumes results from the audio processor and sends them via WebSocket."""
43
- try:
44
- async for response in results_generator:
45
- await websocket.send_json(response)
46
- except Exception as e:
47
- logger.warning(f"Error in WebSocket results handler: {e}")
48
-
49
-
50
- @app.websocket("/asr")
51
- async def websocket_endpoint(websocket: WebSocket):
52
- audio_processor = AudioProcessor()
53
-
54
- await websocket.accept()
55
- logger.info("WebSocket connection opened.")
56
-
57
- results_generator = await audio_processor.create_tasks()
58
- websocket_task = asyncio.create_task(handle_websocket_results(websocket, results_generator))
59
-
60
- try:
61
- while True:
62
- message = await websocket.receive_bytes()
63
- await audio_processor.process_audio(message)
64
- except WebSocketDisconnect:
65
- logger.warning("WebSocket disconnected.")
66
- finally:
67
- websocket_task.cancel()
68
- await audio_processor.cleanup()
69
- logger.info("WebSocket endpoint cleaned up.")
70
-
71
- if __name__ == "__main__":
72
- import uvicorn
73
-
74
- args = parse_args()
75
-
76
- uvicorn.run(
77
- "whisper_fastapi_online_server:app",
78
- host=args.host,
79
- port=args.port,
80
- reload=False,
81
- log_level="info",
82
- lifespan="on",
83
- )