Create app.py
app.py ADDED
@@ -0,0 +1,43 @@
+import streamlit as st
+from faster_whisper import WhisperModel
+import logging
+
+# Configure logging for debugging purposes
+logging.basicConfig()
+logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
+
+def format_timestamp(seconds):
+    """Convert seconds to HH:MM:SS.mmm format."""
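+    # e.g. format_timestamp(3661.5) -> "01:01:01.500"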
+    hours = int(seconds // 3600)
+    minutes = int((seconds % 3600) // 60)
+    seconds_remainder = seconds % 60
+    return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
+
+def transcribe(audio_file, model_size):
+    # Initialize the Whisper model based on the selected model size
+    device = "cpu"  # use "cuda" for GPU, "cpu" for CPU
+    compute_type = "int8"  # GPU: "float16" or "int8" - CPU: "int8"
+
+    model = WhisperModel(model_size, device=device, compute_type=compute_type)
+
+    # Transcribe the uploaded file (a file-like object is accepted directly)
+    segments, _ = model.transcribe(audio_file)
+
+    # Format and gather transcription with enhanced timestamps
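+    # Note: segments is a lazy generator; decoding runs as it is consumed below.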
+    transcription_with_timestamps = [
+        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
+        for segment in segments
+    ]
+
+    return "\n".join(transcription_with_timestamps)
+
+# Streamlit UI
+st.title("Whisper API")
+st.write("For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)")
+
+audio_file = st.file_uploader("Upload Audio or Video", type=['wav', 'mp3', 'ogg', 'mp4', 'avi'])
+model_size = st.selectbox("Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])
+
+if audio_file is not None and model_size is not None:
+    transcription = transcribe(audio_file, model_size)
+    st.text_area("Transcription", transcription, height=300)
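
To run this Space locally, install the two dependencies (pip install streamlit faster-whisper) and start the app with streamlit run app.py. The snippet below is a minimal sketch for sanity-checking the transcription logic outside Streamlit; the "base" model size and the sample.wav path are illustrative assumptions, not part of the commit.

# Minimal local check (illustrative; assumes faster-whisper is installed
# and an audio file named sample.wav exists in the working directory).
from faster_whisper import WhisperModel

model = WhisperModel("base", device="cpu", compute_type="int8")
segments, info = model.transcribe("sample.wav")
print(f"Detected language: {info.language} (probability {info.language_probability:.2f})")
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")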