Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from faster_whisper import WhisperModel
|
|
3 |
import logging
|
4 |
import tempfile
|
5 |
import os
|
|
|
6 |
|
7 |
# Configure logging for debugging purposes
|
8 |
logging.basicConfig()
|
@@ -15,7 +16,7 @@ def format_timestamp(seconds):
|
|
15 |
seconds_remainder = seconds % 60
|
16 |
return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
|
17 |
|
18 |
-
def transcribe(audio_file, model_size):
|
19 |
# Initialize the Whisper model based on the selected model size
|
20 |
device = "cpu" # Use "cpu" for CPU, "cuda" for GPU
|
21 |
compute_type = "int8" # Use "int8" for faster inference on both CPU and GPU
|
@@ -27,12 +28,26 @@ def transcribe(audio_file, model_size):
|
|
27 |
tmp.write(audio_file.getvalue())
|
28 |
tmp_path = tmp.name
|
29 |
|
|
|
|
|
|
|
|
|
|
|
30 |
# Transcribe the audio file
|
|
|
31 |
segments, _ = model.transcribe(tmp_path)
|
32 |
-
|
|
|
|
|
33 |
# Clean up the temporary file
|
34 |
os.remove(tmp_path)
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# Format and gather transcription with enhanced timestamps
|
37 |
transcription_with_timestamps = [
|
38 |
f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
|
@@ -49,5 +64,8 @@ audio_file = st.file_uploader("🎵 Upload Audio or Video", type=['wav', 'mp3',
|
|
49 |
model_size = st.selectbox("π Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])
|
50 |
|
51 |
if audio_file is not None and model_size is not None:
|
52 |
-
|
|
|
|
|
|
|
53 |
st.text_area("π Transcription", transcription, height=300)
|
|
|
3 |
import logging
|
4 |
import tempfile
|
5 |
import os
|
6 |
+
import time # For simulating progress and delay
|
7 |
|
8 |
# Configure logging for debugging purposes
|
9 |
logging.basicConfig()
|
|
|
16 |
seconds_remainder = seconds % 60
|
17 |
return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
|
18 |
|
19 |
+
def transcribe(audio_file, model_size, progress_text, progress_bar):
|
20 |
# Initialize the Whisper model based on the selected model size
|
21 |
device = "cpu" # Use "cpu" for CPU, "cuda" for GPU
|
22 |
compute_type = "int8" # Use "int8" for faster inference on both CPU and GPU
|
|
|
28 |
tmp.write(audio_file.getvalue())
|
29 |
tmp_path = tmp.name
|
30 |
|
31 |
+
# Update progress and text
|
32 |
+
progress_text.text("Preparing file for transcription...")
|
33 |
+
progress_bar.progress(20)
|
34 |
+
time.sleep(1) # Simulate processing delay
|
35 |
+
|
36 |
# Transcribe the audio file
|
37 |
+
progress_text.text("Transcribing audio...")
|
38 |
segments, _ = model.transcribe(tmp_path)
|
39 |
+
progress_bar.progress(70)
|
40 |
+
time.sleep(1) # Simulate processing delay
|
41 |
+
|
42 |
# Clean up the temporary file
|
43 |
os.remove(tmp_path)
|
44 |
+
|
45 |
+
# Finalize progress
|
46 |
+
progress_text.text("Finalizing...")
|
47 |
+
progress_bar.progress(100)
|
48 |
+
time.sleep(0.5) # Final step delay
|
49 |
+
progress_text.text("Transcription complete.")
|
50 |
+
|
51 |
# Format and gather transcription with enhanced timestamps
|
52 |
transcription_with_timestamps = [
|
53 |
f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
|
|
|
64 |
model_size = st.selectbox("π Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])
|
65 |
|
66 |
if audio_file is not None and model_size is not None:
|
67 |
+
progress_text = st.empty() # Placeholder for dynamic text updates
|
68 |
+
progress_bar = st.progress(0)
|
69 |
+
transcription = transcribe(audio_file, model_size, progress_text, progress_bar)
|
70 |
+
progress_text.empty() # Optionally clear or leave the completion message
|
71 |
st.text_area("π Transcription", transcription, height=300)
|