# Whisper / app.py — Streamlit transcription front-end (Hugging Face Space by Lenylvt)
import streamlit as st
from faster_whisper import WhisperModel
import logging
import tempfile
import os
import time # For simulating progress and delay
# Configure logging for debugging purposes: route faster_whisper's internal
# DEBUG messages (model loading, decoding steps) to the root handler.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
def format_timestamp(seconds):
    """Render a duration in seconds as an ``HH:MM:SS.mmm`` timestamp string.

    Hours and minutes are zero-padded to two digits; seconds keep their
    fractional part, padded to ``SS.mmm`` (e.g. ``01:01:01.500``).
    """
    whole = int(seconds)
    hours, leftover = divmod(whole, 3600)
    minutes = leftover // 60
    # Keep the fractional seconds by subtracting the whole hours/minutes
    # from the original float rather than truncating it.
    secs = seconds - hours * 3600 - minutes * 60
    return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
def transcribe(audio_file, model_size, progress_text, progress_bar):
    """Transcribe an uploaded audio/video file with faster-whisper.

    Args:
        audio_file: Streamlit UploadedFile (must expose ``.name`` and ``.getvalue()``).
        model_size: Whisper model identifier, e.g. "base" or "large-v3".
        progress_text: Streamlit placeholder (``st.empty()``) for status messages.
        progress_bar: Streamlit progress bar (``st.progress(0)``).

    Returns:
        One line per segment, formatted as
        ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``, joined by newlines.
    """
    # Initialize the Whisper model based on the selected model size.
    device = "cpu"  # Use "cpu" for CPU, "cuda" for GPU
    compute_type = "int8"  # int8 quantization for faster inference on both CPU and GPU
    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    # Persist the upload to a real file path (keep the original extension so
    # the decoder can sniff the container format).
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp:
        tmp.write(audio_file.getvalue())
        tmp_path = tmp.name

    try:
        progress_text.text("Preparing file for transcription...")
        progress_bar.progress(20)
        time.sleep(1)  # Simulate processing delay

        progress_text.text("Transcribing audio...")
        segments, _ = model.transcribe(tmp_path)
        progress_bar.progress(70)
        time.sleep(1)  # Simulate processing delay

        # faster-whisper yields segments lazily; materialize the formatted
        # lines BEFORE deleting the temp file so nothing references it later.
        transcription_with_timestamps = [
            f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
            for segment in segments
        ]
    finally:
        # Guarantee temp-file cleanup even if transcription raises.
        os.remove(tmp_path)

    # Finalize progress
    progress_text.text("Finalizing...")
    progress_bar.progress(100)
    time.sleep(0.5)  # Final step delay
    progress_text.text("Transcription complete.")

    return "\n".join(transcription_with_timestamps)
# Streamlit UI components
st.title("Whisper")
st.write("For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)")
# Accept common audio and video containers; the video formats rely on the
# decoder extracting the audio track.
audio_file = st.file_uploader("🎡 Upload Audio or Video", type=['wav', 'mp3', 'ogg', 'mp4', 'avi'])
model_size = st.selectbox("πŸ“ Model Size", ["base", "small", "medium", "large", "large-v2", "large-v3"])
# Run the transcription once a file is uploaded (selectbox always has a value).
if audio_file is not None and model_size is not None:
    progress_text = st.empty() # Placeholder for dynamic text updates
    progress_bar = st.progress(0)
    transcription = transcribe(audio_file, model_size, progress_text, progress_bar)
    progress_text.empty() # Optionally clear or leave the completion message
    st.text_area("πŸ“œ Transcription", transcription, height=300)