# Spaces: Running
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
# Shared speech-recognition pipeline, built once at import time so every
# request reuses the same loaded Whisper "base" checkpoint.
model = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
)
def split_audio(filepath, chunk_length_ms=30000):
    """Split an audio file into consecutive WAV chunk files.

    Each chunk is exported to its own uniquely named file in the system
    temporary directory, so simultaneous calls never overwrite one another's
    chunks and the current working directory does not need to be writable.

    Args:
        filepath: Path of the audio file to load with ``AudioSegment.from_file``.
        chunk_length_ms: Maximum chunk length in milliseconds. Must be
            positive; the last chunk may be shorter.

    Returns:
        List of paths to the exported WAV chunks, in playback order. The
        caller owns these files and is responsible for deleting them.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    # Validate before doing any I/O: a zero step would make range() fail with
    # an unhelpful message and a negative step would silently yield no chunks.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms}"
        )

    audio = AudioSegment.from_file(filepath)
    chunks = []
    try:
        for start_ms in range(0, len(audio), chunk_length_ms):
            # Reserve a unique file name; the handle is closed immediately so
            # the export below can reopen the path on every platform.
            with tempfile.NamedTemporaryFile(
                prefix=f"chunk_{start_ms}_", suffix=".wav", delete=False
            ) as tmp:
                chunk_path = tmp.name
            # Register the path before exporting so that a failed export is
            # cleaned up together with the chunks written before it.
            chunks.append(chunk_path)
            audio[start_ms:start_ms + chunk_length_ms].export(
                chunk_path, format="wav"
            )
    except Exception:
        # Do not leave partial output behind when splitting fails midway.
        for chunk_path in chunks:
            try:
                os.remove(chunk_path)
            except OSError:
                # Best-effort cleanup; the original error is what matters.
                pass
        raise
    return chunks
def transcribe_audio(audio_file):
    """Transcribe an audio file chunk by chunk and report the language.

    Args:
        audio_file: Path to the audio file, or ``None``/empty when nothing
            was provided.

    Returns:
        A string of the form
        ``"Detected Language: <lang>\\n\\nTranscription:\\n<text>"``, or a
        short notice when no audio file was provided.
    """
    # Nothing to split or transcribe when no file was supplied; return a
    # readable message instead of failing inside the audio loader.
    if not audio_file:
        return "No audio file provided. Please upload an audio file."

    chunks = split_audio(audio_file)
    transcriptions = []
    detected_language = None
    try:
        for chunk in chunks:
            # "language": None leaves the language choice to the model.
            result = model(
                chunk,
                generate_kwargs={"task": "transcribe", "language": None},
            )
            # Strip each segment and skip empty ones so that joining with a
            # single space does not produce doubled or leading spaces.
            text = result["text"].strip()
            if text:
                transcriptions.append(text)
            # The language is only known if the result carries this key;
            # otherwise the fallback message below is used.
            if "language" in result:
                detected_language = result["language"]
    finally:
        # Remove every chunk file, including those never reached because an
        # earlier chunk raised, so failed requests do not leak files.
        for chunk in chunks:
            try:
                os.remove(chunk)
            except OSError:
                # Best-effort cleanup must not mask the transcription outcome.
                pass

    full_transcription = " ".join(transcriptions)
    if detected_language is None:
        detected_language = "unknown (language not detected)"
    return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"
# Build the Gradio UI: one audio input, configured to hand the handler a file
# path, and one text box that shows the handler's returned string.
audio_input = gr.Audio(type="filepath", label="Upload Audio File")
result_box = gr.Textbox(label="Transcription and Detected Language")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=result_box,
    title="Audio Transcription with Automatic Language Detection",
    description="Upload an audio file, and the system will automatically detect the language and transcribe it.",
)

# Start serving the interface.
iface.launch()