Spaces:
Sleeping
Sleeping
File size: 2,165 Bytes
305c59b 49d93f9 1a0ef3f 305c59b 1a0ef3f 305c59b 19bb2e9 1a0ef3f 8ff4639 1a0ef3f 3b2b2f2 8ff4639 1a0ef3f 8ff4639 3b2b2f2 1a0ef3f 3b2b2f2 1a0ef3f 8ff4639 3b2b2f2 8ff4639 305c59b 49d93f9 305c59b 19bb2e9 8ff4639 305c59b 49d93f9 8ff4639 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
# Load a smaller Whisper model for faster transcription.
# "whisper-base" trades accuracy for speed versus the larger checkpoints.
# Loaded once at module import so every request reuses the same pipeline
# (first run downloads the weights from the Hugging Face Hub).
model = pipeline("automatic-speech-recognition", model="openai/whisper-base")
def split_audio(filepath, chunk_length_ms=30000):
    """Split an audio file into fixed-length WAV chunks on disk.

    Args:
        filepath: Path to the input audio file (any format pydub/ffmpeg
            can decode).
        chunk_length_ms: Length of each chunk in milliseconds. Defaults to
            30 000 ms, matching Whisper's 30-second input window.

    Returns:
        List of filesystem paths to the exported chunk files. The caller
        is responsible for deleting them when done.
    """
    audio = AudioSegment.from_file(filepath)
    chunk_paths = []
    for start_ms in range(0, len(audio), chunk_length_ms):
        segment = audio[start_ms:start_ms + chunk_length_ms]
        # Use a unique temp file per chunk: the previous fixed names
        # ("chunk_{i}.wav" in the CWD) collided when two transcription
        # requests ran concurrently, corrupting each other's chunks.
        fd, chunk_path = tempfile.mkstemp(
            prefix=f"chunk_{start_ms}_", suffix=".wav"
        )
        os.close(fd)  # pydub reopens the path itself on export
        segment.export(chunk_path, format="wav")
        chunk_paths.append(chunk_path)
    return chunk_paths
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file chunk-by-chunk with Whisper.

    Args:
        audio_file: Filesystem path to the uploaded audio (Gradio passes
            the temp-file path because the input uses type="filepath").

    Returns:
        A single string containing the detected language followed by the
        full concatenated transcription.
    """
    # Split into <=30 s chunks so each fits Whisper's input window.
    chunks = split_audio(audio_file)
    transcriptions = []
    detected_language = None
    try:
        for chunk_path in chunks:
            # "language": None asks Whisper to auto-detect the language.
            result = model(
                chunk_path,
                generate_kwargs={"task": "transcribe", "language": None},
            )
            transcriptions.append(result["text"])
            # NOTE(review): the ASR pipeline's dict normally contains only
            # "text" (and optionally "chunks"); a top-level "language" key
            # may never appear — verify against the installed transformers
            # version. Kept defensive, falls back to "unknown" below.
            if "language" in result:
                detected_language = result["language"]
    finally:
        # Always delete the chunk files, even if the model raises mid-loop.
        # The original removed a chunk only after a successful transcription,
        # leaking files on any error.
        for chunk_path in chunks:
            if os.path.exists(chunk_path):
                os.remove(chunk_path)
    full_transcription = " ".join(transcriptions)
    if detected_language is None:
        detected_language = "unknown (language not detected)"
    return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"
# Define the Gradio interface: a single audio-upload input mapped to a
# single textbox output via transcribe_audio. Gradio passes the upload as
# a filesystem path (type="filepath"), which transcribe_audio expects.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=gr.Textbox(label="Transcription and Detected Language"),
    title="Audio Transcription with Automatic Language Detection",
    description="Upload an audio file, and the system will automatically detect the language and transcribe it."
)
# Launch the Gradio interface (starts the web server; blocks in a script,
# and is what Hugging Face Spaces invokes to serve the app).
iface.launch()