File size: 2,165 Bytes
305c59b
49d93f9
1a0ef3f
 
305c59b
1a0ef3f
 
 
 
 
 
 
 
 
 
 
 
 
305c59b
19bb2e9
1a0ef3f
 
 
8ff4639
1a0ef3f
3b2b2f2
8ff4639
1a0ef3f
8ff4639
3b2b2f2
1a0ef3f
3b2b2f2
 
 
 
 
1a0ef3f
 
 
 
8ff4639
3b2b2f2
 
 
8ff4639
 
 
305c59b
49d93f9
 
305c59b
19bb2e9
8ff4639
 
 
305c59b
 
49d93f9
8ff4639
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import gradio as gr
from transformers import pipeline
from pydub import AudioSegment
import os

# Load a smaller Whisper model for faster transcription.
# whisper-base trades accuracy for speed versus the larger checkpoints;
# the weights are downloaded from the Hugging Face hub on first run, so
# importing this module has network/disk side effects.
model = pipeline("automatic-speech-recognition", model="openai/whisper-base")

def split_audio(filepath, chunk_length_ms=30000):
    """Split an audio file into fixed-length WAV chunks on disk.

    Args:
        filepath: Path to the source audio file (any format pydub/ffmpeg
            can read).
        chunk_length_ms: Length of each chunk in milliseconds. Defaults to
            30000 (30 s), matching Whisper's native processing window.

    Returns:
        List of filesystem paths to the exported chunk files, in playback
        order. Callers are responsible for deleting these files when done.
    """
    import tempfile  # local import: keeps the module import block unchanged

    audio = AudioSegment.from_file(filepath)
    chunks = []
    for start_ms in range(0, len(audio), chunk_length_ms):
        segment = audio[start_ms:start_ms + chunk_length_ms]
        # Use a unique temp file per chunk instead of "chunk_<i>.wav" in the
        # CWD: the fixed names collide when two requests run concurrently
        # (each would overwrite and then delete the other's chunks).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
            chunk_path = tf.name
        segment.export(chunk_path, format="wav")
        chunks.append(chunk_path)
    return chunks

def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file chunk-by-chunk with Whisper.

    Args:
        audio_file: Filesystem path to the uploaded audio (the Gradio input
            component uses type="filepath", so a path string arrives here).

    Returns:
        A single string containing the detected language (or an "unknown"
        fallback) followed by the full concatenated transcription.
    """
    # Split the audio into ~30 s chunks so long files fit the model window.
    chunks = split_audio(audio_file)

    transcriptions = []
    detected_language = None

    try:
        for chunk_path in chunks:
            # "language": None lets Whisper auto-detect the spoken language.
            result = model(
                chunk_path,
                generate_kwargs={"task": "transcribe", "language": None},
            )
            transcriptions.append(result["text"])

            # NOTE(review): the HF ASR pipeline's result dict normally holds
            # only "text" (plus "chunks" when timestamps are requested), so
            # this key is unlikely to ever appear — expect the "unknown"
            # fallback below; confirm against the installed transformers
            # version before relying on it.
            if "language" in result:
                detected_language = result["language"]
    finally:
        # Always delete the temp chunk files, even if transcription raised —
        # the original cleaned up inside the loop and leaked the remaining
        # chunks on any model error.
        for chunk_path in chunks:
            if os.path.exists(chunk_path):
                os.remove(chunk_path)

    # Combine all chunk transcriptions into one text.
    full_transcription = " ".join(transcriptions)

    # If no language was detected, fall back to an explicit message.
    if detected_language is None:
        detected_language = "unknown (language not detected)"

    return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"

# Define the Gradio interface:
# - input: a single audio upload handed to the handler as a file path
#   (type="filepath"), which is the form transcribe_audio expects
# - output: one textbox carrying both the detected language and the transcript
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=gr.Textbox(label="Transcription and Detected Language"),
    title="Audio Transcription with Automatic Language Detection",
    description="Upload an audio file, and the system will automatically detect the language and transcribe it."
)

# Launch the Gradio web server (blocks until the app is stopped).
iface.launch()