Spaces:

Athspi
/

Ai-audio

Sleeping

File size: 1,340 Bytes

305c59b
f8b81a1
1e0f1bc
f8b81a1
d30da85
1e0f1bc
d30da85
1e0f1bc
d30da85
 
 
1e0f1bc
d30da85
 
 
 
1e0f1bc
 
 
 
d30da85
1e0f1bc
d30da85
 
 
 
 
 
 
 
 
1e0f1bc

import gradio as gr
from faster_whisper import WhisperModel

# Build the Faster Whisper model once at import time; the transcription
# function below reuses this single module-level instance.
MODEL_SIZE = "large-v3"
MODEL_DEVICE = "cpu"  # Switch to "cuda" to run on a GPU
model = WhisperModel(MODEL_SIZE, device=MODEL_DEVICE)

# Define the transcription function
def transcribe_audio(audio_file):
    """
    Transcribe an audio file with the module-level Faster Whisper ``model``.

    Args:
        audio_file: Path of the audio file to transcribe (the Gradio input
            is declared with ``type="filepath"``). ``None`` or a blank
            string means nothing was uploaded.

    Returns:
        str: One line per segment, formatted as
        ``[<start>s -> <end>s] <text>`` and joined with newlines.
        On any failure a string starting with ``"Error: "`` is returned
        instead of raising, so the message lands in the output textbox.
    """
    # Nothing uploaded: answer right away with a clear message rather than
    # handing an empty value to the model and echoing a low-level error.
    # The check is limited to None/str so other input types accepted by
    # the model are still passed through untouched.
    if audio_file is None or (isinstance(audio_file, str) and not audio_file.strip()):
        return "Error: No audio file provided."

    try:
        # beam_size=5 is the beam-search width; adjust as needed.
        segments, _info = model.transcribe(audio_file, beam_size=5)
        # Consuming `segments` stays inside the try so that failures raised
        # while iterating are reported the same way as call-time failures.
        return "\n".join(
            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}"
            for segment in segments
        )
    except Exception as e:
        # UI boundary: report the problem as text instead of crashing the app.
        return f"Error: {e}"

# Assemble the Gradio UI: one uploaded audio file in, one transcription
# text box out, wired to transcribe_audio.
# NOTE(review): `source=` on gr.Audio and `allow_flagging=` on gr.Interface
# appear to be version-dependent spellings (newer Gradio releases use
# `sources=[...]` / `flagging_mode=`) — confirm against the installed version.
audio_input = gr.Audio(
    source="upload",
    type="filepath",  # the callback receives a path on disk
    label="Upload Audio",
)
transcription_output = gr.Textbox(label="Transcription")

interface_description = (
    "Upload an audio file and get the transcription using the "
    "Faster Whisper model large-v3. "
    "Supports high-quality transcription with beam search."
)

interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=transcription_output,
    title="Audio-to-Text Transcription",
    description=interface_description,
    allow_flagging="never",
)

# Start the web server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": True,  # ask Gradio for a public share link
    }
    interface.launch(**launch_options)