# Scraped page header (not program code), kept for provenance:
#   botarioAcc's picture
#   update app.py
#   85eedc6 verified
import spaces
import time
import os
import torch
import gradio as gr
from transformers.pipelines import pipeline
import utils
from config import (
MODEL_PATHS,
SUPPORTED_LANGUAGES,
CUSTOM_CSS,
)
# Set the language here. It must be one of SUPPORTED_LANGUAGES
# (available are en, de and lb).
LANGUAGE = "lb"

if LANGUAGE not in SUPPORTED_LANGUAGES:
    # Abort start-up with a non-zero exit status and the message on stderr.
    # The bare `exit()` helper is injected by the `site` module for
    # interactive use, is not guaranteed to exist, and exits with status 0,
    # which would make a misconfiguration look like a clean shutdown.
    raise SystemExit(
        f"language ({LANGUAGE}) not supported. Use one of {SUPPORTED_LANGUAGES}"
    )

# Model location for the selected language, looked up in config.MODEL_PATHS.
MODEL_PATH = MODEL_PATHS[LANGUAGE]

# Lazily created ASR pipeline; populated on the first call to
# transcribe_gradio and reused afterwards.
_asr_pipeline = None
@spaces.GPU
def transcribe_gradio(audio_path: str | None) -> str:
    """Transcribe an audio file and report how long inference took.

    The ASR pipeline is built on the first call and cached in the
    module-level ``_asr_pipeline`` so later calls reuse it.

    Args:
        audio_path: Path of the audio file to transcribe. ``None`` or an
            empty string means nothing was recorded or uploaded.

    Returns:
        The transcript followed by the inference time in seconds. If
        ``audio_path`` is empty, a warning message is returned instead; if
        inference raises, the error text prefixed with "❌" is returned.
    """
    global _asr_pipeline

    if not audio_path:
        return "⚠️ Please record something or choose a file first."

    if _asr_pipeline is None:
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=MODEL_PATH,
            # First CUDA device when one is available, otherwise -1 (CPU).
            device=0 if torch.cuda.is_available() else -1,
            chunk_length_s=30,
            stride_length_s=(4, 2),
            batch_size=8,
            token=os.getenv("HF_TOKEN"),
        )

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted mid-inference.
    start = time.perf_counter()
    try:
        result = _asr_pipeline(audio_path)
        transcript = result["text"] if isinstance(result, dict) else str(result)
    except Exception as err:
        # UI boundary: surface the failure as text in the transcript box.
        return f"❌ {err}"
    runtime = time.perf_counter() - start

    return f"{transcript}\n\n⌛ Inference time: {runtime:.2f} s"
# Introductory Markdown rendered at the top of the demo page.
_INTRO_MARKDOWN = """
# 🎙️ Speech-to-Text Demo — Wave2Vec (Luxembourgish)
Use **Record** to capture speech live or **Upload** to select an audio file (.wav, .mp3, .flac).
Hit **Transcribe** to convert your recording into text, and **Clear** to reset both fields.
"""

# gradio interface
with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # First row: audio input next to the transcript output.
    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",  # the handler receives a path on disk
            label="Input audio",
            autoplay=False,
        )
        output_text = gr.Textbox(
            label="Transcript",
            placeholder="Your transcript will appear here …",
            show_copy_button=True,
            lines=10,
        )

    # Second row: action buttons.
    with gr.Row(equal_height=True, elem_classes="centered-row") as row:
        transcribe_btn = gr.Button("Transcribe ✨", scale=0)
        # Clears both the audio input and the transcript.
        clear_btn = gr.ClearButton(
            [audio_input, output_text], scale=0, elem_classes="clear-btn"
        )

    # Event wiring is registered inside the Blocks context.
    transcribe_btn.click(transcribe_gradio, inputs=audio_input, outputs=output_text)

if __name__ == "__main__":
    demo.launch()