# "Spaces: Sleeping / Sleeping" — Hugging Face Spaces status banner captured by
# page scraping; not part of the program. Kept here as a comment so the file parses.
import os
import time

import gradio as gr
import spaces
import torch
from transformers.pipelines import pipeline

import utils
from config import (
    CUSTOM_CSS,
    MODEL_PATHS,
    SUPPORTED_LANGUAGES,
)
# Select the interface/model language here; must be one of SUPPORTED_LANGUAGES
# from config (currently en, de and lb).
LANGUAGE = "lb"

if LANGUAGE not in SUPPORTED_LANGUAGES:
    # Fail fast at import time: without a valid language there is no model path.
    # raise SystemExit(1) instead of exit(): exit() is a site-module convenience
    # (absent under `python -S`) and would report success (status 0) on this
    # error path.
    print(f"language ({LANGUAGE}) not supported. Use one of {SUPPORTED_LANGUAGES}")
    raise SystemExit(1)

# Checkpoint (local path or Hub id) for the selected language.
MODEL_PATH = MODEL_PATHS[LANGUAGE]

# Lazily-created ASR pipeline: built on the first transcription request so app
# startup stays fast and the (large) model loads only when actually needed.
_asr_pipeline = None
def transcribe_gradio(audio_path: str | None) -> str: | |
if not audio_path: | |
return "β οΈ Please record something or choose a file first." | |
global _asr_pipeline | |
if _asr_pipeline is None: | |
_asr_pipeline = pipeline( | |
"automatic-speech-recognition", | |
model=MODEL_PATH, | |
device=0 if torch.cuda.is_available() else -1, | |
chunk_length_s=30, | |
stride_length_s=(4, 2), | |
batch_size=8, | |
token=os.getenv("HF_TOKEN"), | |
) | |
start = time.time() | |
try: | |
result = _asr_pipeline(audio_path) | |
transcript = result["text"] if isinstance(result, dict) else str(result) | |
except Exception as err: | |
return f"β {err}" | |
runtime = time.time() - start | |
return f"{transcript}\n\nβ Inference time: {runtime:.2f} s" | |
# --- Gradio interface -------------------------------------------------------
# Two-column layout: audio input on the left, transcript on the right, with a
# Transcribe / Clear button row underneath.
with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo:
    # NOTE(review): emoji/dash/ellipsis below were mojibake in the source
    # ("ποΈ", "β", "β¨", "β¦"); restored to the likely originals — confirm.
    gr.Markdown(
        """
# 🎙️ Speech-to-Text Demo — Wave2Vec (Luxembourgish)
Use **Record** to capture speech live or **Upload** to select an audio file (.wav, .mp3, .flac).
Hit **Transcribe** to convert your recording into text, and **Clear** to reset both fields.
"""
    )

    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",  # the ASR pipeline expects a file path, not raw samples
            label="Input audio",
            autoplay=False,
        )
        output_text = gr.Textbox(
            label="Transcript",
            placeholder="Your transcript will appear here …",
            show_copy_button=True,
            lines=10,
        )

    # Unused `as row` binding from the original removed.
    with gr.Row(equal_height=True, elem_classes="centered-row"):
        transcribe_btn = gr.Button("Transcribe ✨", scale=0)
        # ClearButton resets both the audio widget and the transcript box.
        clear_btn = gr.ClearButton(
            [audio_input, output_text], scale=0, elem_classes="clear-btn"
        )

    transcribe_btn.click(transcribe_gradio, inputs=audio_input, outputs=output_text)

if __name__ == "__main__":
    demo.launch()