Spaces:
Sleeping
Sleeping
File size: 2,509 Bytes
16ff511 6a91da6 bae72eb 6a91da6 85eedc6 6a91da6 85eedc6 6a91da6 85eedc6 6a91da6 85eedc6 6a91da6 85eedc6 6a91da6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import spaces
import time
import os
import torch
import gradio as gr
from transformers.pipelines import pipeline
import utils
from config import (
MODEL_PATHS,
SUPPORTED_LANGUAGES,
CUSTOM_CSS,
)
# Select the model language here; available options are listed in
# SUPPORTED_LANGUAGES (currently: en, de and lb).
LANGUAGE = "lb"

if LANGUAGE not in SUPPORTED_LANGUAGES:
    # Fail fast with a non-zero exit status. The original used print() +
    # exit(), which exits with status 0 (success) on a configuration error
    # and relies on the interactive-only `exit` helper from the site module.
    raise SystemExit(
        f"language ({LANGUAGE}) not supported. Use one of {SUPPORTED_LANGUAGES}"
    )

MODEL_PATH = MODEL_PATHS[LANGUAGE]

# Lazily-initialised ASR pipeline: created on the first transcription call
# (inside transcribe_gradio) so the model load happens in the GPU context.
_asr_pipeline = None
@spaces.GPU
def transcribe_gradio(audio_path: str | None) -> str:
    """Transcribe the audio file at *audio_path* with the lazily-loaded ASR pipeline.

    Parameters
    ----------
    audio_path:
        Filesystem path to the recorded/uploaded clip, or ``None``/empty
        when the user has not provided any audio yet.

    Returns
    -------
    str
        The transcript followed by the inference time, or a user-facing
        message (the Gradio textbox simply displays whatever is returned;
        errors are never raised out of this handler).
    """
    # NOTE(review): the emoji literals below look mojibake-damaged (likely
    # originally "⚠️" / "❌" / a timer emoji) — confirm source encoding and
    # fix deliberately; left byte-identical here.
    if not audio_path:
        return "β οΈ Please record something or choose a file first."

    global _asr_pipeline
    if _asr_pipeline is None:
        # First call only: load the model once, then reuse it for the
        # lifetime of the process.
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=MODEL_PATH,
            device=0 if torch.cuda.is_available() else -1,
            chunk_length_s=30,
            stride_length_s=(4, 2),
            batch_size=8,
            token=os.getenv("HF_TOKEN"),
        )

    # perf_counter() is monotonic, so the reported duration cannot be
    # skewed by system clock adjustments (time.time() can).
    start = time.perf_counter()
    try:
        result = _asr_pipeline(audio_path)
        transcript = result["text"] if isinstance(result, dict) else str(result)
    except Exception as err:
        # Surface the failure in the UI instead of crashing the Space.
        return f"β {err}"
    runtime = time.perf_counter() - start
    return f"{transcript}\n\nβ Inference time: {runtime:.2f} s"
# ---- Gradio user interface ----------------------------------------------
with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo:
    # Intro banner shown above the widgets.
    gr.Markdown("""
    # ποΈ Speech-to-Text Demo β Wave2Vec (Luxembourgish)
    Use **Record** to capture speech live or **Upload** to select an audio file (.wav, .mp3, .flac).
    Hit **Transcribe** to convert your recording into text, and **Clear** to reset both fields.
    """)

    # Input (microphone/upload) on the left, transcript on the right.
    with gr.Row():
        audio_in = gr.Audio(
            label="Input audio",
            sources=["microphone", "upload"],
            type="filepath",
            autoplay=False,
        )
        transcript_box = gr.Textbox(
            label="Transcript",
            placeholder="Your transcript will appear here β¦",
            lines=10,
            show_copy_button=True,
        )

    # Action buttons, centered beneath the two panels.
    with gr.Row(equal_height=True, elem_classes="centered-row") as button_row:
        run_btn = gr.Button("Transcribe β¨", scale=0)
        reset_btn = gr.ClearButton(
            [audio_in, transcript_box], scale=0, elem_classes="clear-btn"
        )

    run_btn.click(fn=transcribe_gradio, inputs=audio_in, outputs=transcript_box)

if __name__ == "__main__":
    demo.launch()
|