import gradio as gr
import torch
from wenet.cli.model import load_model
from huggingface_hub import hf_hub_download
import spaces

REPO_ID = "Revai/reverb-asr"

files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']
downloaded_files = [hf_hub_download(repo_id=REPO_ID, filename=f) for f in files]

model = load_model(downloaded_files[0], downloaded_files[1])

def process_cat_embs(cat_embs):
    # "gpu" is not a valid torch device string; use CUDA when available, else CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    cat_embs = torch.tensor([float(c) for c in cat_embs.split(',')]).to(device)
    return cat_embs

# Run transcription on a GPU allocated by Hugging Face ZeroGPU for the duration of the call
@spaces.GPU
def recognition(audio, style=0):
    if not audio:
        return "Input Error! Please provide an audio input."
    # Weight the verbatim/non-verbatim category embeddings according to the style value
    cat_embs = process_cat_embs(f'{style},{1 - style}')
    result = model.transcribe(audio, cat_embs=cat_embs)
    if not result or 'text' not in result:
        return "ERROR! No text output! Please try again!"
    # Replace the subword word-boundary marker with a plain space
    text_output = result['text'].replace('▁', ' ')
    return text_output

# Gradio UI Components
inputs = [
    gr.Audio(source="microphone", type="filepath", label='Input audio'),
    gr.Slider(0, 1, value=0, label="Transcription Style",
              info="Adjust between non-verbatim (0) and verbatim (1) transcription")
]

output = gr.Textbox(label="Output Text")

# UI and Interface
iface = gr.Interface(
    fn=recognition,
    inputs=inputs,
    outputs=output,
    title="Reverb ASR Transcription",
    description="Supports verbatim and non-verbatim transcription styles.",
    article="<p style='text-align: center'><a href='https://rev.com' target='_blank'>Learn more about Rev</a></p>",
    theme='huggingface'
)

iface.launch(enable_queue=True)
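
# Example usage outside the web UI (a sketch; "example.wav" is a placeholder path,
# not a file shipped with this Space). The style value blends the two category
# embeddings passed to the model: style=1 weights verbatim output, style=0
# non-verbatim, and intermediate values such as 0.5 mix the two styles.
#
#   recognition("example.wav", style=0)    # non-verbatim transcript
#   recognition("example.wav", style=1)    # verbatim transcript
#   recognition("example.wav", style=0.5)  # blended style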