Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import time | |
| import io | |
| import librosa | |
| import torch | |
| import soundfile as sf | |
| from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline | |
# Checkpoint shared by the model, the processor and the pipeline.
MODEL_ID = "distil-whisper/distil-large-v2"

# Pick the device and dtype once so the model and the pipeline always agree.
# Falling back to CPU/float32 keeps the app importable on hosts without a GPU
# (the original hard-coded "cuda" and crashed there).
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Instantiating the model object.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    pretrained_model_name_or_path=MODEL_ID,
    torch_dtype=torch_dtype,
    use_safetensors=True,
)
model = model.to(device)

# Instantiating the processor object (provides the tokenizer and the feature extractor).
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path=MODEL_ID)

# Instantiating the transformers pipeline object.
# The already-loaded `model` is passed in; handing over the checkpoint name
# instead would load the weights a second time and leave `model` unused.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
# Defining speech-to-text function.
def convert(audio, state=""):
    """
    Transcribe one recording and append the text to the running transcript.

    Used as the callback of the Gradio interface.

    Parameters:
    - audio: the recording handed over by Gradio (a file path when the audio
      input uses type="filepath"), or None when nothing has been recorded.
    - state: a string with the text accumulated by previous conversions.
      None is treated as an empty transcript.

    Returns:
    - a (display_text, new_state) tuple. On success both elements are the
      updated transcript; on failure display_text is an error message and
      new_state is the transcript unchanged.
    """
    import logging

    # NOTE(review): Gradio's State component defaults to None, so the ""
    # default above may never be used - confirm against the installed version.
    # Without this normalisation `state += ...` raises TypeError on None.
    state = state or ""

    # Nothing recorded yet: answer immediately instead of sleeping and then
    # failing inside the pipeline.
    if audio is None:
        return "Error processing audio: Please start recording!", state

    # Kept from the original implementation; presumably throttles live
    # updates - TODO confirm it is still needed.
    time.sleep(3)

    try:
        transcribed_text = pipe(audio)["text"]
    except Exception:
        # UI boundary: log the real cause for the operator and show the user
        # a message that does not blame a missing recording.
        logging.getLogger(__name__).exception("Speech-to-text pipeline failed")
        return "Error processing audio: transcription failed, please try again.", state

    state += transcribed_text + " "
    return state, state
# Build the Gradio UI: a microphone input plus session state go in,
# the transcript textbox plus the updated session state come out.
gr_interface = gr.Interface(
    fn=convert,
    title="Automatic Speech-to-Text",
    description="### Record your speech and watch it get converted to text!",
    inputs=[
        gr.Audio(
            label="Please Record Your Speech Here!",
            sources="microphone",
            type="filepath",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
    # live=True re-runs `convert` whenever the input changes, without a submit click.
    live=True,
)

# Start the web app with Gradio's default launch settings.
gr_interface.launch()