import gradio as gr
import torch
from datasets import load_dataset
from transformers import pipeline

# Make the demo deterministic across runs.
torch.manual_seed(42)

# Build the speech-emotion-recognition pipeline once at startup so each
# request does not reload the model.
audio_classifier = pipeline(
    task="audio-classification",
    model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
)


def predict(speech):
    """Classify an uploaded audio file by emotion.

    If no file is uploaded, fall back to the first LibriSpeech demo sample.
    """
    if speech is None:
        ds = load_dataset(
            "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
        )
        speech = ds[0]["audio"]["path"]
    preds = audio_classifier(speech)
    # gr.Label expects a dict mapping labels to confidence scores.
    return {pred["label"]: round(pred["score"], 4) for pred in preds}


demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(label="Upload", type="filepath"),
    outputs=gr.Label(num_top_classes=2),
    title="Audio",
)
demo.launch()
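# --- Optional smoke test (a minimal sketch, not part of the Gradio app) ---
# Comment out demo.launch() above and run the line below to check the
# pipeline output without starting the web UI. It prints a dict mapping
# emotion labels to scores; the exact label set depends on the
# ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition checkpoint.
#
#   print(predict(None))  # classifies the bundled LibriSpeech sample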