# soulspeak_trial / app.py
# (Hugging Face Space page header residue — author "amannnnn", commit "Update app.py", 25d9a70 verified)
import gradio as gr
from transformers import pipeline
from transformers import AutoProcessor, SeamlessM4Tv2Model
# Microphone/upload audio widget fed into voice_to_emotion.
# BUG FIX: `format` must be a single string ("wav" or "mp3"), not a list —
# gr.Audio does not accept `format=['wav']`.
input_audio = gr.Audio(
    sources=['microphone', 'upload'],
    label='Speak with me...',
    show_label=True,
    interactive=True,
    format='wav',
)
def voice_to_emotion(audio):
    """Translate spoken audio to English text and classify its emotion.

    Parameters
    ----------
    audio : tuple(int, numpy.ndarray)
        A ``(sample_rate, waveform)`` pair as produced by ``gr.Audio`` with
        the default ``type="numpy"``.  # NOTE(review): confirm the component type

    Returns
    -------
    str
        The top-scoring emotion label from the go_emotions classifier.
    """
    # NOTE(review): loading these large models on every invocation is very
    # slow; they should be hoisted to module level in a follow-up.
    processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
    model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
    classifier = pipeline(
        task="text-classification",
        model="SamLowe/roberta-base-go_emotions",
        top_k=None,
    )
    # (The original also built an unused ASR pipeline and an unused
    # 'arpanghoshal/EmoRoBERTa' sentiment pipeline; both are removed.)

    # The raw Gradio value cannot be passed straight to model.generate():
    # the processor must first turn the waveform into model inputs.
    # NOTE(review): SeamlessM4Tv2 expects 16 kHz float audio — resample /
    # rescale upstream if the widget delivers something else; confirm.
    sample_rate, waveform = audio
    inputs = processor(
        audios=waveform.astype("float32"),
        sampling_rate=sample_rate,
        return_tensors="pt",
    )
    output_tokens = model.generate(**inputs, tgt_lang="eng", generate_speech=False)
    translated_text_from_audio = processor.decode(
        output_tokens[0].tolist()[0], skip_special_tokens=True
    )

    # With top_k=None the classifier returns every label sorted by score
    # (descending).  The original loop overwrote `res` on each iteration and
    # therefore returned the LAST — i.e. lowest-scoring — label; take the
    # first (top-scoring) one instead.
    model_outputs = classifier(translated_text_from_audio)
    return str(model_outputs[0][0]['label'])
# Wire the audio widget to the classifier and expose a one-line text output.
demo = gr.Interface(
    fn=voice_to_emotion,
    inputs=input_audio,
    outputs="textbox",
)

# Launch the Gradio server only when executed as a script.
if __name__ == "__main__":
    demo.launch()