Seravian committed on
Commit
826552f
·
verified ·
1 Parent(s): 2e07738

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -0
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from pydub import AudioSegment
4
+ import os
5
+
6
# Build the audio-classification pipeline once at import time so every
# request reuses the same loaded speech-emotion model.
asr = pipeline(
    "audio-classification",
    model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
)
8
+
9
def convert_audio_to_wav(audio_path):
    """Convert an audio file to WAV and return the path of the new file.

    The input is decoded with ``AudioSegment.from_file`` and re-exported as
    WAV next to the original, at ``audio_path + ".wav"``. The caller owns the
    returned file and is responsible for deleting it.

    Args:
        audio_path: Path of the source audio file, as a string.

    Returns:
        The path of the WAV file that was written.
    """
    audio = AudioSegment.from_file(audio_path)
    wav_path = audio_path + ".wav"
    # export() hands back the open output file object; close it explicitly so
    # the data is flushed and the handle released before the caller reads or
    # removes the file, instead of relying on garbage collection.
    audio.export(wav_path, format="wav").close()
    return wav_path
15
+
16
def transcribe(audio_path):
    """Run the speech-emotion classifier on an uploaded audio file.

    Despite its name, this function does not produce a transcript: it feeds
    the audio to the module-level ``asr`` audio-classification pipeline.
    The upload is first converted to WAV with ``convert_audio_to_wav``; that
    temporary WAV is always removed afterwards, even if inference fails.

    Args:
        audio_path: Path of the uploaded audio file, or ``None`` when the
            user submitted the form without providing any audio.

    Returns:
        The first element of the pipeline output (presumably the
        highest-scoring ``{"label", "score"}`` entry -- confirm against the
        transformers audio-classification pipeline documentation).

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio_path is None:
        # Surface a readable message in the UI instead of an opaque
        # decoding failure from the conversion step.
        raise gr.Error("Please upload or record an audio clip first.")

    wav_path = convert_audio_to_wav(audio_path)
    try:
        result = asr(wav_path)
    finally:
        # Clean up the temporary WAV whether or not inference succeeded.
        os.remove(wav_path)
    return result[0]
21
+
22
# Gradio interface (DO NOT use share=True)
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio (.m4a, .mp3, .wav...)"),
    # The backing pipeline is an audio classifier, so the output is an
    # emotion prediction rather than a transcript; label it accordingly.
    outputs=gr.Textbox(label="Predicted emotion"),
    title="Whisper Speech emotion Recognition",
    description="Detects the emotion in speech from most audio formats using Whisper.",
)

# Just launch it — no share=True!
demo.launch()
33
+
34
+
35
+