# practiceAI / app.py
# MusIre's picture — Update app.py — e416e8e — raw — history blame — 1.55 kB
# NOTE(review): the two lines above are Hugging Face Spaces page chrome that
# leaked into the source file; commented out so the module parses.
# Install/upgrade runtime dependencies before they are imported below.
# NOTE(review): installing packages at runtime is fragile on Hugging Face
# Spaces — prefer declaring these in requirements.txt. Kept here to preserve
# the original app's behavior.
import subprocess
import sys

# One pip invocation via the *running* interpreter (bare "pip"/"python" may
# resolve to a different environment); check=True surfaces install failures
# instead of silently continuing to a broken import.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "--upgrade",
        "pip", "gradio", "soundfile", "numpy", "pydub", "openai",
    ],
    check=True,
)
import gradio as gr
import openai
import soundfile as sf
import numpy as np
from pydub import AudioSegment
from io import BytesIO
# --- OpenAI configuration -------------------------------------------------
# SECURITY: never hardcode a real API key in source. Read it from the
# environment; the original placeholder remains only as a fallback so the
# script still loads without the variable set.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# Hosted Whisper model identifier. The OpenAI API model name is "whisper-1";
# "whisper-small" is a local/Hugging Face checkpoint name and is rejected by
# the API.
whisper_model = "whisper-1"
# Define the Gradio interface
# NOTE(review): `fn=None` is a placeholder — the handler is attached later in
# the script by attribute assignment. Gradio wires its event handlers during
# Interface construction, so passing the function directly at construction
# time is the reliable pattern; confirm against the installed Gradio version.
iface = gr.Interface(
    fn=None,  # To be defined later
    inputs=gr.Audio(),  # microphone/file audio input from the browser
    outputs=gr.Textbox(),  # transcribed text shown to the user
    live=True,  # re-run the handler as input changes
)
# Define the function for ASR
def transcribe_audio(audio_data):
    """Transcribe a WAV recording to text with OpenAI's hosted Whisper model.

    Parameters
    ----------
    audio_data : bytes
        Raw WAV bytes as delivered by the Gradio Audio component.
        NOTE(review): depending on the Gradio version the component may
        instead deliver a filepath or a (sample_rate, ndarray) tuple —
        confirm against the configured `gr.Audio()` input.

    Returns
    -------
    str
        The transcription, stripped of surrounding whitespace.
    """
    # Normalize the incoming bytes to a WAV file on disk; the Whisper
    # endpoint expects a binary file object, not an in-memory sample array.
    audio = AudioSegment.from_file(BytesIO(audio_data), format="wav")
    audio.export("temp.wav", format="wav")
    # BUG FIX: openai.Completion.create is a *text* endpoint and has no
    # audio parameters — it can never transcribe. The audio endpoint in this
    # SDK generation is openai.Audio.transcribe with the "whisper-1" model,
    # and its response carries the transcript under the "text" key.
    with open("temp.wav", "rb") as audio_file:
        response = openai.Audio.transcribe("whisper-1", audio_file)
    return response["text"].strip()
# Build the interface with the transcription handler attached and launch it.
# BUG FIX: assigning `iface.fn` after construction does not register the
# handler — Gradio binds its events inside Interface.__init__ — so the
# interface is (re)built here with the function passed directly.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(),
    outputs=gr.Textbox(),
    live=True,
)
# Launch the Gradio app
iface.launch()