# Hugging Face Space app (page status banner removed: "Spaces: Sleeping")
# Runtime dependency bootstrap + configuration.
# NOTE(review): installing packages at import time is a stopgap for hosted
# environments — prefer declaring these in requirements.txt so the platform
# installs them at build time.
import subprocess
import sys

# Install everything in one call, via THIS interpreter's pip, before any of
# the third-party imports below run (the original imported gradio before
# installing it, and invoked a bare `pip` that may target another Python).
subprocess.run(
    [sys.executable, "-m", "pip", "install", "--upgrade",
     "pip", "gradio", "soundfile", "numpy", "pydub", "openai"],
    check=False,  # best-effort: imports below will surface a real failure
)

import os

import gradio as gr
import openai
import soundfile as sf
import numpy as np
from pydub import AudioSegment
from io import BytesIO

# Prefer the environment variable; never commit a real API key to source.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# OpenAI's hosted Whisper endpoint exposes a single model named "whisper-1".
# "whisper-small" is an open-source checkpoint name and is rejected by the API.
whisper_model = "whisper-1"
# Define the ASR function BEFORE building the interface: gr.Interface
# registers `fn` in its event handlers at construction time, so the
# original pattern of passing fn=None and assigning `iface.fn` afterwards
# never wires the function to the UI.
def transcribe_audio(audio_data):
    """Transcribe a recorded clip with OpenAI's hosted Whisper API.

    Parameters
    ----------
    audio_data : str or None
        Filesystem path to the recording, as delivered by
        ``gr.Audio(type="filepath")``. ``None`` when nothing was recorded.

    Returns
    -------
    str
        The transcribed text, or ``""`` when no audio was provided.
    """
    if not audio_data:
        # Nothing recorded yet (live=True fires on every input change).
        return ""
    # Re-encode to WAV so the API always receives a supported container.
    audio = AudioSegment.from_file(audio_data)
    audio.export("temp.wav", format="wav")
    # Whisper transcription uses the dedicated Audio endpoint;
    # openai.Completion is text-only and has no audio parameters.
    # Model hard-coded to "whisper-1", the only Whisper model the API serves.
    with open("temp.wav", "rb") as f:
        response = openai.Audio.transcribe("whisper-1", f)
    # The transcription endpoint returns {"text": ...}, not choices[].
    return response["text"].strip()

# Build the Gradio interface with the handler bound at construction.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),  # hand the callback a file path
    outputs=gr.Textbox(),
    live=True,
)

# Launch the Gradio app
iface.launch()