import subprocess

# Install dependencies first (handy in notebook/Colab-style environments);
# gradio must not be imported before it is installed.
subprocess.run(["python", "-m", "pip", "install", "--upgrade", "pip"])
subprocess.run(["pip", "install", "--upgrade", "gradio"])
subprocess.run(["pip", "install", "pydub"])
subprocess.run(["pip", "install", "openai"])

import gradio as gr
import openai
from pydub import AudioSegment

# Set your OpenAI API key
client = openai.OpenAI(api_key="YOUR_OPENAI_API_KEY")

# OpenAI's hosted Whisper model
whisper_model = "whisper-1"

# Define the function for ASR
def transcribe_audio(audio_path):
    # Gradio passes the path of the recorded/uploaded file (type="filepath").
    # Normalize it to WAV so the API always receives a supported format.
    audio = AudioSegment.from_file(audio_path)
    audio.export("temp.wav", format="wav")

    # Perform ASR using OpenAI's Whisper API
    with open("temp.wav", "rb") as audio_file:
        response = client.audio.transcriptions.create(
            model=whisper_model,
            file=audio_file,
        )

    # Extract the transcribed text from the response
    return response.text.strip()

# Define the Gradio interface
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(),
    live=True,
)

# Launch the Gradio app
iface.launch()