Spaces:
Sleeping
Sleeping
File size: 1,546 Bytes
88dc3ba d16271b 88dc3ba 335cb84 eb4d23a e416e8e 527e644 ce7e2d6 e416e8e 88dc3ba e416e8e 630d7c9 e416e8e 88dc3ba e416e8e 527e644 e416e8e 527e644 e416e8e 527e644 e416e8e 527e644 e416e8e 527e644 e416e8e 527e644 e416e8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os
import subprocess

# Install/upgrade all runtime dependencies in a single pip invocation.
# check=True surfaces install failures instead of silently continuing.
subprocess.run(
    ["python", "-m", "pip", "install", "--upgrade",
     "pip", "gradio", "soundfile", "numpy", "pydub", "openai"],
    check=True,
)

# Import third-party packages only AFTER they have been installed; importing
# gradio before the install (as the original did) crashes on a fresh container.
import gradio as gr
import openai
import soundfile as sf
import numpy as np
from pydub import AudioSegment
from io import BytesIO

# Read the API key from the environment (the standard Spaces secret name);
# fall back to the original placeholder so edit-in-place still works.
openai.api_key = os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# OpenAI's hosted Whisper model is exposed under the name "whisper-1";
# "whisper-small" is a local/Hugging Face checkpoint name the API rejects.
whisper_model = "whisper-1"
# Define the Gradio interface
# Define the ASR callback BEFORE building the interface so it can be passed
# as fn= at construction time; assigning iface.fn after the fact (as the
# original did) never registers the callback with Gradio's event system.
def transcribe_audio(audio_path):
    """Transcribe a recorded/uploaded audio file with OpenAI Whisper.

    Parameters
    ----------
    audio_path : str | None
        Path to the audio file delivered by the ``gr.Audio`` component
        (``type="filepath"``). ``None``/empty when nothing was submitted.

    Returns
    -------
    str
        The transcribed text, or ``""`` for an empty submission.
    """
    # Guard the empty-input case so live mode doesn't crash between takes.
    if not audio_path:
        return ""
    # Normalize to WAV via pydub so the API always receives a supported format.
    AudioSegment.from_file(audio_path).export("temp.wav", format="wav")
    # Whisper transcription uses the dedicated audio endpoint.
    # Completion.create is a text-only API and has no audio_input parameter.
    with open("temp.wav", "rb") as audio_file:
        response = openai.Audio.transcribe(whisper_model, audio_file)
    # The audio endpoint returns {"text": ...}, not a choices list.
    return response["text"].strip()


# Build the Gradio interface with the callback wired in at construction.
# type="filepath" makes gr.Audio hand the callback a file path; the default
# would be a (sample_rate, ndarray) tuple, which the original mis-handled
# as raw bytes.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(),
    live=True,
)

# Launch only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|