import subprocess
import sys

subprocess.run(["python", "-m", "pip", "install", "--upgrade", "pip"])
subprocess.run(["pip", "install", "gradio", "--upgrade"])
subprocess.run(["pip", "install", "soundfile"])
subprocess.run(["pip", "install", "numpy"])
subprocess.run(["pip", "install", "pydub"])
subprocess.run(["pip", "install", "openai"])

import gradio as gr
import openai

# Set your OpenAI API key
openai.api_key = "YOUR_OPENAI_API_KEY"

# The Whisper model hosted by the OpenAI API is named "whisper-1"
whisper_model = "whisper-1"

# Define the function for ASR
def transcribe_audio(audio_path):
    # With type="filepath", Gradio saves the recording and passes its path
    if audio_path is None:
        return ""

    # Perform ASR using OpenAI's hosted Whisper endpoint; the API accepts
    # the audio file directly, so no local conversion or resampling is needed
    with open(audio_path, "rb") as audio_file:
        response = openai.Audio.transcribe(whisper_model, audio_file)

    # Extract the transcribed text from the response
    return response["text"].strip()

# Define the Gradio interface; the callback must be passed at construction time
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(),
    live=True,
)

# Launch the Gradio app
iface.launch()
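
# For reference, a minimal sketch of the same transcription call against the
# openai>=1.0 SDK, if pinning the pre-1.0 package above is not an option
# (assumes the v1 client interface; adapt transcribe_audio accordingly):
#
# from openai import OpenAI
# client = OpenAI(api_key="YOUR_OPENAI_API_KEY")
# with open("clip.wav", "rb") as f:
#     text = client.audio.transcriptions.create(model="whisper-1", file=f).text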