# practiceAI / app.py
# MusIre's picture — Update app.py — e416e8e — raw — history blame — 1.55 kB
# NOTE(review): the two lines above are Hugging Face Spaces page chrome that
# leaked into the source file; commented out so the module parses.
# Install/upgrade runtime dependencies before they are imported below.
# NOTE(review): installing packages at runtime is fragile on Hugging Face
# Spaces — prefer declaring these in requirements.txt. Kept here to preserve
# the original app's behavior.
import subprocess
import sys

# One pip invocation via the *running* interpreter (bare "pip"/"python" may
# resolve to a different environment); check=True surfaces install failures
# instead of silently continuing to a broken import.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "--upgrade",
        "pip", "gradio", "soundfile", "numpy", "pydub", "openai",
    ],
    check=True,
)
import gradio as gr
import openai
import soundfile as sf
import numpy as np
from pydub import AudioSegment
from io import BytesIO
# --- OpenAI configuration -------------------------------------------------
# SECURITY: never hardcode a real API key in source. Read it from the
# environment; the original placeholder remains only as a fallback so the
# script still loads without the variable set.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# Hosted Whisper model identifier. The OpenAI API model name is "whisper-1";
# "whisper-small" is a local/Hugging Face checkpoint name and is rejected by
# the API.
whisper_model = "whisper-1"
# Define the Gradio interface
# NOTE(review): `fn=None` is a placeholder — the handler is attached later in
# the script by attribute assignment. Gradio wires its event handlers during
# Interface construction, so passing the function directly at construction
# time is the reliable pattern; confirm against the installed Gradio version.
iface = gr.Interface(
    fn=None,  # To be defined later
    inputs=gr.Audio(),  # microphone/file audio input from the browser
    outputs=gr.Textbox(),  # transcribed text shown to the user
    live=True,  # re-run the handler as input changes
)
# Define the function for ASR
def transcribe_audio(audio_data):
    """Transcribe a WAV recording to text with OpenAI's hosted Whisper model.

    Parameters
    ----------
    audio_data : bytes
        Raw WAV bytes as delivered by the Gradio Audio component.
        NOTE(review): depending on the Gradio version the component may
        instead deliver a filepath or a (sample_rate, ndarray) tuple —
        confirm against the configured `gr.Audio()` input.

    Returns
    -------
    str
        The transcription, stripped of surrounding whitespace.
    """
    # Normalize the incoming bytes to a WAV file on disk; the Whisper
    # endpoint expects a binary file object, not an in-memory sample array.
    audio = AudioSegment.from_file(BytesIO(audio_data), format="wav")
    audio.export("temp.wav", format="wav")
    # BUG FIX: openai.Completion.create is a *text* endpoint and has no
    # audio parameters — it can never transcribe. The audio endpoint in this
    # SDK generation is openai.Audio.transcribe with the "whisper-1" model,
    # and its response carries the transcript under the "text" key.
    with open("temp.wav", "rb") as audio_file:
        response = openai.Audio.transcribe("whisper-1", audio_file)
    return response["text"].strip()
# Build the interface with the transcription handler attached and launch it.
# BUG FIX: assigning `iface.fn` after construction does not register the
# handler — Gradio binds its events inside Interface.__init__ — so the
# interface is (re)built here with the function passed directly.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(),
    outputs=gr.Textbox(),
    live=True,
)
# Launch the Gradio app
iface.launch()