# AIVoice6 / app.py
# Hugging Face Space file — uploaded by dschandra ("Update app.py"),
# revision 46ee408 (verified), 2.09 kB.
# Import required libraries
import os

from bark import generate_audio
from faster_whisper import WhisperModel
from flask import Flask, request, jsonify
from scipy.io import wavfile
from transformers import pipeline
# Initialize Flask app
app = Flask(__name__)

# Load models once at import time so every request reuses them.
# NOTE(review): device="cuda" + float16 requires an NVIDIA GPU; this line
# raises on CPU-only hosts — confirm the deployment target.
speech_model = WhisperModel("tiny", device="cuda", compute_type="float16")
# NOTE(review): "gpt-3.5-turbo" is an OpenAI API model, not a Hugging Face
# hub checkpoint — transformers.pipeline() cannot download it, so this line
# will fail at startup. Verify the intended hub model id (e.g. "gpt2").
nlp_model = pipeline("text-generation", model="gpt-3.5-turbo")
@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Process an uploaded audio file end-to-end.

    Pipeline: transcribe the upload with Whisper, generate a text reply
    with the text-generation model, then synthesize the reply to speech
    with Bark and save it as a WAV file.

    Returns:
        200 with JSON {"transcription", "response_text",
        "response_audio_path"} on success; 500 with {"error": <message>}
        on any failure.
    """
    try:
        # Ensure the scratch directory exists before saving anything
        # (previously the first request failed with FileNotFoundError).
        os.makedirs("./temp", exist_ok=True)

        # Step 1: Receive the audio file from the user.
        # basename() strips any directory components from the client-supplied
        # filename to prevent path traversal outside ./temp.
        audio_file = request.files['audio']
        audio_path = f"./temp/{os.path.basename(audio_file.filename)}"
        audio_file.save(audio_path)

        # Step 2: Transcribe the audio to text
        transcription = transcribe_audio(audio_path)

        # Step 3: Generate a response based on the transcription
        response_text = generate_response(transcription)

        # Step 4: Synthesize speech from the response text.
        # bark.generate_audio returns a raw NumPy float array, not a pydub
        # AudioSegment — it has no .export(). Write it with scipy instead.
        response_audio = synthesize_speech(response_text)
        response_audio_path = "./temp/response_audio.wav"
        # 24000 Hz is Bark's fixed output sample rate.
        wavfile.write(response_audio_path, 24000, response_audio)

        return jsonify({
            "transcription": transcription,
            "response_text": response_text,
            "response_audio_path": response_audio_path
        })
    except Exception as e:
        # Request boundary: surface any failure as a JSON 500 response.
        return jsonify({"error": str(e)}), 500
def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the Whisper model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The full transcription as a single space-joined string.
    """
    segments, _info = speech_model.transcribe(audio_path)
    return " ".join(segment.text for segment in segments)
def generate_response(user_input):
    """Generate a text reply to *user_input* with the text-generation model.

    Args:
        user_input: The prompt text (typically a transcription).

    Returns:
        The generated text of the first candidate, capped at 100 tokens,
        sampled stochastically.
    """
    candidates = nlp_model(user_input, max_length=100, do_sample=True)
    best = candidates[0]
    return best['generated_text']
def synthesize_speech(text):
    """Synthesize speech audio from *text* using Bark.

    Args:
        text: The text to speak.

    Returns:
        The raw audio array produced by bark.generate_audio.
    """
    return generate_audio(text)
# Run the app when executed directly (not when imported).
if __name__ == "__main__":
    # NOTE(review): debug=True enables the Werkzeug debugger/reloader —
    # unsafe on a publicly reachable 0.0.0.0 bind; disable in production.
    app.run(debug=True, host="0.0.0.0", port=5000)