AIVoice5

Sleeping

App Files Files Community

AIVoice5 / app.py

dschandra

Update app.py

e2c4a3d verified 8 months ago

raw

history blame contribute delete

11.6 kB

	from flask import Flask, render_template_string, request, jsonify
	import speech_recognition as sr
	from tempfile import NamedTemporaryFile
	import os
	import ffmpeg
	import logging
	from werkzeug.exceptions import BadRequest

	# Flask application object that the route decorators in this file attach to.
	app = Flask(__name__)

	# Configure the root logger to emit records at INFO level and above.
	logging.basicConfig(level=logging.INFO)

	# Current order, kept in process memory as a plain list of item names.
	user_order = list()

	# Menu grouped by category: 8 items in total across the five groups.
	menu_items = {
	    "Veg": [
	        "Vegetable Biryani",
	    ],
	    "Non-Veg": [
	        "Butter Chicken",
	    ],
	    # One vegetarian and one non-vegetarian dish.
	    "Both": [
	        "Paneer Butter Masala",
	        "Chicken Biryani",
	    ],
	    "Drinks": [
	        "Lassi",
	        "Milkshake",
	    ],
	    "Desserts": [
	        "Gulab Jamun",
	        "Ice Cream",
	    ],
	}

	# HTML template for the single-page frontend rendered by the "/" route.
	# The embedded script drives the conversation loop:
	#   1. A click on the mic button shows an instruction message and, after
	#      3 seconds, starts recording through MediaRecorder (audio/webm, opus).
	#   2. Recording is stopped after 10 seconds and the clip is POSTed as the
	#      "audio" form field to /process-audio.
	#   3. The "response" field of the returned JSON is displayed and spoken via
	#      the browser's speechSynthesis API.
	#   4. Unless the reply contains "Goodbye", listening restarts 1 second later.
	html_code = """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>AI Dining Assistant</title>
	<style>
	body {
	font-family: Arial, sans-serif;
	background-color: #f4f4f9;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	height: 100vh;
	margin: 0;
	}
	h1 {
	color: #333;
	}
	.mic-button {
	width: 80px;
	height: 80px;
	border-radius: 50%;
	background-color: #007bff;
	color: white;
	font-size: 24px;
	border: none;
	display: flex;
	align-items: center;
	justify-content: center;
	cursor: pointer;
	box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
	transition: background-color 0.3s;
	}
	.mic-button:hover {
	background-color: #0056b3;
	}
	.status {
	margin-top: 20px;
	font-size: 18px;
	color: #666;
	}
	.response {
	margin-top: 20px;
	padding: 10px;
	background-color: #fff;
	border: 1px solid #ddd;
	border-radius: 5px;
	box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
	width: 300px;
	text-align: center;
	}
	</style>
	</head>
	<body>
	<h1>AI Dining Assistant</h1>
	<button class="mic-button" id="mic-button">🎤</button>
	<div class="status" id="status">Press the mic button to start the conversation...</div>
	<div class="response" id="response" style="display: none;">Response will appear here...</div>
	<script>
	const micButton = document.getElementById('mic-button');
	const status = document.getElementById('status');
	const response = document.getElementById('response');
	let mediaRecorder;
	let audioChunks = [];
	let isListening = false;

	micButton.addEventListener('click', () => {
	if (!isListening) {
	isListening = true;
	startConversation();
	}
	});

	function startConversation() {
	status.textContent = 'Please listen to the instructions. Once you are ready, I will start listening to your commands.';
	showInstructions();
	}

	function showInstructions() {
	setTimeout(() => {
	status.textContent = 'Listening...';
	response.style.display = 'none';
	startListening();
	}, 3000); // Wait 3 seconds to ensure user reads instructions
	}

	function startListening() {
	navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
	mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' });
	mediaRecorder.start();
	audioChunks = [];

	mediaRecorder.ondataavailable = event => audioChunks.push(event.data);
	mediaRecorder.onstop = async () => {
	status.textContent = 'Processing...';

	const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
	const formData = new FormData();
	formData.append('audio', audioBlob);

	try {
	const result = await fetch('/process-audio', { method: 'POST', body: formData });
	const data = await result.json();
	response.textContent = data.response;
	response.style.display = 'block';

	try {
	const utterance = new SpeechSynthesisUtterance(data.response);
	speechSynthesis.speak(utterance);
	utterance.onend = () => {
	console.log("Speech synthesis completed.");
	};
	utterance.onerror = (e) => {
	console.error("Speech synthesis error:", e.error);
	status.textContent = 'Error with speech output.';
	};
	} catch (speechError) {
	console.error("Speech synthesis not supported or failed:", speechError);
	response.textContent = "Speech output unavailable. Please check your browser.";
	}

	if (data.response.includes("Goodbye")) {
	status.textContent = 'Conversation ended. Press the mic button to start again.';
	isListening = false;
	} else {
	status.textContent = 'Listening...';
	setTimeout(startListening, 1000);
	}
	} catch (error) {
	response.textContent = 'Error occurred. Please try again.';
	response.style.display = 'block';
	status.textContent = 'Press the mic button to restart the conversation.';
	isListening = false;
	}
	};
	setTimeout(() => mediaRecorder.stop(), 10000); // Wait for 10 seconds of speaking time
	}).catch(() => {
	status.textContent = 'Microphone access denied.';
	isListening = false;
	});
	}
	</script>
	</body>
	</html>
	"""

	@app.route('/')
	def index():
	    """Serve the assistant's single-page UI rendered from ``html_code``."""
	    page = render_template_string(html_code)
	    return page

	@app.route('/process-audio', methods=['POST'])
	def process_audio():
	    """Transcribe an uploaded audio clip and answer the spoken command.

	    Reads the ``audio`` file from the multipart form, converts it with
	    ffmpeg to 16 kHz mono PCM WAV, transcribes it with Google Speech
	    Recognition and hands the recognized text to ``process_command``.

	    Returns:
	        A JSON response of the form ``{"response": <text>}``. Failures are
	        reported through that same field rather than through an HTTP error
	        status, because the page script reads ``data.response`` directly.
	    """
	    # Track the temp paths separately so cleanup works no matter how far
	    # processing got before a failure (they stay None until created).
	    temp_path = None
	    converted_path = None
	    try:
	        # Validate audio file
	        audio_file = request.files.get('audio')
	        if not audio_file:
	            raise BadRequest("No audio file provided.")

	        # Only the path is needed: close the handle immediately so the file
	        # descriptor is not leaked and the path can be reopened for writing.
	        with NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
	            temp_path = temp_file.name
	        audio_file.save(temp_path)
	        logging.info(f"Saved input audio to {temp_path}")

	        if os.path.getsize(temp_path) == 0:
	            raise BadRequest("Uploaded audio file is empty.")

	        # Convert audio to PCM WAV format (16kHz, mono)
	        with NamedTemporaryFile(delete=False, suffix=".wav") as converted_file:
	            converted_path = converted_file.name
	        try:
	            ffmpeg.input(temp_path).output(
	                converted_path, acodec='pcm_s16le', ac=1, ar='16000'
	            ).run(overwrite_output=True)
	        except Exception as ffmpeg_error:
	            logging.error(f"FFmpeg conversion error: {str(ffmpeg_error)}")
	            return jsonify({"response": "Audio conversion failed. Please try again."})

	        logging.info(f"Converted audio saved to {converted_path}")

	        # Recognize speech
	        recognizer = sr.Recognizer()
	        with sr.AudioFile(converted_path) as source:
	            audio_data = recognizer.record(source)

	        try:
	            command = recognizer.recognize_google(audio_data)
	            logging.info(f"Recognized command: {command}")
	            response = process_command(command)
	        except sr.UnknownValueError:
	            # Raised when the audio contains no intelligible speech; without
	            # this branch the user would get "An error occurred: " with an
	            # empty message from the generic handler below.
	            logging.info("Speech was not understood.")
	            response = "Sorry, I could not understand the audio. Please try again."
	        except sr.RequestError as e:
	            logging.error(f"Error with Google Speech Recognition service: {e}")
	            response = "Sorry, there was an issue with the speech recognition service."

	        return jsonify({"response": response})

	    except BadRequest as br:
	        logging.error(f"Bad request error: {br}")
	        return jsonify({"response": f"Bad Request: {str(br)}"})

	    except Exception as e:
	        logging.error(f"Error processing audio: {e}")
	        return jsonify({"response": f"An error occurred: {str(e)}"})

	    finally:
	        # Clean up temporary files; each path is handled independently so a
	        # failure on one does not prevent removal of the other.
	        for path in (temp_path, converted_path):
	            if path is None:
	                continue
	            try:
	                os.unlink(path)
	            except FileNotFoundError:
	                pass
	            except OSError as cleanup_error:
	                logging.error(f"Error cleaning up files: {cleanup_error}")

	def _find_menu_item(text):
	    """Return the menu item named in *text* (case-insensitive), or None.

	    *text* is expected to be lowercase. The longest matching item name wins
	    so that extra words around it ("... to my order") do not matter.
	    """
	    best = None
	    for items in menu_items.values():
	        for item in items:
	            if item.lower() in text and (best is None or len(item) > len(best)):
	                best = item
	    return best


	def process_command(command):
	    """Translate a recognized voice command into the assistant's reply.

	    Supported intents, checked in this order: show the menu, add an item,
	    show the current order, ask to place the order, and confirm with "yes".
	    Anything else yields a help message.

	    Args:
	        command: The transcribed text of what the user said.

	    Returns:
	        The reply text to display and speak.

	    Side effects:
	        Appends to the module-level ``user_order`` when an item is added and
	        empties it once the order is confirmed.
	    """
	    # Normalize the command to lowercase
	    command = command.lower()

	    # Show the menu without categorizing the items
	    if "menu" in command:
	        return (
	            "Here are the available food items: "
	            "Vegetable Biryani, Butter Chicken, Paneer Butter Masala, Chicken Biryani, "
	            "Lassi, Milkshake, Gulab Jamun, Ice Cream. "
	            "Please let me know what you'd like to add to your order."
	        )

	    # Add item to order
	    if "add" in command:
	        item_to_add = command.split("add")[-1].strip()

	        # The command is lowercase while menu entries are capitalized, so the
	        # lookup must be case-insensitive; the canonical name is stored.
	        matched_item = _find_menu_item(item_to_add)
	        if matched_item is not None:
	            user_order.append(matched_item)
	            return f"{matched_item} has been added to your order. Would you like to add more items?"
	        return f"Sorry, {item_to_add} is not available in the menu. Please choose from the available items."

	    # Show current order
	    if "show my order" in command or "what's my order" in command or "what's in my order" in command:
	        if user_order:
	            return "Your current order includes: " + ", ".join(user_order)
	        return "You haven't added anything to your order yet."

	    # Place order (Ask for confirmation)
	    if "place order" in command or "confirm order" in command:
	        if user_order:
	            return f"You have the following items in your order: {', '.join(user_order)}. Would you like to confirm? (Say 'yes' to confirm, 'no' to cancel)"
	        return "You haven't added anything to your order yet. Please add some items first."

	    # Final confirmation (user says yes to confirm the order)
	    if "yes" in command:
	        if user_order:
	            # The order has been handed off; empty it in place so the next
	            # order does not start with these items.
	            user_order.clear()
	            return "Your order has been confirmed and sent to the kitchen. Thank you for ordering!"
	        return "Please add some items to your order before confirming."

	    # Handle unrecognized commands
	    return (
	        "Sorry, I didn’t understand your request. You can say things like:\n"
	        "- Show me the menu\n"
	        "- Add [item] to my order\n"
	        "- Show my order\n"
	        "- Place the order"
	    )



	if __name__ == "__main__":
	    # Start Flask's built-in server on every network interface, port 7860.
	    app.run(port=7860, host="0.0.0.0")