# AIVoice5 / app.py
# dschandra's picture
# Update app.py
# e2c4a3d verified
from flask import Flask, render_template_string, request, jsonify
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import os
import ffmpeg
import logging
from werkzeug.exceptions import BadRequest
# Emit INFO-level (and above) records through the root logger.
logging.basicConfig(level=logging.INFO)
# The Flask application object that the route decorators below attach to.
app = Flask(__name__)
# Items added to the order so far. This is module-level state, so it is
# shared by every request handled by this process.
user_order = []

# The menu, grouped by category (8 dishes in total).
menu_items = {
    "Veg": [
        "Vegetable Biryani",
    ],
    "Non-Veg": [
        "Butter Chicken",
    ],
    # One vegetarian and one non-vegetarian dish.
    "Both": [
        "Paneer Butter Masala",
        "Chicken Biryani",
    ],
    "Drinks": [
        "Lassi",
        "Milkshake",
    ],
    "Desserts": [
        "Gulab Jamun",
        "Ice Cream",
    ],
}
# Single-page frontend, served verbatim by the index route.
#
# Flow of the embedded script: the mic button starts a conversation; after a
# 3 second pause the browser records up to 10 seconds of audio, POSTs it to
# /process-audio as the "audio" form field, shows and speaks the JSON
# "response" text, then records again after 1 second unless the reply
# contains "Goodbye".
#
# The microphone stream is released as soon as each recording stops: a new
# stream is requested for every round, so keeping the old tracks alive would
# accumulate open captures and leave the mic active after the conversation.
html_code = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Dining Assistant</title>
<style>
body {
font-family: Arial, sans-serif;
background-color: #f4f4f9;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100vh;
margin: 0;
}
h1 {
color: #333;
}
.mic-button {
width: 80px;
height: 80px;
border-radius: 50%;
background-color: #007bff;
color: white;
font-size: 24px;
border: none;
display: flex;
align-items: center;
justify-content: center;
cursor: pointer;
box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
transition: background-color 0.3s;
}
.mic-button:hover {
background-color: #0056b3;
}
.status {
margin-top: 20px;
font-size: 18px;
color: #666;
}
.response {
margin-top: 20px;
padding: 10px;
background-color: #fff;
border: 1px solid #ddd;
border-radius: 5px;
box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
width: 300px;
text-align: center;
}
</style>
</head>
<body>
<h1>AI Dining Assistant</h1>
<button class="mic-button" id="mic-button">🎤</button>
<div class="status" id="status">Press the mic button to start the conversation...</div>
<div class="response" id="response" style="display: none;">Response will appear here...</div>
<script>
const micButton = document.getElementById('mic-button');
const status = document.getElementById('status');
const response = document.getElementById('response');
let mediaRecorder;
let audioChunks = [];
let isListening = false;
micButton.addEventListener('click', () => {
if (!isListening) {
isListening = true;
startConversation();
}
});
function startConversation() {
status.textContent = 'Please listen to the instructions. Once you are ready, I will start listening to your commands.';
showInstructions();
}
function showInstructions() {
setTimeout(() => {
status.textContent = 'Listening...';
response.style.display = 'none';
startListening();
}, 3000); // Wait 3 seconds to ensure user reads instructions
}
function startListening() {
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' });
mediaRecorder.start();
audioChunks = [];
mediaRecorder.ondataavailable = event => audioChunks.push(event.data);
mediaRecorder.onstop = async () => {
// Release the microphone; the next round requests a fresh stream.
stream.getTracks().forEach(track => track.stop());
status.textContent = 'Processing...';
const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
const formData = new FormData();
formData.append('audio', audioBlob);
try {
const result = await fetch('/process-audio', { method: 'POST', body: formData });
const data = await result.json();
response.textContent = data.response;
response.style.display = 'block';
try {
const utterance = new SpeechSynthesisUtterance(data.response);
speechSynthesis.speak(utterance);
utterance.onend = () => {
console.log("Speech synthesis completed.");
};
utterance.onerror = (e) => {
console.error("Speech synthesis error:", e.error);
status.textContent = 'Error with speech output.';
};
} catch (speechError) {
console.error("Speech synthesis not supported or failed:", speechError);
response.textContent = "Speech output unavailable. Please check your browser.";
}
if (data.response.includes("Goodbye")) {
status.textContent = 'Conversation ended. Press the mic button to start again.';
isListening = false;
} else {
status.textContent = 'Listening...';
setTimeout(startListening, 1000);
}
} catch (error) {
response.textContent = 'Error occurred. Please try again.';
response.style.display = 'block';
status.textContent = 'Press the mic button to restart the conversation.';
isListening = false;
}
};
setTimeout(() => mediaRecorder.stop(), 10000); // Wait for 10 seconds of speaking time
}).catch(() => {
status.textContent = 'Microphone access denied.';
isListening = false;
});
}
</script>
</body>
</html>
"""
@app.route('/')
def index():
    """Serve the voice-assistant page by rendering the ``html_code`` template."""
    page = render_template_string(html_code)
    return page
@app.route('/process-audio', methods=['POST'])
def process_audio():
    """Transcribe an uploaded voice clip and answer it as an ordering command.

    Expects a multipart POST with the recording in the ``audio`` form field.
    The clip is written to a temporary ``.webm`` file, converted by FFmpeg to
    16 kHz mono PCM WAV, transcribed with ``recognize_google`` and the
    recognised text is handed to ``process_command``.

    Returns:
        A JSON body of the form ``{"response": <text>}``. Failures (missing
        or empty upload, conversion error, recognition error) are reported
        in that same shape so the frontend can always show the message.
    """
    # Only paths of files that were actually created are recorded, so the
    # cleanup in ``finally`` is safe even when the request is rejected
    # before either temporary file exists.
    temp_paths = []
    try:
        # Validate audio file
        audio_file = request.files.get('audio')
        if not audio_file:
            raise BadRequest("No audio file provided.")

        # Close the handle straight away: only the reserved path is needed,
        # and a lingering open handle would leak a file descriptor.
        with NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
            input_path = temp_file.name
        temp_paths.append(input_path)
        audio_file.save(input_path)
        logging.info(f"Saved input audio to {input_path}")

        if os.path.getsize(input_path) == 0:
            raise BadRequest("Uploaded audio file is empty.")

        # Convert audio to PCM WAV format (16kHz, mono)
        with NamedTemporaryFile(delete=False, suffix=".wav") as converted_file:
            wav_path = converted_file.name
        temp_paths.append(wav_path)
        try:
            ffmpeg.input(input_path).output(
                wav_path, acodec='pcm_s16le', ac=1, ar='16000'
            ).run(overwrite_output=True)
        except Exception as ffmpeg_error:
            logging.error(f"FFmpeg conversion error: {str(ffmpeg_error)}")
            return jsonify({"response": "Audio conversion failed. Please try again."})
        logging.info(f"Converted audio saved to {wav_path}")

        # Recognize speech
        recognizer = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

        try:
            command = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Raised when the speech is unintelligible (e.g. silence); its
            # message is empty, so give the user something actionable.
            logging.info("Speech could not be understood.")
            response = "Sorry, I could not understand what you said. Please try again."
        except sr.RequestError as e:
            logging.error(f"Error with Google Speech Recognition service: {e}")
            response = "Sorry, there was an issue with the speech recognition service."
        else:
            logging.info(f"Recognized command: {command}")
            response = process_command(command)
        return jsonify({"response": response})
    except BadRequest as br:
        logging.error(f"Bad request error: {br}")
        return jsonify({"response": f"Bad Request: {str(br)}"})
    except Exception as e:
        logging.error(f"Error processing audio: {e}")
        return jsonify({"response": f"An error occurred: {str(e)}"})
    finally:
        # Clean up temporary files; a failure here must not mask the reply.
        for path in temp_paths:
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                logging.error(f"Error cleaning up files: {cleanup_error}")
def process_command(command):
    """Turn a transcribed utterance into the assistant's reply.

    Matching is keyword based and case-insensitive. Branches are tried in
    this order: menu, add item, show order, place/confirm order, "yes",
    goodbye, "no", and finally a help message.

    Args:
        command: Text recognised from the user's speech.

    Returns:
        The reply text to display and speak. A reply containing "Goodbye"
        is what the frontend uses to end the conversation.

    Side effects:
        Mutates the module-level ``user_order`` in place: items are appended
        when added, and the list is emptied once an order is confirmed.
    """
    # Lower-case for matching and fold the typographic apostrophe so that
    # "what’s" and "what's" are treated alike.
    command = command.lower().replace("’", "'")
    # Whole words with punctuation stripped, for short answers such as
    # "yes"/"no" that would otherwise match inside longer words.
    words = {word.strip(".,!?;:'\"") for word in command.split()}
    # Flatten the categorised menu; dict order gives the order it is read out.
    all_items = [item for items in menu_items.values() for item in items]

    # Show the menu without categorizing the items
    if "menu" in command:
        return (
            "Here are the available food items: "
            + ", ".join(all_items)
            + ". Please let me know what you'd like to add to your order."
        )

    # Add item(s) to order
    if "add" in command:
        # Menu names are Title Case while the command is lower-cased, so the
        # comparison must be case-insensitive; searching the whole sentence
        # lets "add lassi to my order" match as well as "add lassi".
        requested = [item for item in all_items if item.lower() in command]
        if requested:
            user_order.extend(requested)
            verb = "has" if len(requested) == 1 else "have"
            return (
                f"{', '.join(requested)} {verb} been added to your order. "
                "Would you like to add more items?"
            )
        unknown = command.rpartition("add")[2].strip() or "that item"
        return f"Sorry, {unknown} is not available in the menu. Please choose from the available items."

    # Show current order
    if any(
        phrase in command
        for phrase in ("show my order", "what's my order", "what's in my order")
    ):
        if user_order:
            return "Your current order includes: " + ", ".join(user_order)
        return "You haven't added anything to your order yet."

    # Place order (ask for confirmation). Accepts "place order", "place the
    # order", "confirm my order", and so on.
    if "order" in command and ("place" in command or "confirm" in command):
        if user_order:
            return f"You have the following items in your order: {', '.join(user_order)}. Would you like to confirm? (Say 'yes' to confirm, 'no' to cancel)"
        return "You haven't added anything to your order yet. Please add some items first."

    # Final confirmation (user says yes to confirm the order)
    if "yes" in words:
        if user_order:
            # The order has been handed off; start the next one empty.
            user_order.clear()
            return "Your order has been confirmed and sent to the kitchen. Thank you for ordering!"
        return "Please add some items to your order before confirming."

    # Let the user end the conversation.
    if words & {"goodbye", "bye"}:
        return "Goodbye! Thank you for visiting."

    # Declining keeps the order intact, because "no" may equally answer
    # "Would you like to add more items?".
    if "no" in words:
        return (
            "Okay, your order has not been placed. "
            "You can add more items or say 'place order' when you are ready."
        )

    # Handle unrecognized commands
    return (
        "Sorry, I didn’t understand your request. You can say things like:\n"
        "- Show me the menu\n"
        "- Add [item] to my order\n"
        "- Show my order\n"
        "- Place the order"
    )
# Script entry point: serve the app on all network interfaces, port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")