dschandra committed on
Commit
46ee408
·
verified ·
1 Parent(s): b1431be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -226
app.py CHANGED
@@ -1,239 +1,67 @@
1
- from flask import Flask, render_template_string, request, jsonify
2
- import speech_recognition as sr
3
- from tempfile import NamedTemporaryFile
4
- import os
5
- import ffmpeg
6
- import logging
7
- from werkzeug.exceptions import BadRequest
8
 
 
9
  app = Flask(__name__)
10
- logging.basicConfig(level=logging.INFO)
11
 
12
- # Global variables
13
- cart = [] # To store items and prices
14
- MENU = {
15
- "Biryani": {"Chicken Biryani": 250, "Veg Biryani": 200, "Mutton Biryani": 300},
16
- "Starters": {
17
- "Chicken Wings": 220,
18
- "Paneer Tikka": 180,
19
- "Fish Fingers": 250,
20
- "Spring Rolls": 160,
21
- },
22
- "Breads": {
23
- "Butter Naan": 50,
24
- "Garlic Naan": 60,
25
- "Roti": 40,
26
- "Lachha Paratha": 70,
27
- },
28
- "Curries": {
29
- "Butter Chicken": 300,
30
- "Paneer Butter Masala": 250,
31
- "Dal Tadka": 200,
32
- "Chicken Tikka Masala": 320,
33
- },
34
- "Drinks": {"Coke": 60, "Sprite": 60, "Mango Lassi": 80, "Masala Soda": 70},
35
- "Desserts": {
36
- "Gulab Jamun": 100,
37
- "Rasgulla": 90,
38
- "Ice Cream": 120,
39
- "Brownie with Ice Cream": 180,
40
- },
41
- }
42
 
43
- # HTML Template for Frontend
44
- html_code = """
45
- <!DOCTYPE html>
46
- <html lang="en">
47
- <head>
48
- <meta charset="UTF-8">
49
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
50
- <title>AI Dining Assistant</title>
51
- <style>
52
- body {
53
- font-family: Arial, sans-serif;
54
- text-align: center;
55
- background-color: #f4f4f9;
56
- }
57
- h1 {
58
- color: #333;
59
- }
60
- .mic-button {
61
- width: 80px;
62
- height: 80px;
63
- border-radius: 50%;
64
- background-color: #007bff;
65
- color: white;
66
- font-size: 24px;
67
- border: none;
68
- cursor: pointer;
69
- }
70
- .status, .response {
71
- margin-top: 20px;
72
- }
73
- </style>
74
- </head>
75
- <body>
76
- <h1>AI Dining Assistant</h1>
77
- <button class="mic-button" id="mic-button">🎤</button>
78
- <div class="status" id="status">Press the mic button to start...</div>
79
- <div class="response" id="response" style="display: none;">Response will appear here...</div>
80
- <script>
81
- const micButton = document.getElementById('mic-button');
82
- const status = document.getElementById('status');
83
- const response = document.getElementById('response');
84
- let isListening = false;
85
-
86
- micButton.addEventListener('click', () => {
87
- if (!isListening) {
88
- isListening = true;
89
- greetUser();
90
- }
91
- });
92
-
93
- function greetUser() {
94
- const utterance = new SpeechSynthesisUtterance("Hi. Welcome to Biryani Hub. Can I show you the menu?");
95
- speechSynthesis.speak(utterance);
96
- utterance.onend = () => {
97
- status.textContent = "Listening...";
98
- startListening();
99
- };
100
- }
101
-
102
- async function startListening() {
103
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
104
- const mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm;codecs=opus" });
105
- const audioChunks = [];
106
-
107
- mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);
108
- mediaRecorder.onstop = async () => {
109
- const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
110
- const formData = new FormData();
111
- formData.append("audio", audioBlob);
112
-
113
- status.textContent = "Processing...";
114
- try {
115
- const result = await fetch("/process-audio", { method: "POST", body: formData });
116
- const data = await result.json();
117
- response.textContent = data.response;
118
- response.style.display = "block";
119
-
120
- const utterance = new SpeechSynthesisUtterance(data.response);
121
- speechSynthesis.speak(utterance);
122
- utterance.onend = () => {
123
- if (!data.response.includes("Goodbye") && !data.response.includes("final order")) {
124
- startListening(); // Continue listening
125
- } else {
126
- status.textContent = "Conversation ended.";
127
- isListening = false;
128
- }
129
- };
130
- } catch (error) {
131
- response.textContent = "Error processing your request. Please try again.";
132
- status.textContent = "Press the mic button to restart.";
133
- isListening = false;
134
- }
135
- };
136
-
137
- mediaRecorder.start();
138
- setTimeout(() => mediaRecorder.stop(), 5000); // Stop recording after 5 seconds
139
- }
140
- </script>
141
- </body>
142
- </html>
143
- """
144
-
145
- @app.route("/")
146
- def index():
147
- return render_template_string(html_code)
148
-
149
- @app.route("/process-audio", methods=["POST"])
150
  def process_audio():
151
  try:
152
- audio_file = request.files.get("audio")
153
- if not audio_file:
154
- raise BadRequest("No audio file provided.")
155
-
156
- temp_file = NamedTemporaryFile(delete=False, suffix=".webm")
157
- audio_file.save(temp_file.name)
158
 
159
- if os.path.getsize(temp_file.name) == 0:
160
- raise BadRequest("Uploaded audio file is empty.")
161
 
162
- converted_file = NamedTemporaryFile(delete=False, suffix=".wav")
163
- ffmpeg.input(temp_file.name).output(
164
- converted_file.name, acodec="pcm_s16le", ac=1, ar="16000"
165
- ).run(overwrite_output=True)
166
 
167
- recognizer = sr.Recognizer()
168
- with sr.AudioFile(converted_file.name) as source:
169
- audio_data = recognizer.record(source)
170
- try:
171
- command = recognizer.recognize_google(audio_data)
172
- response = process_command(command)
173
- except sr.UnknownValueError:
174
- response = "Sorry, I could not understand. Please try again."
175
 
176
- return jsonify({"response": response})
 
 
177
 
178
- except BadRequest as br:
179
- return jsonify({"response": f"Bad Request: {str(br)}"}), 400
 
 
 
180
  except Exception as e:
181
- return jsonify({"response": f"An error occurred: {str(e)}"}), 500
182
- finally:
183
- os.unlink(temp_file.name)
184
- os.unlink(converted_file.name)
185
-
186
- def process_command(command):
187
- global cart, MENU
188
- command = command.lower()
189
-
190
- # Handle specific category requests
191
- for category in MENU.keys():
192
- if category.lower() in command:
193
- items = MENU[category]
194
- item_list = ", ".join([f"{item} (₹{price})" for item, price in items.items()])
195
- return f"{category} menu: {item_list}. What would you like to order?"
196
-
197
- # Handle full menu request
198
- if "menu" in command:
199
- categories = ", ".join(MENU.keys())
200
- return f"We have the following categories: {categories}. Which one would you like to explore?"
201
-
202
- # Add items to the cart
203
- all_items = {item.lower(): (category, price) for category, items in MENU.items() for item, price in items.items()}
204
- if command in all_items.keys():
205
- category, price = all_items[command]
206
- cart.append((command.title(), price))
207
- total = sum(item[1] for item in cart)
208
- cart_summary = ", ".join([f"{i[0]} (₹{i[1]})" for i in cart])
209
- return f"{command.title()} added to your cart. Your cart: {cart_summary}. Total: ₹{total}. Do you want to order anything else?"
210
-
211
- # Remove items from the cart
212
- if "remove" in command:
213
- for item in cart:
214
- if item[0].lower() in command:
215
- cart.remove(item)
216
- total = sum(i[1] for i in cart)
217
- cart_summary = ", ".join([f"{i[0]} (₹{i[1]})" for i in cart])
218
- return f"{item[0]} removed from your cart. Updated cart: {cart_summary}. Total: ₹{total}."
219
- return "The item you are trying to remove is not in your cart. Please check again."
220
-
221
- # Handle final order
222
- if "final order" in command or "submit" in command:
223
- if cart:
224
- items = ", ".join([f"{item[0]} (₹{item[1]})" for item in cart])
225
- total = sum(item[1] for item in cart)
226
- cart.clear()
227
- return f"Your final order is: {items}. Total price: ₹{total}. Thank you for ordering!"
228
- else:
229
- return "Your cart is empty. Please add items first."
230
-
231
- # Handle goodbye
232
- if "no" in command or "nothing" in command or "goodbye" in command:
233
- cart.clear()
234
- return "Goodbye! Thank you for using AI Dining Assistant."
235
-
236
- return "Sorry, I didn't understand that. Please try again."
237
-
238
  if __name__ == "__main__":
239
- app.run(host="0.0.0.0", port=7860)
 
1
# Import required libraries
from faster_whisper import WhisperModel
from transformers import pipeline
from bark import generate_audio
from flask import Flask, request, jsonify

# Initialize Flask app
app = Flask(__name__)

# Load models once at import time so every request reuses them.
# NOTE(review): device="cuda" hard-codes a GPU requirement — this raises on
# CPU-only hosts; confirm the deployment target or make the device configurable.
speech_model = WhisperModel("tiny", device="cuda", compute_type="float16")
# NOTE(review): "gpt-3.5-turbo" is an OpenAI API model name, not a Hugging Face
# Hub checkpoint — transformers.pipeline will fail to download it. Verify the
# intended model (e.g. an open text-generation checkpoint from the Hub).
nlp_model = pipeline("text-generation", model="gpt-3.5-turbo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Voice-assistant endpoint.

    Accepts a multipart upload under the form key "audio", transcribes it,
    generates a text reply, synthesizes that reply as speech, and returns
    JSON with keys "transcription", "response_text" and "response_audio_path".

    Returns 400 when no audio file is supplied, and 500 with the error
    message on any processing failure (original behavior).
    """
    import os  # local import: the module top level does not import os

    try:
        # Step 1: receive the audio file from the user.
        # .get() instead of ['audio'] so a missing field yields a clean 400
        # rather than a KeyError turned into a generic 500.
        audio_file = request.files.get('audio')
        if audio_file is None or not audio_file.filename:
            return jsonify({"error": "No audio file provided"}), 400

        # The original crashed if ./temp did not exist; create it up front.
        # basename() strips any client-supplied directory components so a
        # crafted filename cannot escape ./temp (path traversal).
        os.makedirs("./temp", exist_ok=True)
        audio_path = os.path.join("./temp", os.path.basename(audio_file.filename))
        audio_file.save(audio_path)

        # Step 2: transcribe the audio to text.
        transcription = transcribe_audio(audio_path)

        # Step 3: generate a response based on the transcription.
        response_text = generate_response(transcription)

        # Step 4: synthesize speech from the response text.
        response_audio = synthesize_speech(response_text)

        # Bark's generate_audio returns a raw numpy waveform, which has no
        # .export() method — the original pydub-style call
        # `response_audio.export(path, format="wav")` always raised
        # AttributeError. Write the array as a WAV at Bark's sample rate.
        from bark import SAMPLE_RATE  # bark is already a file-level dependency
        from scipy.io.wavfile import write as wav_write
        response_audio_path = "./temp/response_audio.wav"
        wav_write(response_audio_path, SAMPLE_RATE, response_audio)

        return jsonify({
            "transcription": transcription,
            "response_text": response_text,
            "response_audio_path": response_audio_path
        })
    except Exception as e:
        # Surface the failure to the client with a 500, as before.
        return jsonify({"error": str(e)}), 500
42
+
43
def transcribe_audio(audio_path):
    """Return the text transcription of the audio file at *audio_path*,
    produced by the module-level Whisper model."""
    segments, _info = speech_model.transcribe(audio_path)
    pieces = [segment.text for segment in segments]
    return " ".join(pieces)
50
+
51
def generate_response(user_input):
    """Produce a text reply for *user_input* using the module-level
    text-generation pipeline (sampled, capped at 100 tokens)."""
    outputs = nlp_model(user_input, max_length=100, do_sample=True)
    first_candidate = outputs[0]
    return first_candidate['generated_text']
57
+
58
def synthesize_speech(text):
    """Render *text* as a speech waveform via Bark and return the raw
    audio array unchanged."""
    waveform = generate_audio(text)
    return waveform
64
+
65
# Run the app
if __name__ == "__main__":
    # NOTE(review): debug=True enables the interactive Werkzeug debugger, and
    # host="0.0.0.0" binds on all interfaces — together these expose remote
    # code execution to anyone who can reach the port. Disable debug (and use
    # a production WSGI server) before deploying.
    app.run(debug=True, host="0.0.0.0", port=5000)