dschandra committed on
Commit
46ee408
·
verified ·
1 Parent(s): b1431be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -226
app.py CHANGED
@@ -1,239 +1,67 @@
1
- from flask import Flask, render_template_string, request, jsonify
2
- import speech_recognition as sr
3
- from tempfile import NamedTemporaryFile
4
- import os
5
- import ffmpeg
6
- import logging
7
- from werkzeug.exceptions import BadRequest
8
 
 
9
  app = Flask(__name__)
10
- logging.basicConfig(level=logging.INFO)
11
 
12
- # Global variables
13
- cart = [] # To store items and prices
14
- MENU = {
15
- "Biryani": {"Chicken Biryani": 250, "Veg Biryani": 200, "Mutton Biryani": 300},
16
- "Starters": {
17
- "Chicken Wings": 220,
18
- "Paneer Tikka": 180,
19
- "Fish Fingers": 250,
20
- "Spring Rolls": 160,
21
- },
22
- "Breads": {
23
- "Butter Naan": 50,
24
- "Garlic Naan": 60,
25
- "Roti": 40,
26
- "Lachha Paratha": 70,
27
- },
28
- "Curries": {
29
- "Butter Chicken": 300,
30
- "Paneer Butter Masala": 250,
31
- "Dal Tadka": 200,
32
- "Chicken Tikka Masala": 320,
33
- },
34
- "Drinks": {"Coke": 60, "Sprite": 60, "Mango Lassi": 80, "Masala Soda": 70},
35
- "Desserts": {
36
- "Gulab Jamun": 100,
37
- "Rasgulla": 90,
38
- "Ice Cream": 120,
39
- "Brownie with Ice Cream": 180,
40
- },
41
- }
42
 
43
- # HTML Template for Frontend
44
- html_code = """
45
- <!DOCTYPE html>
46
- <html lang="en">
47
- <head>
48
- <meta charset="UTF-8">
49
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
50
- <title>AI Dining Assistant</title>
51
- <style>
52
- body {
53
- font-family: Arial, sans-serif;
54
- text-align: center;
55
- background-color: #f4f4f9;
56
- }
57
- h1 {
58
- color: #333;
59
- }
60
- .mic-button {
61
- width: 80px;
62
- height: 80px;
63
- border-radius: 50%;
64
- background-color: #007bff;
65
- color: white;
66
- font-size: 24px;
67
- border: none;
68
- cursor: pointer;
69
- }
70
- .status, .response {
71
- margin-top: 20px;
72
- }
73
- </style>
74
- </head>
75
- <body>
76
- <h1>AI Dining Assistant</h1>
77
- <button class="mic-button" id="mic-button">🎤</button>
78
- <div class="status" id="status">Press the mic button to start...</div>
79
- <div class="response" id="response" style="display: none;">Response will appear here...</div>
80
- <script>
81
- const micButton = document.getElementById('mic-button');
82
- const status = document.getElementById('status');
83
- const response = document.getElementById('response');
84
- let isListening = false;
85
-
86
- micButton.addEventListener('click', () => {
87
- if (!isListening) {
88
- isListening = true;
89
- greetUser();
90
- }
91
- });
92
-
93
- function greetUser() {
94
- const utterance = new SpeechSynthesisUtterance("Hi. Welcome to Biryani Hub. Can I show you the menu?");
95
- speechSynthesis.speak(utterance);
96
- utterance.onend = () => {
97
- status.textContent = "Listening...";
98
- startListening();
99
- };
100
- }
101
-
102
- async function startListening() {
103
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
104
- const mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm;codecs=opus" });
105
- const audioChunks = [];
106
-
107
- mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);
108
- mediaRecorder.onstop = async () => {
109
- const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
110
- const formData = new FormData();
111
- formData.append("audio", audioBlob);
112
-
113
- status.textContent = "Processing...";
114
- try {
115
- const result = await fetch("/process-audio", { method: "POST", body: formData });
116
- const data = await result.json();
117
- response.textContent = data.response;
118
- response.style.display = "block";
119
-
120
- const utterance = new SpeechSynthesisUtterance(data.response);
121
- speechSynthesis.speak(utterance);
122
- utterance.onend = () => {
123
- if (!data.response.includes("Goodbye") && !data.response.includes("final order")) {
124
- startListening(); // Continue listening
125
- } else {
126
- status.textContent = "Conversation ended.";
127
- isListening = false;
128
- }
129
- };
130
- } catch (error) {
131
- response.textContent = "Error processing your request. Please try again.";
132
- status.textContent = "Press the mic button to restart.";
133
- isListening = false;
134
- }
135
- };
136
-
137
- mediaRecorder.start();
138
- setTimeout(() => mediaRecorder.stop(), 5000); // Stop recording after 5 seconds
139
- }
140
- </script>
141
- </body>
142
- </html>
143
- """
144
-
145
- @app.route("/")
146
- def index():
147
- return render_template_string(html_code)
148
-
149
- @app.route("/process-audio", methods=["POST"])
150
  def process_audio():
151
  try:
152
- audio_file = request.files.get("audio")
153
- if not audio_file:
154
- raise BadRequest("No audio file provided.")
155
-
156
- temp_file = NamedTemporaryFile(delete=False, suffix=".webm")
157
- audio_file.save(temp_file.name)
158
 
159
- if os.path.getsize(temp_file.name) == 0:
160
- raise BadRequest("Uploaded audio file is empty.")
161
 
162
- converted_file = NamedTemporaryFile(delete=False, suffix=".wav")
163
- ffmpeg.input(temp_file.name).output(
164
- converted_file.name, acodec="pcm_s16le", ac=1, ar="16000"
165
- ).run(overwrite_output=True)
166
 
167
- recognizer = sr.Recognizer()
168
- with sr.AudioFile(converted_file.name) as source:
169
- audio_data = recognizer.record(source)
170
- try:
171
- command = recognizer.recognize_google(audio_data)
172
- response = process_command(command)
173
- except sr.UnknownValueError:
174
- response = "Sorry, I could not understand. Please try again."
175
 
176
- return jsonify({"response": response})
 
 
177
 
178
- except BadRequest as br:
179
- return jsonify({"response": f"Bad Request: {str(br)}"}), 400
 
 
 
180
  except Exception as e:
181
- return jsonify({"response": f"An error occurred: {str(e)}"}), 500
182
- finally:
183
- os.unlink(temp_file.name)
184
- os.unlink(converted_file.name)
185
-
186
- def process_command(command):
187
- global cart, MENU
188
- command = command.lower()
189
-
190
- # Handle specific category requests
191
- for category in MENU.keys():
192
- if category.lower() in command:
193
- items = MENU[category]
194
- item_list = ", ".join([f"{item} (₹{price})" for item, price in items.items()])
195
- return f"{category} menu: {item_list}. What would you like to order?"
196
-
197
- # Handle full menu request
198
- if "menu" in command:
199
- categories = ", ".join(MENU.keys())
200
- return f"We have the following categories: {categories}. Which one would you like to explore?"
201
-
202
- # Add items to the cart
203
- all_items = {item.lower(): (category, price) for category, items in MENU.items() for item, price in items.items()}
204
- if command in all_items.keys():
205
- category, price = all_items[command]
206
- cart.append((command.title(), price))
207
- total = sum(item[1] for item in cart)
208
- cart_summary = ", ".join([f"{i[0]} (₹{i[1]})" for i in cart])
209
- return f"{command.title()} added to your cart. Your cart: {cart_summary}. Total: ₹{total}. Do you want to order anything else?"
210
-
211
- # Remove items from the cart
212
- if "remove" in command:
213
- for item in cart:
214
- if item[0].lower() in command:
215
- cart.remove(item)
216
- total = sum(i[1] for i in cart)
217
- cart_summary = ", ".join([f"{i[0]} (₹{i[1]})" for i in cart])
218
- return f"{item[0]} removed from your cart. Updated cart: {cart_summary}. Total: ₹{total}."
219
- return "The item you are trying to remove is not in your cart. Please check again."
220
-
221
- # Handle final order
222
- if "final order" in command or "submit" in command:
223
- if cart:
224
- items = ", ".join([f"{item[0]} (₹{item[1]})" for item in cart])
225
- total = sum(item[1] for item in cart)
226
- cart.clear()
227
- return f"Your final order is: {items}. Total price: ₹{total}. Thank you for ordering!"
228
- else:
229
- return "Your cart is empty. Please add items first."
230
-
231
- # Handle goodbye
232
- if "no" in command or "nothing" in command or "goodbye" in command:
233
- cart.clear()
234
- return "Goodbye! Thank you for using AI Dining Assistant."
235
-
236
- return "Sorry, I didn't understand that. Please try again."
237
-
238
  if __name__ == "__main__":
239
- app.run(host="0.0.0.0", port=7860)
 
1
# Import required libraries
from faster_whisper import WhisperModel
from transformers import pipeline
from bark import generate_audio
from flask import Flask, request, jsonify

# Initialize Flask app
app = Flask(__name__)

# Load models once at import time so every request reuses them.
# NOTE(review): device="cuda" hard-codes a GPU requirement — this raises on
# CPU-only hosts; confirm the deployment target or make the device configurable.
speech_model = WhisperModel("tiny", device="cuda", compute_type="float16")
# NOTE(review): "gpt-3.5-turbo" is an OpenAI API model name, not a Hugging Face
# Hub checkpoint — transformers.pipeline will fail to download it. Verify the
# intended model (e.g. an open text-generation checkpoint from the Hub).
nlp_model = pipeline("text-generation", model="gpt-3.5-turbo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Voice-assistant endpoint.

    Accepts a multipart upload under the form key "audio", transcribes it,
    generates a text reply, synthesizes that reply as speech, and returns
    JSON with keys "transcription", "response_text" and "response_audio_path".

    Returns 400 when no audio file is supplied, and 500 with the error
    message on any processing failure (original behavior).
    """
    import os  # local import: the module top level does not import os

    try:
        # Step 1: receive the audio file from the user.
        # .get() instead of ['audio'] so a missing field yields a clean 400
        # rather than a KeyError turned into a generic 500.
        audio_file = request.files.get('audio')
        if audio_file is None or not audio_file.filename:
            return jsonify({"error": "No audio file provided"}), 400

        # The original crashed if ./temp did not exist; create it up front.
        # basename() strips any client-supplied directory components so a
        # crafted filename cannot escape ./temp (path traversal).
        os.makedirs("./temp", exist_ok=True)
        audio_path = os.path.join("./temp", os.path.basename(audio_file.filename))
        audio_file.save(audio_path)

        # Step 2: transcribe the audio to text.
        transcription = transcribe_audio(audio_path)

        # Step 3: generate a response based on the transcription.
        response_text = generate_response(transcription)

        # Step 4: synthesize speech from the response text.
        response_audio = synthesize_speech(response_text)

        # Bark's generate_audio returns a raw numpy waveform, which has no
        # .export() method — the original pydub-style call
        # `response_audio.export(path, format="wav")` always raised
        # AttributeError. Write the array as a WAV at Bark's sample rate.
        from bark import SAMPLE_RATE  # bark is already a file-level dependency
        from scipy.io.wavfile import write as wav_write
        response_audio_path = "./temp/response_audio.wav"
        wav_write(response_audio_path, SAMPLE_RATE, response_audio)

        return jsonify({
            "transcription": transcription,
            "response_text": response_text,
            "response_audio_path": response_audio_path
        })
    except Exception as e:
        # Surface the failure to the client with a 500, as before.
        return jsonify({"error": str(e)}), 500
42
+
43
def transcribe_audio(audio_path):
    """Return the text transcription of the audio file at *audio_path*,
    produced by the module-level Whisper model."""
    segments, _info = speech_model.transcribe(audio_path)
    pieces = [segment.text for segment in segments]
    return " ".join(pieces)
50
+
51
def generate_response(user_input):
    """Produce a text reply for *user_input* using the module-level
    text-generation pipeline (sampled, capped at 100 tokens)."""
    outputs = nlp_model(user_input, max_length=100, do_sample=True)
    first_candidate = outputs[0]
    return first_candidate['generated_text']
57
+
58
def synthesize_speech(text):
    """Render *text* as a speech waveform via Bark and return the raw
    audio array unchanged."""
    waveform = generate_audio(text)
    return waveform
64
+
65
# Run the app
if __name__ == "__main__":
    # NOTE(review): debug=True enables the interactive Werkzeug debugger, and
    # host="0.0.0.0" binds on all interfaces — together these expose remote
    # code execution to anyone who can reach the port. Disable debug (and use
    # a production WSGI server) before deploying.
    app.run(debug=True, host="0.0.0.0", port=5000)