Spaces:

Athspi
/

athspi

Sleeping

App Files Files Community

Athspi committed 15 days ago

Commit

465bca7

verified ·

1 Parent(s): 062884e

Update app.py

Browse files

Files changed (1) hide show

app.py +144 -81

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py - Flask Backend
 from flask import Flask, request, jsonify, send_from_directory
 import google.generativeai as genai
 from dotenv import load_dotenv
@@ -6,131 +6,194 @@ import os
 from flask_cors import CORS
 import markdown2
 import re
-from gtts import gTTS # <-- Import gTTS
-import uuid # <-- Import UUID for unique filenames
 # Load environment variables
 load_dotenv()
-# Define paths and create static audio directory if it doesn't exist
 AUDIO_FOLDER = os.path.join('static', 'audio')
-if not os.path.exists(AUDIO_FOLDER):
-    os.makedirs(AUDIO_FOLDER)
 # Initialize Flask app
 app = Flask(__name__, static_folder='static')
-CORS(app)  # Enable CORS for all routes
-# Configure Gemini with a system instruction
-# This guides the AI's behavior and ensures responses are good for TTS.
-system_instruction_text = """
-You are a helpful, friendly, and informative AI assistant named AstroChat.
-Your goal is to provide clear, concise, and natural-sounding answers to user queries.
-When you respond:
-- Use clear and simple language.
-- Avoid overly complex sentence structures that might be hard to read aloud.
-- Keep the user engaged and offer follow-up questions or related topics where appropriate.
-- Ensure your responses are suitable for text-to-speech conversion.
-- Provide factual and accurate information.
 """
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
-# Initialize the model with the system instruction
 model = genai.GenerativeModel(
-    'gemini-2.5-flash', # Using 1.5-flash for better performance and system_instruction support
-    system_instruction=system_instruction_text
 )
-def convert_markdown_to_html(text):
-    # Convert markdown to HTML
-    # Using 'fenced-code-blocks' and 'tables' for better markdown support
-    html = markdown2.markdown(text, extras=["fenced-code-blocks", "tables"])
-    # Add custom styling to code blocks (pre blocks)
-    # This specifically targets `<pre><code>` blocks generated by markdown2 for styling.
-    html = re.sub(r'<pre><code(.*?)>', r'<pre class="code-block"><code\1>', html)
-    # Convert **bold** to <strong> for better visibility (markdown2 usually handles this, but good to ensure)
-    html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
-    # Convert *italic* to <em> (markdown2 usually handles this, but good to ensure)
-    html = re.sub(r'\*(.*?)\*', r'<em>\1</em>', html)
-    return html
 @app.route('/chat', methods=['POST'])
-def chat():
     try:
         data = request.json
-        user_message = data.get('message')
         if not user_message:
-            return jsonify({"error": "No message provided"}), 400
-        # Generate response using Gemini
-        # For multi-turn conversations, you might manage chat history here
         response = model.generate_content(user_message)
-        # Get plain text for audio generation
-        plain_text_response = response.text
-        # Convert markdown to HTML for display
-        html_response = convert_markdown_to_html(plain_text_response)
-        return jsonify({
-            "response_html": html_response,
-            "response_text": plain_text_response # Send plain text for TTS
-        })
     except Exception as e:
-        app.logger.error(f"Chat Error: {e}")
-        return jsonify({"error": str(e)}), 500
 @app.route('/generate-audio', methods=['POST'])
-def generate_audio():
     try:
         data = request.json
-        text_to_speak = data.get('text')
-        if not text_to_speak:
             return jsonify({"error": "No text provided"}), 400
-        # Sanitize text for TTS (remove common markdown characters for smoother pronunciation)
-        # This prevents gTTS from trying to pronounce asterisks, backticks, etc.
-        cleaned_text = re.sub(r'[\*_`#]', '', text_to_speak) # Remove bold, italic, code, headers markdown
-        cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip() # Replace multiple spaces with single space
-        if not cleaned_text: # If text becomes empty after cleaning
-            return jsonify({"error": "Text became empty after cleaning, cannot generate audio."}), 400
-        # Generate a unique filename using UUID to prevent collisions
-        filename = f"{uuid.uuid4()}.mp3"
         filepath = os.path.join(AUDIO_FOLDER, filename)
-        # Create TTS object and save to file
-        tts = gTTS(text=cleaned_text, lang='en', slow=False) # 'en' for English, 'slow=False' for normal speed
         tts.save(filepath)
-        # Return the URL to the audio file, converting path separators for web use
-        audio_url = f"/{filepath.replace(os.path.sep, '/')}"
-        return jsonify({"audio_url": audio_url})
     except Exception as e:
-        app.logger.error(f"Audio Generation Error: {e}")
-        return jsonify({"error": str(e)}), 500
-# Serve the main index.html file
 @app.route('/')
 def serve_index():
     return send_from_directory('static', 'index.html')
-# Serve other static files (CSS, JS, audio files)
 @app.route('/<path:path>')
 def serve_static(path):
-    # Ensure that only files from 'static' are served
     return send_from_directory('static', path)
 if __name__ == '__main__':
-    # Run the Flask app
-    # debug=True allows automatic reloading on code changes and provides more detailed error messages
-    app.run(host="0.0.0.0", port=7860)

+# app.py - Complete Flask Backend
 from flask import Flask, request, jsonify, send_from_directory
 import google.generativeai as genai
 from dotenv import load_dotenv
 from flask_cors import CORS
 import markdown2
 import re
+from gtts import gTTS
+import uuid
+import logging
+# Configure logging once at import time; the route handlers log via `logger`.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Load environment variables
 load_dotenv()
+# Configuration
 AUDIO_FOLDER = os.path.join('static', 'audio')
+os.makedirs(AUDIO_FOLDER, exist_ok=True)
+MAX_AUDIO_LENGTH = 5000  # characters
 # Initialize Flask app
 app = Flask(__name__, static_folder='static')
+CORS(app)
+# Enhanced Gemini System Instruction
+SYSTEM_INSTRUCTION = """
+You are AstroChat, an advanced AI assistant with voice capabilities. Follow these guidelines:
+1. Voice Responses:
+- When users request audio (e.g., "read this", "speak aloud", "audio version"), include [AUDIO] in response
+- Structure responses for optimal TTS:
+  * Short sentences (12-15 words)
+  * Pause between paragraphs
+  * Spell out complex terms
+2. Content Formatting:
+- Code: Explain → Format in markdown
+- Lists: Use bullet points
+- Quotes: Provide attribution
+- Math/Science: Explain symbols verbally
+3. Interaction Style:
+- Friendly but professional
+- Ask clarifying questions
+- Admit knowledge limits
+- Offer follow-up suggestions
+4. Special Cases:
+- Acronyms: Spell out first use
+- Names: Provide pronunciation hints
+- Technical terms: Give simple definitions
 """
+# Initialize Gemini
+# Read the key once so a missing value is reported at startup instead of
+# surfacing later as an opaque error on the first /chat request.
+_gemini_api_key = os.getenv("GEMINI_API_KEY")
+if not _gemini_api_key:
+    logger.warning(
+        "GEMINI_API_KEY is not set; Gemini requests will fail unless the "
+        "SDK finds credentials elsewhere"
+    )
+genai.configure(api_key=_gemini_api_key)
+# GEMINI_MODEL optionally overrides the model name; the default is unchanged.
 model = genai.GenerativeModel(
+    os.getenv("GEMINI_MODEL", 'gemini-1.5-flash'),
+    system_instruction=SYSTEM_INSTRUCTION
 )
+def process_response(text):
+    """Turn a raw model reply into the payload returned by the /chat route.
+
+    Args:
+        text: Markdown text produced by the model. It may contain the literal
+            marker ``[AUDIO]``.
+
+    Returns:
+        A dict with three keys: ``response_html`` (the reply rendered to
+        HTML), ``response_text`` (the reply with every ``[AUDIO]`` marker
+        removed and surrounding whitespace stripped) and ``audio_requested``
+        (True when the marker was present).
+    """
+    audio_requested = '[AUDIO]' in text
+    clean_text = text.replace('[AUDIO]', '').strip()
+    # NOTE(review): raw HTML in the model reply passes through markdown2
+    # unescaped; consider its safe_mode option if the front end injects
+    # response_html into the page as-is -- confirm against the client code.
+    extras = [
+        "fenced-code-blocks",
+        "tables",
+        "code-friendly",
+        "cuddled-lists"
+    ]
+    html = markdown2.markdown(clean_text, extras=extras)
+    # Add the "code-block" class to every <pre><code ...> opener.
+    html = re.sub(
+        r'<pre><code(.*?)>',
+        r'<pre class="code-block"><code\1>',
+        html
+    )
+    # Make links open in a new tab. Only the opening tag is matched, so links
+    # that carry extra attributes (e.g. title="...") or whose text spans
+    # several lines are rewritten too; plain links get the same output as
+    # before.
+    html = re.sub(
+        r'<a href="([^"]*)"([^>]*)>',
+        r'<a href="\1"\2 target="_blank" rel="noopener">',
+        html
+    )
+    return {
+        "response_html": html,
+        "response_text": clean_text,
+        "audio_requested": audio_requested
+    }
 @app.route('/chat', methods=['POST'])
+def handle_chat():
     try:
         data = request.json
+        user_message = data.get('message', '').strip()
         if not user_message:
+            return jsonify({"error": "Empty message"}), 400
+        # Detect audio requests
+        audio_triggers = [
+            "read aloud", "speak this", "audio please",
+            "say it", "voice response", "read this",
+            "can you speak", "tell me aloud"
+        ]
+        explicit_audio = any(
+            trigger in user_message.lower()
+            for trigger in audio_triggers
+        )
+        # Generate response
         response = model.generate_content(user_message)
+        processed = process_response(response.text)
+        # Force audio if explicitly requested
+        if explicit_audio:
+            processed["audio_requested"] = True
+        return jsonify(processed)
     except Exception as e:
+        logger.error(f"Chat error: {str(e)}")
+        return jsonify({
+            "error": "I encountered an error",
+            "details": str(e)
+        }), 500
 @app.route('/generate-audio', methods=['POST'])
+def handle_audio():
+    """Convert posted text to an MP3 file and return its URL.
+
+    Expects a JSON object body with a string ``text`` field. Markdown
+    punctuation is removed, whitespace is collapsed, and the text is cut to
+    ``MAX_AUDIO_LENGTH`` characters (plus a truncation notice) before it is
+    handed to gTTS. The file is written under ``AUDIO_FOLDER``.
+
+    Returns:
+        200 with ``audio_url`` and ``text_length`` (length of the cleaned
+        text); 400 when the body is not a JSON object, ``text`` is missing,
+        not a string, or blank, or nothing is left after cleaning; 500 when
+        speech generation fails.
+    """
+    # silent=True yields None for a missing or malformed JSON body instead of
+    # raising, so bad client input is answered with 400 rather than 500.
+    data = request.get_json(silent=True)
+    text = data.get('text') if isinstance(data, dict) else None
+    if not isinstance(text, str) or not text.strip():
+        return jsonify({"error": "No text provided"}), 400
+    # Remove markdown punctuation so it is not read aloud, then collapse
+    # whitespace runs.
+    clean_text = re.sub(r'[\*_`#\[\]]', '', text)
+    clean_text = re.sub(r'\s+', ' ', clean_text).strip()
+    # Input made only of markdown characters is empty at this point; reject
+    # it here because gTTS cannot synthesise empty text.
+    if not clean_text:
+        return jsonify({"error": "Text became empty after cleaning, cannot generate audio."}), 400
+    # Safe truncation
+    if len(clean_text) > MAX_AUDIO_LENGTH:
+        clean_text = clean_text[:MAX_AUDIO_LENGTH]
+        clean_text += "... [content truncated]"
+    # Split camelCase words here rather than through pre_processor_funcs:
+    # passing that argument replaces gTTS's default pre-processors instead of
+    # adding to them.
+    spoken_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', clean_text)
+    # Generate unique filename
+    filename = f"audio_{uuid.uuid4()}.mp3"
     filepath = os.path.join(AUDIO_FOLDER, filename)
     try:
+        tts = gTTS(
+            text=spoken_text,
+            lang='en',
+            slow=False,
+            lang_check=False
+        )
         tts.save(filepath)
     except Exception as e:
+        # Top-level boundary for the route: record the traceback, drop any
+        # partially written file, and return a JSON error.
+        logger.exception("Audio error: %s", e)
+        if os.path.exists(filepath):
+            os.remove(filepath)
+        return jsonify({
+            "error": "Audio generation failed",
+            "details": str(e)
+        }), 500
+    return jsonify({
+        "audio_url": f"/audio/{filename}",
+        "text_length": len(clean_text)
+    })
+@app.route('/audio/<filename>')
+def serve_audio(filename):
+    """Serve a generated audio file from ``AUDIO_FOLDER``.
+
+    Args:
+        filename: Name of the file, taken from the URL.
+
+    Returns:
+        The file response, or a JSON error body with status 404 when no such
+        file exists.
+    """
+    # send_from_directory reports a missing file by raising an HTTP 404
+    # error, not FileNotFoundError, so the check is done up front to return
+    # the JSON error body. It resolves a relative directory against the
+    # application's root path, so the check does the same.
+    audio_dir = os.path.join(app.root_path, AUDIO_FOLDER)
+    if not os.path.isfile(os.path.join(audio_dir, filename)):
+        return jsonify({"error": "Audio file not found"}), 404
+    return send_from_directory(AUDIO_FOLDER, filename)
 @app.route('/')
 def serve_index():
+    """Return static/index.html for the site root."""
+    index_page = send_from_directory('static', 'index.html')
+    return index_page
 @app.route('/<path:path>')
 def serve_static(path):
+    """Return the file found at `path` under the 'static' directory."""
+    static_file = send_from_directory('static', path)
+    return static_file
 if __name__ == '__main__':
+    # Listen on the port named by the PORT environment variable, or 7860
+    # when it is unset.
+    listen_port = os.environ.get('PORT', 7860)
+    app.run(host="0.0.0.0", port=int(listen_port))