Spaces:

Athspi
/

athspi

Sleeping

File size: 5,305 Bytes

3ebc508
1deacc5
3ebc508
 
 
 
1deacc5
 
14c0817
 
3ebc508
 
 
 
14c0817
 
 
 
 
3ebc508
1deacc5
3ebc508
 
14c0817
 
 
 
 
 
 
 
 
 
 
 
 
3ebc508
14c0817
 
 
 
 
3ebc508
1deacc5
 
14c0817
 
1deacc5
14c0817
 
 
263ee79
14c0817
1deacc5
263ee79
14c0817
1deacc5
 
 
 
3ebc508
 
 
 
 
 
263ee79
 
3ebc508
263ee79
14c0817
263ee79
3ebc508
14c0817
 
 
 
 
1deacc5
3ebc508
14c0817
 
3ebc508
 
 
14c0817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ebc508
 
14c0817
3ebc508
 
1deacc5
 
14c0817
1deacc5
 
14c0817
1deacc5
3ebc508
e81567c
14c0817
 
e81567c

# app.py - Flask Backend
from flask import Flask, request, jsonify, send_from_directory
import google.generativeai as genai
from dotenv import load_dotenv
import os
from flask_cors import CORS
import markdown2
import re
from gtts import gTTS # <-- Import gTTS
import uuid # <-- Import UUID for unique filenames

# Load environment variables via python-dotenv before any configuration is
# read; GEMINI_API_KEY is fetched with os.getenv() further down in this file.
# (presumably sourced from a local .env file -- confirm against deployment)
load_dotenv()

# Directory that receives the generated TTS audio files. The path is relative,
# so it is resolved against the process's current working directory.
AUDIO_FOLDER = os.path.join('static', 'audio')
# exist_ok=True makes creation idempotent and removes the check-then-create
# race that occurs when several worker processes start at the same moment.
os.makedirs(AUDIO_FOLDER, exist_ok=True)

# Initialize Flask app; 'static' is registered as the app's static folder
app = Flask(__name__, static_folder='static')
CORS(app)  # Enable CORS for all routes

# Configure Gemini with a system instruction
# This guides the AI's behavior and ensures responses are good for TTS.
# The text below is sent verbatim as the model's system instruction.
system_instruction_text = """
You are a helpful, friendly, and informative AI assistant named AstroChat.
Your goal is to provide clear, concise, and natural-sounding answers to user queries.
When you respond:
- Use clear and simple language.
- Avoid overly complex sentence structures that might be hard to read aloud.
- Keep the user engaged and offer follow-up questions or related topics where appropriate.
- Ensure your responses are suitable for text-to-speech conversion.
- Provide factual and accurate information.
"""

# os.getenv returns None when GEMINI_API_KEY is unset; no check is made here.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
# Initialize the model with the system instruction
model = genai.GenerativeModel(
    'gemini-2.5-flash', # Model ID in use; the system instruction is passed alongside it
    system_instruction=system_instruction_text
)

def convert_markdown_to_html(text):
    """Render Markdown *text* to HTML for display in the chat UI.

    The conversion is delegated to markdown2 with the "fenced-code-blocks"
    and "tables" extras enabled. Every ``<pre><code ...>`` opening produced by
    the renderer is then tagged with ``class="code-block"`` so the frontend
    can style code listings.

    Args:
        text: Markdown source (here, the raw model reply).

    Returns:
        The rendered HTML string.
    """
    html = markdown2.markdown(text, extras=["fenced-code-blocks", "tables"])

    # Bold/italic are already handled by markdown2. The former regex passes for
    # ``**...**`` and ``*...*`` ran over the *rendered* HTML and therefore
    # rewrote literal asterisks that must survive -- e.g. ``a * b * c`` inside
    # a code block became ``a <em> b </em> c`` -- so they are intentionally
    # not applied any more.

    # NOTE(review): raw HTML in `text` appears to be passed through unchanged
    # (no safe_mode is requested). Confirm how the frontend inserts this
    # markup before trusting it with arbitrary model output.

    # Attach the styling hook to code blocks, keeping any attributes markdown2
    # put on the <code> tag (captured by group 1).
    return re.sub(r'<pre><code(.*?)>', r'<pre class="code-block"><code\1>', html)

@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message with a Gemini-generated reply.

    Expects a JSON object body with a ``message`` field.

    Returns:
        200 with ``response_html`` (Markdown rendered to HTML) and
        ``response_text`` (the raw model text, used by the client for TTS);
        400 with an ``error`` field when the body is missing, is not a JSON
        object, or carries no message;
        500 with an ``error`` field when generation or rendering fails.
    """
    # silent=True yields None for a missing, malformed or non-JSON body instead
    # of raising, so client mistakes are reported as 400 rather than 500.
    data = request.get_json(silent=True)
    user_message = data.get('message') if isinstance(data, dict) else None

    if not user_message:
        return jsonify({"error": "No message provided"}), 400

    try:
        # Generate response using Gemini
        # For multi-turn conversations, you might manage chat history here
        response = model.generate_content(user_message)

        # Plain text is kept as-is for audio generation on the client side
        plain_text_response = response.text

        # Convert markdown to HTML for display
        html_response = convert_markdown_to_html(plain_text_response)
    except Exception as e:
        # Top-level boundary for this endpoint: log with traceback and report.
        app.logger.exception(f"Chat Error: {e}")
        return jsonify({"error": str(e)}), 500

    return jsonify({
        "response_html": html_response,
        "response_text": plain_text_response,  # Send plain text for TTS
    })

@app.route('/generate-audio', methods=['POST'])
def generate_audio():
    """Synthesize speech for the posted text and return the audio file URL.

    Expects a JSON object body with a string ``text`` field. Markdown
    punctuation is stripped and whitespace collapsed before the text is
    handed to gTTS; the MP3 is written into ``AUDIO_FOLDER`` under a UUID
    file name.

    Returns:
        200 with ``audio_url`` pointing at the saved MP3;
        400 with an ``error`` field when the body is missing, is not a JSON
        object, has no string ``text``, or the text is empty after cleaning;
        500 with an ``error`` field when synthesis or saving fails.
    """
    # silent=True yields None for a missing, malformed or non-JSON body instead
    # of raising, so client mistakes are reported as 400 rather than 500.
    data = request.get_json(silent=True)
    text_to_speak = data.get('text') if isinstance(data, dict) else None

    # A non-string value would make re.sub below raise TypeError; reject it
    # up front as a client error.
    if not isinstance(text_to_speak, str) or not text_to_speak:
        return jsonify({"error": "No text provided"}), 400

    # Sanitize text for TTS (remove common markdown characters for smoother pronunciation)
    # This prevents gTTS from trying to pronounce asterisks, backticks, etc.
    cleaned_text = re.sub(r'[\*_`#]', '', text_to_speak)  # Remove bold, italic, code, headers markdown
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()  # Collapse whitespace runs to one space

    if not cleaned_text:  # If text becomes empty after cleaning
        return jsonify({"error": "Text became empty after cleaning, cannot generate audio."}), 400

    # Generate a unique filename using UUID to prevent collisions
    filename = f"{uuid.uuid4()}.mp3"
    filepath = os.path.join(AUDIO_FOLDER, filename)

    try:
        # Create TTS object and save to file
        tts = gTTS(text=cleaned_text, lang='en', slow=False)  # 'en' for English, 'slow=False' for normal speed
        try:
            tts.save(filepath)
        except Exception:
            # A failed save may leave an empty or partial file behind
            # (presumably the file is opened before audio is fetched -- verify
            # against gTTS); remove it so failures do not accumulate junk.
            if os.path.exists(filepath):
                os.remove(filepath)
            raise
    except Exception as e:
        # Top-level boundary for this endpoint: log with traceback and report.
        app.logger.exception(f"Audio Generation Error: {e}")
        return jsonify({"error": str(e)}), 500

    # NOTE(review): generated files are never deleted by this handler; a
    # cleanup policy for AUDIO_FOLDER may be needed.

    # Return the URL to the audio file, converting path separators for web use
    audio_url = f"/{filepath.replace(os.path.sep, '/')}"
    return jsonify({"audio_url": audio_url})

# Root route: hand back the frontend entry page
@app.route('/')
def serve_index():
    """Return ``index.html`` looked up in the ``static`` directory."""
    index_page = 'index.html'
    return send_from_directory('static', index_page)

# Catch-all route for the remaining assets (CSS, JS, audio files)
@app.route('/<path:path>')
def serve_static(path):
    """Return the file at *path*, looked up under the ``static`` directory."""
    # 'static' is the base directory handed to send_from_directory; the
    # requested path is resolved relative to it.
    static_root = 'static'
    return send_from_directory(static_root, path)

if __name__ == '__main__':
    # Run the Flask app with its built-in server when this file is executed directly.
    # It listens on all interfaces (0.0.0.0) on port 7860. No debug flag is passed
    # here, so debug behavior is whatever Flask defaults to for app.run().
    app.run(host="0.0.0.0", port=7860)