File size: 5,648 Bytes
465bca7
1deacc5
3ebc508
 
 
 
1deacc5
 
465bca7
 
 
 
 
 
 
3ebc508
 
 
 
465bca7
14c0817
465bca7
 
14c0817
3ebc508
1deacc5
465bca7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14c0817
 
465bca7
3ebc508
14c0817
465bca7
 
14c0817
3ebc508
465bca7
 
 
 
1deacc5
465bca7
 
 
 
 
 
 
 
263ee79
465bca7
 
 
 
 
 
263ee79
465bca7
 
 
 
 
 
1deacc5
465bca7
 
 
 
 
1deacc5
3ebc508
465bca7
3ebc508
 
465bca7
3ebc508
263ee79
465bca7
 
 
 
 
 
 
 
 
 
 
 
3ebc508
465bca7
263ee79
465bca7
3ebc508
465bca7
 
 
14c0817
465bca7
3ebc508
 
465bca7
 
 
 
 
14c0817
 
465bca7
14c0817
 
465bca7
 
 
14c0817
465bca7
 
 
 
 
 
 
 
 
 
 
 
14c0817
465bca7
 
 
 
 
 
 
 
 
 
 
14c0817
465bca7
 
 
 
 
 
14c0817
465bca7
 
 
 
 
 
 
 
 
 
 
 
3ebc508
 
 
1deacc5
 
 
 
 
3ebc508
e81567c
465bca7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# app.py - Complete Flask Backend
from flask import Flask, request, jsonify, send_from_directory
import google.generativeai as genai
from dotenv import load_dotenv
import os
from flask_cors import CORS
import markdown2
import re
from gtts import gTTS
import uuid
import logging

# Configure logging
# The root logger is set to INFO; `logger` is this module's named logger and is
# what the route handlers use to report errors.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables
# Must run before any os.getenv()/os.environ lookup below so values supplied
# through a .env file are visible to them.
load_dotenv()

# Configuration
# AUDIO_FOLDER is a relative path; os.makedirs creates it at import time
# relative to the process's current working directory.
AUDIO_FOLDER = os.path.join('static', 'audio')
os.makedirs(AUDIO_FOLDER, exist_ok=True)
# Upper bound on the cleaned text handed to the TTS engine by handle_audio();
# longer text is cut to this many characters before a truncation notice is added.
MAX_AUDIO_LENGTH = 5000  # characters

# Initialize Flask app
# NOTE(review): CORS(app) is called with no origin restrictions - presumably
# this allows cross-origin requests from any site; confirm that is intended.
app = Flask(__name__, static_folder='static')
CORS(app)

# Enhanced Gemini System Instruction
# Passed to the Gemini model as its `system_instruction`. It asks the model to
# include the literal marker [AUDIO] when the user wants a spoken reply;
# process_response() looks for that marker and strips it from the text.
SYSTEM_INSTRUCTION = """
You are AstroChat, an advanced AI assistant with voice capabilities. Follow these guidelines:

1. Voice Responses:
- When users request audio (e.g., "read this", "speak aloud", "audio version"), include [AUDIO] in response
- Structure responses for optimal TTS:
  * Short sentences (12-15 words)
  * Pause between paragraphs
  * Spell out complex terms

2. Content Formatting:
- Code: Explain → Format in markdown
- Lists: Use bullet points
- Quotes: Provide attribution
- Math/Science: Explain symbols verbally

3. Interaction Style:
- Friendly but professional
- Ask clarifying questions
- Admit knowledge limits
- Offer follow-up suggestions

4. Special Cases:
- Acronyms: Spell out first use
- Names: Provide pronunciation hints
- Technical terms: Give simple definitions
"""

# Initialize Gemini
# The API key is read from the GEMINI_API_KEY environment variable.
# NOTE(review): os.getenv returns None when the variable is unset and nothing
# checks for that here - confirm how genai.configure behaves without a key.
# `model` is the single module-level client used by handle_chat().
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel(
    'gemini-1.5-flash',
    system_instruction=SYSTEM_INSTRUCTION
)

def process_response(text):
    """Turn a raw model reply into the payload returned to the client.

    The literal marker ``[AUDIO]`` is detected and removed, the remaining
    text is rendered from markdown to HTML, and the HTML is post-processed
    so code blocks carry a CSS class and links open in a new tab.

    Args:
        text: Raw reply text produced by the model.

    Returns:
        A dict with ``response_html`` (rendered HTML), ``response_text``
        (the reply without the marker, stripped) and ``audio_requested``
        (whether the marker was present).
    """
    marker = '[AUDIO]'
    wants_audio = marker in text
    plain = text.replace(marker, '').strip()

    rendered = markdown2.markdown(
        plain,
        extras=[
            "fenced-code-blocks",
            "tables",
            "code-friendly",
            "cuddled-lists",
        ],
    )

    # (pattern, replacement) pairs applied in order: first tag code blocks
    # with a class for styling, then make links open in a new tab.
    rewrites = (
        (r'<pre><code(.*?)>', r'<pre class="code-block"><code\1>'),
        (
            r'<a href="(.*?)">(.*?)</a>',
            r'<a href="\1" target="_blank" rel="noopener">\2</a>',
        ),
    )
    for pattern, replacement in rewrites:
        rendered = re.sub(pattern, replacement, rendered)

    return dict(
        response_html=rendered,
        response_text=plain,
        audio_requested=wants_audio,
    )

# Phrases in the user's message that force an audio reply even when the model
# did not emit the [AUDIO] marker itself. Matched case-insensitively.
_AUDIO_TRIGGERS = (
    "read aloud", "speak this", "audio please",
    "say it", "voice response", "read this",
    "can you speak", "tell me aloud",
)


@app.route('/chat', methods=['POST'])
def handle_chat():
    """Answer a chat message with the model's reply.

    Expects a JSON object body with a string ``message`` field.

    Returns:
        * 200 with the dict built by ``process_response``; its
          ``audio_requested`` flag is forced to ``True`` when the message
          contains one of the audio trigger phrases.
        * 400 with ``{"error": ...}`` when the body is not a JSON object,
          ``message`` is not a string, or the message is empty.
        * 500 with ``{"error": ..., "details": ...}`` when generating or
          processing the reply fails.
    """
    # silent=True makes a missing/malformed JSON body come back as None
    # instead of raising, so bad requests are answered with 400 rather than
    # falling into the generic 500 handler below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    raw_message = payload.get('message', '')
    if not isinstance(raw_message, str):
        return jsonify({"error": "Message must be a string"}), 400

    user_message = raw_message.strip()
    if not user_message:
        return jsonify({"error": "Empty message"}), 400

    # Detect audio requests
    lowered = user_message.lower()
    explicit_audio = any(trigger in lowered for trigger in _AUDIO_TRIGGERS)

    try:
        # Generate response
        response = model.generate_content(user_message)
        processed = process_response(response.text)
    except Exception as e:
        # Top-level boundary: log with traceback and report a generic failure.
        logger.exception("Chat error: %s", e)
        # NOTE(review): "details" exposes the internal exception text to the
        # client; kept for compatibility with existing consumers.
        return jsonify({
            "error": "I encountered an error",
            "details": str(e)
        }), 500

    # Force audio if explicitly requested
    if explicit_audio:
        processed["audio_requested"] = True

    return jsonify(processed)

@app.route('/generate-audio', methods=['POST'])
def handle_audio():
    """Synthesize speech for the supplied text and return a URL to the mp3.

    Expects a JSON object body with a string ``text`` field. Markdown
    punctuation is stripped and whitespace collapsed before synthesis; text
    longer than ``MAX_AUDIO_LENGTH`` characters is cut and a truncation
    notice appended.

    Returns:
        * 200 with ``{"audio_url": ..., "text_length": ...}`` on success.
        * 400 with ``{"error": ...}`` when the body is not a JSON object,
          ``text`` is not a string, or nothing speakable remains after
          cleaning.
        * 500 with ``{"error": ..., "details": ...}`` when synthesis fails.
    """
    # silent=True: a missing/malformed JSON body yields None instead of
    # raising, so it is reported as a client error rather than a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    raw_text = payload.get('text', '')
    if not isinstance(raw_text, str):
        return jsonify({"error": "Text must be a string"}), 400

    # Enhanced text cleaning
    clean_text = re.sub(r'[\*_`#\[\]]', '', raw_text)  # Remove markdown
    clean_text = re.sub(r'\s+', ' ', clean_text).strip()

    # Checked after cleaning: input made only of markdown characters or
    # whitespace would otherwise reach the TTS engine as an empty string.
    if not clean_text:
        return jsonify({"error": "No text provided"}), 400

    # Safe truncation
    if len(clean_text) > MAX_AUDIO_LENGTH:
        clean_text = clean_text[:MAX_AUDIO_LENGTH]
        clean_text += "... [content truncated]"

    # Generate unique filename
    filename = f"audio_{uuid.uuid4()}.mp3"
    filepath = os.path.join(AUDIO_FOLDER, filename)

    try:
        # Generate speech with enhanced parameters
        tts = gTTS(
            text=clean_text,
            lang='en',
            slow=False,
            lang_check=False,
            pre_processor_funcs=[
                lambda x: re.sub(r'([a-z])([A-Z])', r'\1 \2', x)  # Handle camelCase
            ]
        )
        tts.save(filepath)
    except Exception as e:
        # Top-level boundary: log with traceback and report a generic failure.
        logger.exception("Audio error: %s", e)
        # Best-effort cleanup: save() may have created the file before
        # failing, and nothing else would ever remove it.
        try:
            os.remove(filepath)
        except OSError:
            pass
        # NOTE(review): "details" exposes the internal exception text to the
        # client; kept for compatibility with existing consumers.
        return jsonify({
            "error": "Audio generation failed",
            "details": str(e)
        }), 500

    return jsonify({
        "audio_url": f"/audio/{filename}",
        "text_length": len(clean_text)
    })

@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve a previously generated audio file from ``AUDIO_FOLDER``.

    Args:
        filename: Name of the file, taken from the URL (a single path
            segment; the default URL converter does not match slashes).

    Returns:
        The file response, or a JSON ``{"error": ...}`` body with status 404
        when no such file exists.
    """
    # send_from_directory reports a missing file by raising an HTTP NotFound
    # error (rendered as the default HTML 404 page), not FileNotFoundError,
    # so the JSON 404 is produced by checking for the file up front. Relative
    # directories are resolved against the application's root path, which is
    # why the same base is used here.
    audio_dir = os.path.join(app.root_path, AUDIO_FOLDER)
    if not os.path.isfile(os.path.join(audio_dir, filename)):
        return jsonify({"error": "Audio file not found"}), 404

    try:
        return send_from_directory(AUDIO_FOLDER, filename)
    except FileNotFoundError:
        # Covers the file disappearing between the check and the send.
        return jsonify({"error": "Audio file not found"}), 404

@app.route('/')
def serve_index():
    """Serve ``index.html`` from the ``static`` directory for the site root."""
    index_page = 'index.html'
    return send_from_directory('static', index_page)

@app.route('/<path:path>')
def serve_static(path):
    """Catch-all route: serve the requested path from the ``static`` directory.

    Args:
        path: URL path (may contain slashes) of the file to serve.
    """
    static_root = 'static'
    return send_from_directory(static_root, path)

if __name__ == '__main__':
    # Script entry point: listen on all interfaces, on the port named by the
    # PORT environment variable (7860 when it is not set).
    app.run(host="0.0.0.0", port=int(os.environ.get('PORT', 7860)))