File size: 5,648 Bytes
465bca7 1deacc5 3ebc508 1deacc5 465bca7 3ebc508 465bca7 14c0817 465bca7 14c0817 3ebc508 1deacc5 465bca7 14c0817 465bca7 3ebc508 14c0817 465bca7 14c0817 3ebc508 465bca7 1deacc5 465bca7 263ee79 465bca7 263ee79 465bca7 1deacc5 465bca7 1deacc5 3ebc508 465bca7 3ebc508 465bca7 3ebc508 263ee79 465bca7 3ebc508 465bca7 263ee79 465bca7 3ebc508 465bca7 14c0817 465bca7 3ebc508 465bca7 14c0817 465bca7 14c0817 465bca7 14c0817 465bca7 14c0817 465bca7 14c0817 465bca7 14c0817 465bca7 3ebc508 1deacc5 3ebc508 e81567c 465bca7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# app.py - Complete Flask Backend
from flask import Flask, request, jsonify, send_from_directory
import google.generativeai as genai
from dotenv import load_dotenv
import os
from flask_cors import CORS
import markdown2
import re
from gtts import gTTS
import uuid
import logging
# --- Logging ---------------------------------------------------------------
# Root logging at INFO; `logger` is the module-level logger used by the routes.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Environment -----------------------------------------------------------
# Load environment variables (python-dotenv) before anything reads os.environ.
load_dotenv()

# --- Audio storage ---------------------------------------------------------
# Upper bound, in characters, on the cleaned text handed to the TTS engine.
MAX_AUDIO_LENGTH = 5000
# Generated MP3 files are written here; create the directory up front.
AUDIO_FOLDER = os.path.join('static', 'audio')
os.makedirs(AUDIO_FOLDER, exist_ok=True)

# --- Flask application -----------------------------------------------------
app = Flask(__name__, static_folder='static')
CORS(app)
# System instruction handed to the Gemini model when it is constructed
# (passed as `system_instruction=` to genai.GenerativeModel).
# The "[AUDIO]" marker described in section 1 is the same literal that
# process_response() searches for to flag a reply as audio-requested, so the
# two must stay in sync.
SYSTEM_INSTRUCTION = """
You are AstroChat, an advanced AI assistant with voice capabilities. Follow these guidelines:
1. Voice Responses:
- When users request audio (e.g., "read this", "speak aloud", "audio version"), include [AUDIO] in response
- Structure responses for optimal TTS:
* Short sentences (12-15 words)
* Pause between paragraphs
* Spell out complex terms
2. Content Formatting:
- Code: Explain → Format in markdown
- Lists: Use bullet points
- Quotes: Provide attribution
- Math/Science: Explain symbols verbally
3. Interaction Style:
- Friendly but professional
- Ask clarifying questions
- Admit knowledge limits
- Offer follow-up suggestions
4. Special Cases:
- Acronyms: Spell out first use
- Names: Provide pronunciation hints
- Technical terms: Give simple definitions
"""
# Gemini client setup: the API key comes from the GEMINI_API_KEY environment
# variable, and a single module-level model handle is shared by all requests.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
model = genai.GenerativeModel(
    'gemini-1.5-flash',
    system_instruction=SYSTEM_INSTRUCTION,
)
def process_response(text):
    """Convert raw model output into the payload sent back to the client.

    The literal marker ``[AUDIO]`` is detected and removed, the remaining
    text is rendered from markdown to HTML, and the HTML is post-processed
    so code blocks carry a CSS class and links open in a new tab.

    Args:
        text: The model's reply as a string.

    Returns:
        A dict with three keys: ``response_html`` (rendered HTML),
        ``response_text`` (marker-free, stripped text) and
        ``audio_requested`` (True when the marker was present).
    """
    marker = '[AUDIO]'
    wants_audio = marker in text
    plain = text.replace(marker, '').strip()

    rendered = markdown2.markdown(
        plain,
        extras=["fenced-code-blocks", "tables", "code-friendly", "cuddled-lists"],
    )

    # (pattern, replacement) pairs applied in order to the rendered HTML:
    #   1. add class="code-block" to every <pre> that wraps a <code> element
    #   2. make plain <a href="..."> links open in a new tab with rel="noopener"
    rewrites = (
        (
            r'<pre><code(.*?)>',
            r'<pre class="code-block"><code\1>',
        ),
        (
            r'<a href="(.*?)">(.*?)</a>',
            r'<a href="\1" target="_blank" rel="noopener">\2</a>',
        ),
    )
    for pattern, replacement in rewrites:
        rendered = re.sub(pattern, replacement, rendered)

    return dict(
        response_html=rendered,
        response_text=plain,
        audio_requested=wants_audio,
    )
@app.route('/chat', methods=['POST'])
def handle_chat():
    """Handle a chat turn: validate input, query the model, return the reply.

    Expects a JSON object body with a string ``message`` field.

    Returns:
        200 with the dict produced by ``process_response`` (``response_html``,
        ``response_text``, ``audio_requested``); ``audio_requested`` is forced
        to True when the user's message contains one of the trigger phrases.
        400 when the body is not a JSON object or the message is missing,
        not a string, or blank.
        500 with ``error`` and ``details`` when generation or processing fails.
    """
    # silent=True yields None for a missing/malformed JSON body instead of
    # raising, so client mistakes are reported as 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    raw_message = data.get('message', '')
    # A null or non-string message would otherwise crash on .strip().
    user_message = raw_message.strip() if isinstance(raw_message, str) else ''
    if not user_message:
        return jsonify({"error": "Empty message"}), 400

    # Phrases in the user's own message that explicitly ask for spoken output.
    audio_triggers = (
        "read aloud", "speak this", "audio please",
        "say it", "voice response", "read this",
        "can you speak", "tell me aloud",
    )
    lowered = user_message.lower()  # lowercase once, not once per trigger
    explicit_audio = any(trigger in lowered for trigger in audio_triggers)

    try:
        response = model.generate_content(user_message)
        processed = process_response(response.text)
    except Exception as e:
        # Top-level boundary: log with traceback and report a generic failure.
        logger.exception("Chat error: %s", e)
        # NOTE(review): "details" exposes the raw exception text to the client;
        # kept for backward compatibility with existing front-end handling.
        return jsonify({
            "error": "I encountered an error",
            "details": str(e)
        }), 500

    # The user's explicit request wins even if the model omitted [AUDIO].
    if explicit_audio:
        processed["audio_requested"] = True
    return jsonify(processed)
@app.route('/generate-audio', methods=['POST'])
def handle_audio():
    """Synthesize speech for the supplied text and return a URL to the MP3.

    Expects a JSON object body with a string ``text`` field. Markdown
    punctuation is removed, whitespace is collapsed, and the result is capped
    at ``MAX_AUDIO_LENGTH`` characters (plus a truncation notice) before being
    passed to gTTS.

    Returns:
        200 with ``audio_url`` (served by the ``/audio/<filename>`` route) and
        ``text_length`` (length of the text actually synthesized).
        400 when the body is not a JSON object, or the text is missing, not a
        string, or empty once markdown characters are stripped.
        500 with ``error`` and ``details`` when synthesis or saving fails.
    """
    # silent=True yields None for a missing/malformed JSON body instead of
    # raising, so client mistakes are reported as 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    raw_text = data.get('text', '')
    text = raw_text.strip() if isinstance(raw_text, str) else ''
    if not text:
        return jsonify({"error": "No text provided"}), 400

    # Drop markdown punctuation, then collapse all whitespace runs.
    clean_text = re.sub(r'[\*_`#\[\]]', '', text)
    clean_text = re.sub(r'\s+', ' ', clean_text).strip()
    # Input made only of markdown characters (e.g. "***") is empty by now;
    # reject it here instead of letting the TTS engine fail with a 500.
    if not clean_text:
        return jsonify({"error": "No text provided"}), 400

    if len(clean_text) > MAX_AUDIO_LENGTH:
        clean_text = clean_text[:MAX_AUDIO_LENGTH]
        clean_text += "... [content truncated]"

    # A random UUID keeps concurrent requests from clobbering each other.
    filename = f"audio_{uuid.uuid4()}.mp3"
    filepath = os.path.join(AUDIO_FOLDER, filename)

    try:
        tts = gTTS(
            text=clean_text,
            lang='en',
            slow=False,
            lang_check=False,
            pre_processor_funcs=[
                # Split camelCase words so they are read as separate words.
                lambda x: re.sub(r'([a-z])([A-Z])', r'\1 \2', x)
            ]
        )
        tts.save(filepath)
    except Exception as e:
        logger.exception("Audio error: %s", e)
        # Best-effort removal of a partially written file left by a failed save.
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError:
            logger.warning("Could not remove partial audio file %s", filepath)
        # NOTE(review): "details" exposes the raw exception text to the client;
        # kept for backward compatibility with existing front-end handling.
        return jsonify({
            "error": "Audio generation failed",
            "details": str(e)
        }), 500

    return jsonify({
        "audio_url": f"/audio/{filename}",
        "text_length": len(clean_text)
    })
@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve a previously generated audio file from ``AUDIO_FOLDER``.

    Args:
        filename: Name of the file, taken from the URL (no slashes, because
            the route uses the default string converter).

    Returns:
        The file response, or a JSON ``{"error": ...}`` body with status 404
        when the file does not exist.
    """
    # send_from_directory signals a missing file with an HTTP NotFound error
    # rather than FileNotFoundError, so check explicitly to return the JSON
    # 404. The lookup mirrors send_from_directory, which resolves a relative
    # directory against the application root path.
    candidate = os.path.join(app.root_path, AUDIO_FOLDER, filename)
    if not os.path.isfile(candidate):
        return jsonify({"error": "Audio file not found"}), 404
    try:
        # Path-safety checks remain the responsibility of send_from_directory.
        return send_from_directory(AUDIO_FOLDER, filename)
    except FileNotFoundError:
        # Covers the file vanishing between the check above and the send.
        return jsonify({"error": "Audio file not found"}), 404
@app.route('/')
def serve_index():
    """Serve the front-end entry page, ``index.html``, from ``static``."""
    static_dir, landing_page = 'static', 'index.html'
    return send_from_directory(static_dir, landing_page)
@app.route('/<path:path>')
def serve_static(path):
    """Serve any other requested path as a file under the ``static`` directory.

    Args:
        path: The URL path after the leading slash; may contain slashes.
    """
    static_dir = 'static'
    return send_from_directory(static_dir, path)
if __name__ == '__main__':
    # Listen on the port given by the PORT environment variable (default 7860)
    # and bind to all interfaces so the server is reachable from outside the
    # host or container.
    port = int(os.environ.get('PORT', 7860))
    app.run(host="0.0.0.0", port=port)