Athspi committed on
Commit
465bca7
·
verified ·
1 Parent(s): 062884e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -81
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py - Flask Backend
2
  from flask import Flask, request, jsonify, send_from_directory
3
  import google.generativeai as genai
4
  from dotenv import load_dotenv
@@ -6,131 +6,194 @@ import os
6
  from flask_cors import CORS
7
  import markdown2
8
  import re
9
- from gtts import gTTS # <-- Import gTTS
10
- import uuid # <-- Import UUID for unique filenames
 
 
 
 
 
11
 
12
  # Load environment variables
13
  load_dotenv()
14
 
15
- # Define paths and create static audio directory if it doesn't exist
16
  AUDIO_FOLDER = os.path.join('static', 'audio')
17
- if not os.path.exists(AUDIO_FOLDER):
18
- os.makedirs(AUDIO_FOLDER)
19
 
20
  # Initialize Flask app
21
  app = Flask(__name__, static_folder='static')
22
- CORS(app) # Enable CORS for all routes
23
-
24
- # Configure Gemini with a system instruction
25
- # This guides the AI's behavior and ensures responses are good for TTS.
26
- system_instruction_text = """
27
- You are a helpful, friendly, and informative AI assistant named AstroChat.
28
- Your goal is to provide clear, concise, and natural-sounding answers to user queries.
29
- When you respond:
30
- - Use clear and simple language.
31
- - Avoid overly complex sentence structures that might be hard to read aloud.
32
- - Keep the user engaged and offer follow-up questions or related topics where appropriate.
33
- - Ensure your responses are suitable for text-to-speech conversion.
34
- - Provide factual and accurate information.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  """
36
 
 
37
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
38
- # Initialize the model with the system instruction
39
  model = genai.GenerativeModel(
40
- 'gemini-2.5-flash', # Using 1.5-flash for better performance and system_instruction support
41
- system_instruction=system_instruction_text
42
  )
43
 
44
- def convert_markdown_to_html(text):
45
- # Convert markdown to HTML
46
- # Using 'fenced-code-blocks' and 'tables' for better markdown support
47
- html = markdown2.markdown(text, extras=["fenced-code-blocks", "tables"])
48
 
49
- # Add custom styling to code blocks (pre blocks)
50
- # This specifically targets `<pre><code>` blocks generated by markdown2 for styling.
51
- html = re.sub(r'<pre><code(.*?)>', r'<pre class="code-block"><code\1>', html)
 
 
 
 
 
52
 
53
- # Convert **bold** to <strong> for better visibility (markdown2 usually handles this, but good to ensure)
54
- html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
 
 
 
 
55
 
56
- # Convert *italic* to <em> (markdown2 usually handles this, but good to ensure)
57
- html = re.sub(r'\*(.*?)\*', r'<em>\1</em>', html)
 
 
 
 
58
 
59
- return html
 
 
 
 
60
 
61
  @app.route('/chat', methods=['POST'])
62
- def chat():
63
  try:
64
  data = request.json
65
- user_message = data.get('message')
66
 
67
  if not user_message:
68
- return jsonify({"error": "No message provided"}), 400
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- # Generate response using Gemini
71
- # For multi-turn conversations, you might manage chat history here
72
  response = model.generate_content(user_message)
 
73
 
74
- # Get plain text for audio generation
75
- plain_text_response = response.text
 
76
 
77
- # Convert markdown to HTML for display
78
- html_response = convert_markdown_to_html(plain_text_response)
79
-
80
- return jsonify({
81
- "response_html": html_response,
82
- "response_text": plain_text_response # Send plain text for TTS
83
- })
84
 
85
  except Exception as e:
86
- app.logger.error(f"Chat Error: {e}")
87
- return jsonify({"error": str(e)}), 500
 
 
 
88
 
89
  @app.route('/generate-audio', methods=['POST'])
90
- def generate_audio():
91
  try:
92
  data = request.json
93
- text_to_speak = data.get('text')
94
-
95
- if not text_to_speak:
96
  return jsonify({"error": "No text provided"}), 400
97
-
98
- # Sanitize text for TTS (remove common markdown characters for smoother pronunciation)
99
- # This prevents gTTS from trying to pronounce asterisks, backticks, etc.
100
- cleaned_text = re.sub(r'[\*_`#]', '', text_to_speak) # Remove bold, italic, code, headers markdown
101
- cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip() # Replace multiple spaces with single space
102
-
103
- if not cleaned_text: # If text becomes empty after cleaning
104
- return jsonify({"error": "Text became empty after cleaning, cannot generate audio."}), 400
105
-
106
- # Generate a unique filename using UUID to prevent collisions
107
- filename = f"{uuid.uuid4()}.mp3"
 
108
  filepath = os.path.join(AUDIO_FOLDER, filename)
109
-
110
- # Create TTS object and save to file
111
- tts = gTTS(text=cleaned_text, lang='en', slow=False) # 'en' for English, 'slow=False' for normal speed
 
 
 
 
 
 
 
 
112
  tts.save(filepath)
113
-
114
- # Return the URL to the audio file, converting path separators for web use
115
- audio_url = f"/{filepath.replace(os.path.sep, '/')}"
116
- return jsonify({"audio_url": audio_url})
117
-
 
118
  except Exception as e:
119
- app.logger.error(f"Audio Generation Error: {e}")
120
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
121
 
122
- # Serve the main index.html file
123
  @app.route('/')
124
  def serve_index():
125
  return send_from_directory('static', 'index.html')
126
 
127
- # Serve other static files (CSS, JS, audio files)
128
  @app.route('/<path:path>')
129
  def serve_static(path):
130
- # Ensure that only files from 'static' are served
131
  return send_from_directory('static', path)
132
 
133
  if __name__ == '__main__':
134
- # Run the Flask app
135
- # debug=True allows automatic reloading on code changes and provides more detailed error messages
136
- app.run(host="0.0.0.0", port=7860)
 
1
+ # app.py - Complete Flask Backend
2
  from flask import Flask, request, jsonify, send_from_directory
3
  import google.generativeai as genai
4
  from dotenv import load_dotenv
 
6
  from flask_cors import CORS
7
  import markdown2
8
  import re
9
+ from gtts import gTTS
10
+ import uuid
11
+ import logging
12
+
# --- Logging ---------------------------------------------------------------
# Root logger runs at INFO; this module logs through its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read settings from a .env file (if present) into the process environment.
load_dotenv()

# --- Paths and limits ------------------------------------------------------
# Directory that receives generated speech files; created up front so the
# audio endpoint can write into it without checking first.
AUDIO_FOLDER = os.path.join('static', 'audio')
os.makedirs(AUDIO_FOLDER, exist_ok=True)
# Upper bound, in characters, on the text handed to text-to-speech.
MAX_AUDIO_LENGTH = 5000

# --- Flask application -----------------------------------------------------
app = Flask(__name__, static_folder='static')
# Allow cross-origin requests on every route.
CORS(app)
28
+
# System prompt passed to the Gemini model as its system instruction.
# process_response() looks for the literal "[AUDIO]" marker that section 1
# asks the model to emit, so keep the two in sync when editing this text.
SYSTEM_INSTRUCTION = """
You are AstroChat, an advanced AI assistant with voice capabilities. Follow these guidelines:

1. Voice Responses:
   - When users request audio (e.g., "read this", "speak aloud", "audio version"), include [AUDIO] in response
   - Structure responses for optimal TTS:
     * Short sentences (12-15 words)
     * Pause between paragraphs
     * Spell out complex terms

2. Content Formatting:
   - Code: Explain → Format in markdown
   - Lists: Use bullet points
   - Quotes: Provide attribution
   - Math/Science: Explain symbols verbally

3. Interaction Style:
   - Friendly but professional
   - Ask clarifying questions
   - Admit knowledge limits
   - Offer follow-up suggestions

4. Special Cases:
   - Acronyms: Spell out first use
   - Names: Provide pronunciation hints
   - Technical terms: Give simple definitions
"""

# Initialize Gemini
_gemini_api_key = os.getenv("GEMINI_API_KEY")
if not _gemini_api_key:
    # Surface the misconfiguration at startup instead of as an opaque error
    # on the first chat request.
    logger.warning(
        "GEMINI_API_KEY is not set; Gemini requests will fail unless the "
        "SDK finds a key elsewhere"
    )
genai.configure(api_key=_gemini_api_key)

# The model id can be overridden with GEMINI_MODEL so a deployment can move
# to another model without a code change; the default is unchanged.
model = genai.GenerativeModel(
    os.getenv("GEMINI_MODEL", "gemini-1.5-flash"),
    system_instruction=SYSTEM_INSTRUCTION,
)
64
 
def process_response(text):
    """Turn a raw model reply into the payload returned by the chat endpoint.

    Args:
        text: Markdown reply from the model, possibly containing the
            literal ``[AUDIO]`` marker.

    Returns:
        dict with three keys:
            ``response_html``   - the reply rendered from markdown to HTML,
            ``response_text``   - the reply with the marker removed and
                                  surrounding whitespace stripped,
            ``audio_requested`` - True when the marker was present.
    """
    marker = '[AUDIO]'
    wants_audio = marker in text
    body = text.replace(marker, '').strip()

    rendered = markdown2.markdown(
        body,
        extras=["fenced-code-blocks", "tables", "code-friendly", "cuddled-lists"],
    )

    # (pattern, replacement) pairs applied in order to the rendered HTML:
    #   1. tag code blocks with a CSS class for styling,
    #   2. make links open in a new tab without exposing window.opener.
    rewrites = (
        (r'<pre><code(.*?)>', r'<pre class="code-block"><code\1>'),
        (
            r'<a href="(.*?)">(.*?)</a>',
            r'<a href="\1" target="_blank" rel="noopener">\2</a>',
        ),
    )
    for pattern, replacement in rewrites:
        rendered = re.sub(pattern, replacement, rendered)

    return {
        "response_html": rendered,
        "response_text": body,
        "audio_requested": wants_audio,
    }
98
 
@app.route('/chat', methods=['POST'])
def handle_chat():
    """Answer one chat message with a Gemini-generated reply.

    Expects a JSON object body with a string ``message`` field.

    Returns:
        200 with the dict produced by ``process_response`` (keys
        ``response_html``, ``response_text``, ``audio_requested``);
        400 with ``{"error": ...}`` when the body is not a JSON object or
        ``message`` is missing, not a string, or blank;
        500 with ``{"error": ..., "details": ...}`` when generating or
        processing the reply fails.
    """
    # silent=True yields None for a missing, malformed or non-JSON body
    # instead of raising, so bad client input is answered with 400, not 500.
    payload = request.get_json(silent=True)
    message = payload.get('message') if isinstance(payload, dict) else None
    if not isinstance(message, str) or not message.strip():
        return jsonify({"error": "Empty message"}), 400
    user_message = message.strip()

    # Phrases in the user's own message that force audio, regardless of
    # whether the model emitted its [AUDIO] marker.
    audio_triggers = (
        "read aloud", "speak this", "audio please",
        "say it", "voice response", "read this",
        "can you speak", "tell me aloud",
    )
    lowered = user_message.lower()
    explicit_audio = any(trigger in lowered for trigger in audio_triggers)

    # Only the model call and reply processing are treated as server errors.
    try:
        response = model.generate_content(user_message)
        processed = process_response(response.text)
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Chat error: %s", e)
        # NOTE(review): "details" exposes the raw exception text to the
        # client; kept because existing callers may read it.
        return jsonify({
            "error": "I encountered an error",
            "details": str(e)
        }), 500

    if explicit_audio:
        processed["audio_requested"] = True

    return jsonify(processed)
135
 
@app.route('/generate-audio', methods=['POST'])
def handle_audio():
    """Convert text to an MP3 file with gTTS and return its URL.

    Expects a JSON object body with a string ``text`` field.

    Returns:
        200 with ``{"audio_url": "/audio/<file>", "text_length": <int>}``,
        where ``text_length`` is the length of the cleaned (and possibly
        truncated) text;
        400 with ``{"error": ...}`` when the body is not a JSON object,
        ``text`` is missing, not a string or blank, or nothing remains
        after markdown characters are stripped;
        500 with ``{"error": ..., "details": ...}`` when speech generation
        or saving the file fails.
    """
    # silent=True yields None for a missing, malformed or non-JSON body
    # instead of raising, so bad client input is answered with 400, not 500.
    payload = request.get_json(silent=True)
    text = payload.get('text') if isinstance(payload, dict) else None
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "No text provided"}), 400

    # Drop markdown punctuation so it is not read aloud, then collapse
    # runs of whitespace.
    clean_text = re.sub(r'[\*_`#\[\]]', '', text)
    clean_text = re.sub(r'\s+', ' ', clean_text).strip()

    # Input made only of markdown characters (e.g. "***") is empty by now;
    # gTTS rejects empty text, so report it as a client error.
    if not clean_text:
        return jsonify({
            "error": "Text became empty after cleaning, cannot generate audio."
        }), 400

    # Cap the amount of text sent to the TTS service.
    if len(clean_text) > MAX_AUDIO_LENGTH:
        clean_text = clean_text[:MAX_AUDIO_LENGTH]
        clean_text += "... [content truncated]"

    # Split camelCase so identifiers are spoken as separate words. This is
    # done here rather than through gTTS's pre_processor_funcs argument,
    # because supplying that argument replaces gTTS's default
    # pre-processors instead of adding to them.
    spoken_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', clean_text)

    # A UUID keeps concurrent requests from overwriting each other's files.
    filename = f"audio_{uuid.uuid4()}.mp3"
    filepath = os.path.join(AUDIO_FOLDER, filename)

    try:
        tts = gTTS(
            text=spoken_text,
            lang='en',
            slow=False,
            lang_check=False,
        )
        tts.save(filepath)
    except Exception as e:
        logger.exception("Audio error: %s", e)
        # Do not leave a partial or empty file behind after a failed save.
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError:
            logger.warning("Could not remove incomplete audio file %s", filepath)
        return jsonify({
            "error": "Audio generation failed",
            "details": str(e)
        }), 500

    return jsonify({
        "audio_url": f"/audio/{filename}",
        "text_length": len(clean_text)
    })
181
+
@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve a generated audio file from ``AUDIO_FOLDER``.

    Args:
        filename: Single path segment naming the file to send.

    Returns:
        The file response, or 404 with ``{"error": "Audio file not found"}``
        when no such regular file exists.
    """
    # send_from_directory reports a missing file by raising werkzeug's
    # NotFound, not FileNotFoundError, so an `except FileNotFoundError`
    # never fires. Check explicitly to return the JSON 404 body.
    # The path is resolved against app.root_path, the same base that
    # send_from_directory uses for a relative directory.
    candidate = os.path.join(app.root_path, AUDIO_FOLDER, filename)
    if not os.path.isfile(candidate):
        return jsonify({"error": "Audio file not found"}), 404

    # send_from_directory still performs its own safe path join.
    return send_from_directory(AUDIO_FOLDER, filename)
188
 
 
@app.route('/')
def serve_index():
    """Return the front-end entry page, ``static/index.html``."""
    landing_page = 'index.html'
    return send_from_directory('static', landing_page)
192
 
 
@app.route('/<path:path>')
def serve_static(path):
    """Catch-all route: send the file at ``path`` from the ``static`` directory."""
    static_root = 'static'
    return send_from_directory(static_root, path)
196
 
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT and falls back
    # to 7860 when the variable is unset.
    app.run(host="0.0.0.0", port=int(os.environ.get('PORT', 7860)))