File size: 5,808 Bytes
ec9b387
1deacc5
3ebc508
 
 
 
1deacc5
 
465bca7
 
3ebc508
 
 
 
ec9b387
14c0817
ec9b387
 
14c0817
3ebc508
1deacc5
ec9b387
 
 
 
83027f0
 
 
 
 
 
 
 
 
 
 
 
14c0817
 
3ebc508
14c0817
ec9b387
 
14c0817
3ebc508
ec9b387
 
 
 
 
 
1deacc5
76141c4
 
83027f0
 
 
 
76141c4
 
83027f0
 
 
 
 
 
 
 
 
 
 
 
76141c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ebc508
ec9b387
3ebc508
 
ec9b387
3ebc508
263ee79
ec9b387
3ebc508
76141c4
 
 
83027f0
 
 
 
263ee79
83027f0
 
 
 
76141c4
 
 
 
83027f0
 
 
 
 
76141c4
83027f0
76141c4
 
 
83027f0
3ebc508
ec9b387
 
83027f0
76141c4
ec9b387
3ebc508
 
ec9b387
 
14c0817
 
ec9b387
14c0817
 
ec9b387
 
 
14c0817
ec9b387
 
 
 
 
76141c4
ec9b387
 
14c0817
ec9b387
 
14c0817
465bca7
76141c4
ec9b387
 
 
 
 
3ebc508
76141c4
 
 
 
3ebc508
 
1deacc5
 
 
 
 
3ebc508
e81567c
ec9b387
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# app.py - Flask Backend
from flask import Flask, request, jsonify, send_from_directory
import google.generativeai as genai
from dotenv import load_dotenv
import os
from flask_cors import CORS
import markdown2
import re
from gtts import gTTS
import uuid

# Load environment variables (python-dotenv) before any os.getenv() lookup
# further down; the Gemini API key is read from the environment at import time.
load_dotenv()

# Directory where generated MP3 files are stored.
# It is anchored to this file's directory instead of the current working
# directory: Flask resolves relative directories against the application root,
# so a CWD-relative path would make the writer and the file-serving routes
# disagree whenever the process is started from another directory.
AUDIO_FOLDER = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'static', 'audio'
)
# exist_ok=True removes the check-then-create race when several workers
# import this module at the same time.
os.makedirs(AUDIO_FOLDER, exist_ok=True)

# Initialize Flask app, using the 'static' directory as its static folder
app = Flask(__name__, static_folder='static')
# Enable CORS for all routes.
# NOTE(review): no origin restriction is passed here - confirm that accepting
# cross-origin requests from any site is intended for deployment.
CORS(app)

# Configure Gemini with a system instruction.
# The instruction tells the model how to phrase replies and asks it to wrap
# spoken content in [AUDIO_START]/[AUDIO_END]; the server strips those markers
# from the visible reply and uses the enclosed text for speech synthesis.
# The emoji is stored as real UTF-8 text (it had been corrupted into mojibake).
system_instruction_text = """
You are a friendly, natural-sounding AI assistant named Athspi. 
When responding:
- Use a warm, conversational tone
- Never mention technical terms like "audio", "text", or "response"
- For stories, begin with "Here's your story 👇" followed by a friendly intro
- For explanations, use simple, clear language
- Format responses for pleasant reading and listening
- When audio is requested, include story content between special markers as shown:
  [AUDIO_START]
  [story content here]
  [AUDIO_END]
But DO NOT include these markers in the visible response
"""

# The API key comes from the environment (GEMINI_API_KEY).
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel(
    'gemini-2.5-flash',
    system_instruction=system_instruction_text
)

def convert_markdown_to_html(text):
    """Render Markdown ``text`` to an HTML fragment.

    The text is rendered with markdown2 (fenced code blocks and tables
    enabled), ``<pre><code ...>`` openings get a ``code-block`` class, and any
    ``**bold**`` / ``*italic*`` markers still present afterwards are turned
    into ``<strong>`` / ``<em>`` tags.

    The emphasis fallback is applied only *outside* ``<pre>``/``<code>``
    elements. Running it over the whole document rewrote literal asterisks in
    code samples (``*args, **kwargs``, ``a * b * c``) into HTML tags.

    Args:
        text: Markdown source.

    Returns:
        The rendered HTML as a string.
    """
    html = markdown2.markdown(text, extras=["fenced-code-blocks", "tables"])
    html = re.sub(r'<pre><code(.*?)>', r'<pre class="code-block"><code\1>', html)

    # With one capturing group, re.split yields alternating pieces:
    # even indexes are ordinary markup, odd indexes are code elements.
    pieces = re.split(r'(<pre\b.*?</pre>|<code\b.*?</code>)', html, flags=re.DOTALL)
    for index in range(0, len(pieces), 2):
        chunk = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', pieces[index])
        pieces[index] = re.sub(r'\*(.*?)\*', r'<em>\1</em>', chunk)
    return ''.join(pieces)

# Words and phrases that signal the user wants the reply spoken.
_AUDIO_KEYWORDS = (
    'audio', 'speak', 'say it', 'read aloud',
    'hear', 'listen', 'tell me out loud',
)

# Whole-word match, optionally followed by a plural or "-ing" ending, so that
# "listening" still counts but "heart" or "loudspeaker" does not.
_AUDIO_REQUEST_RE = re.compile(
    r'\b(?:' + '|'.join(re.escape(keyword) for keyword in _AUDIO_KEYWORDS)
    + r')(?:s|ing)?\b',
    re.IGNORECASE,
)


def detect_audio_request(text):
    """Tell whether ``text`` asks for a spoken (audio) answer.

    Matching is case-insensitive and works on whole words, so keywords that
    merely occur inside another word ("hear" in "heart", "speak" in
    "loudspeaker") no longer trigger audio generation.

    Args:
        text: The user's message. Non-string values are treated as
            "no audio requested" instead of raising.

    Returns:
        True if an audio keyword or phrase is present, otherwise False.
    """
    if not isinstance(text, str):
        return False
    return _AUDIO_REQUEST_RE.search(text) is not None

def extract_audio_content(full_text):
    """Return the part of ``full_text`` that should be spoken.

    Text enclosed in ``[AUDIO_START]`` ... ``[AUDIO_END]`` is extracted. When
    several marked sections exist they are all used, joined by a blank line,
    instead of silently dropping everything after the first one.

    If there is no complete marker pair the whole text is returned, with any
    stray or unpaired marker removed so it is never read out loud.

    Args:
        full_text: The raw model reply, possibly containing markers.

    Returns:
        The text to synthesize. This is an empty string when markers are
        present but enclose nothing; callers may then fall back to other text.
    """
    marker = r'\[AUDIO_(?:START|END)\]'
    sections = re.findall(r'\[AUDIO_START\](.*?)\[AUDIO_END\]', full_text, re.DOTALL)
    if sections:
        # A section can still hold a nested/duplicated marker; drop those too.
        cleaned = (re.sub(marker, '', section).strip() for section in sections)
        return '\n\n'.join(part for part in cleaned if part)

    if re.search(marker, full_text):
        return re.sub(marker, '', full_text).strip()
    return full_text

def clean_visible_response(full_text):
    """Strip the audio marker tags out of the text shown to the user.

    Every ``[AUDIO_START]`` and ``[AUDIO_END]`` tag is deleted (the text
    between them is kept) and surrounding whitespace is trimmed.
    """
    marker_pattern = re.compile(r'\[AUDIO_(START|END)\]')
    without_markers = marker_pattern.sub('', full_text)
    return without_markers.strip()

def generate_audio_file(text):
    """Synthesize ``text`` to an MP3 in ``AUDIO_FOLDER``.

    Markdown punctuation (``*``, ``_``, backtick, ``#``) is removed and runs
    of whitespace are collapsed before synthesis.

    Returns:
        The generated file name (a random UUID with ``.mp3`` suffix), or
        ``None`` when nothing is left to speak after cleaning.
    """
    without_markup = re.sub(r'[\*_`#]', '', text)
    speakable = re.sub(r'\s+', ' ', without_markup).strip()

    if speakable:
        filename = f"{uuid.uuid4()}.mp3"
        target_path = os.path.join(AUDIO_FOLDER, filename)
        gTTS(text=speakable, lang='en', slow=False).save(target_path)
        return filename

    return None

@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message with Gemini, optionally adding spoken audio.

    Expects a JSON object with a non-empty string under ``message``. If the
    message looks like a request for spoken output, the prompt is extended to
    ask for ``[AUDIO_START]``/``[AUDIO_END]`` markers and an MP3 is generated
    from the marked content (or from the visible reply if the markers enclose
    nothing).

    Returns:
        200 with ``response_html``, ``response_text`` and ``audio_url``
        (``None`` when no audio was produced);
        400 with ``error`` when the body is not JSON or has no usable message;
        500 with ``error`` when generation or synthesis fails.
    """
    try:
        # silent=True yields None for a missing/malformed JSON body instead of
        # raising, so bad requests get a 400 rather than falling into the 500 path.
        data = request.get_json(silent=True)
        user_message = data.get('message') if isinstance(data, dict) else None

        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"error": "No message provided"}), 400

        # Detect if user is requesting audio
        audio_requested = detect_audio_request(user_message)

        # Add instruction for audio markers if requested
        prompt = user_message
        if audio_requested:
            prompt += "\n\nPlease include [AUDIO_START] and [AUDIO_END] markers around the story content."

        response = model.generate_content(prompt)
        full_response = response.text

        # Clean visible response by removing audio markers
        visible_response = clean_visible_response(full_response)

        # Generate audio if requested
        audio_url = None
        if audio_requested:
            # Prefer the marked content; fall back to the visible reply when
            # the markers enclose nothing.
            audio_content = extract_audio_content(full_response) or visible_response

            audio_filename = generate_audio_file(audio_content)
            if audio_filename:
                audio_url = f"/static/audio/{audio_filename}"

        html_response = convert_markdown_to_html(visible_response)

        return jsonify({
            "response_html": html_response,
            "response_text": visible_response,
            "audio_url": audio_url
        })

    except Exception as e:
        # Top-level boundary of the request: log with traceback, report to client.
        app.logger.exception("Chat Error: %s", e)
        return jsonify({"error": str(e)}), 500

@app.route('/generate-audio', methods=['POST'])
def generate_audio():
    """Synthesize caller-supplied text to an MP3 and return its URL.

    Expects a JSON object with a non-empty string under ``text``. Cleaning
    and synthesis are delegated to ``generate_audio_file`` so this endpoint
    and the chat endpoint cannot drift apart.

    Returns:
        200 with ``audio_url``;
        400 with ``error`` when the body is not JSON, has no usable text, or
        the text is empty after cleaning;
        500 with ``error`` when synthesis fails.
    """
    try:
        # silent=True yields None for a missing/malformed JSON body instead of
        # raising, so bad requests get a 400 rather than falling into the 500 path.
        data = request.get_json(silent=True)
        text_to_speak = data.get('text') if isinstance(data, dict) else None

        if not isinstance(text_to_speak, str) or not text_to_speak:
            return jsonify({"error": "No text provided"}), 400

        # The helper returns None when nothing speakable is left after cleaning.
        filename = generate_audio_file(text_to_speak)
        if filename is None:
            return jsonify({"error": "Text became empty after cleaning"}), 400

        audio_url = f"/static/audio/{filename}"
        return jsonify({"audio_url": audio_url})

    except Exception as e:
        # Top-level boundary of the request: log with traceback, report to client.
        app.logger.exception("Audio Generation Error: %s", e)
        return jsonify({"error": str(e)}), 500

@app.route('/static/audio/<filename>')
def serve_audio(filename):
    """Send the generated audio file named ``filename`` from ``AUDIO_FOLDER``."""
    audio_response = send_from_directory(AUDIO_FOLDER, filename)
    return audio_response

@app.route('/')
def serve_index():
    """Send ``index.html`` from the ``static`` directory for the root URL."""
    index_response = send_from_directory('static', 'index.html')
    return index_response

@app.route('/<path:path>')
def serve_static(path):
    """Send the file at ``path`` from the ``static`` directory."""
    static_response = send_from_directory('static', path)
    return static_response

# Start Flask's built-in server only when this file is executed directly.
if __name__ == '__main__':
    # Listen on every network interface (0.0.0.0), port 7860.
    app.run(host="0.0.0.0", port=7860)