SolarumAsteridion committed on
Commit
40b0211
·
verified ·
1 Parent(s): 9668073

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -0
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # To run this code you need to install the following dependencies:
2
+ # pip install flask google-genai
3
+
4
+ import base64
5
+ import mimetypes
6
+ import os
7
+ import re
8
+ import struct
9
+ import tempfile
10
+ from datetime import datetime
11
+ from flask import Flask, render_template, request, jsonify, send_file
12
+ from google import genai
13
+ from google.genai import types
14
+ import io
15
+
16
app = Flask(__name__)

# Most recently generated WAV bytes, or None until a generation succeeds.
# Assigned by generate_audio() and read by the /generate and /download
# handlers. This is a single module-level slot, so every successful
# generation replaces the previous audio.
latest_audio = None
20
+
21
def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
    """Prefix raw audio bytes with a 44-byte RIFF/WAVE header.

    Sample width and sample rate are looked up from ``mime_type`` through
    ``parse_audio_mime_type``. The header always declares one channel and
    audio format code 1.

    Args:
        audio_data: Raw audio payload placed after the header.
        mime_type: Audio MIME type string describing ``audio_data``.

    Returns:
        The packed header immediately followed by ``audio_data``.
    """
    audio_format = parse_audio_mime_type(mime_type)
    sample_bits = audio_format["bits_per_sample"]
    samples_per_second = audio_format["rate"]
    channels = 1
    payload_size = len(audio_data)
    frame_size = channels * (sample_bits // 8)

    wav_header = struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF",
        36 + payload_size,  # RIFF chunk size: rest of header plus payload
        b"WAVE",
        b"fmt ",
        16,  # size of the fmt sub-chunk
        1,  # audio format code
        channels,
        samples_per_second,
        samples_per_second * frame_size,  # byte rate
        frame_size,  # block align
        sample_bits,
        b"data",
        payload_size,
    )
    return wav_header + audio_data
50
+
51
+ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
52
+ """Parses bits per sample and rate from an audio MIME type string."""
53
+ bits_per_sample = 16
54
+ rate = 24000
55
+
56
+ parts = mime_type.split(";")
57
+ for param in parts:
58
+ param = param.strip()
59
+ if param.lower().startswith("rate="):
60
+ try:
61
+ rate_str = param.split("=", 1)[1]
62
+ rate = int(rate_str)
63
+ except (ValueError, IndexError):
64
+ pass
65
+ elif param.startswith("audio/L"):
66
+ try:
67
+ bits_per_sample = int(param.split("L", 1)[1])
68
+ except (ValueError, IndexError):
69
+ pass
70
+
71
+ return {"bits_per_sample": bits_per_sample, "rate": rate}
72
+
73
def generate_audio(text, voice="Zephyr", accent_type="hindi"):
    """Generate speech for ``text`` with Gemini TTS and cache it as WAV.

    The accent instruction selected by ``accent_type`` is prepended to
    ``text`` and sent to the ``gemini-2.5-flash-preview-tts`` model. All
    audio returned by the stream is concatenated, wrapped in a WAV header
    with ``convert_to_wav`` and stored in the module-level ``latest_audio``.

    Args:
        text: The text to be spoken.
        voice: Name of the prebuilt voice to request.
        accent_type: Key into the accent prompt table; unknown keys fall
            back to the ``"hindi"`` prompt.

    Returns:
        True if audio was received and stored in ``latest_audio``,
        False if the stream produced no audio data.
    """
    global latest_audio

    client = genai.Client(
        api_key=os.environ.get("GEMINI_API_KEY"),
    )

    model = "gemini-2.5-flash-preview-tts"

    # Different accent prompts
    accent_prompts = {
        "hindi": "Speak with a clear Indian Hindi accent, with moderate intonation and expressiveness.",
        "neutral": "Speak in a clear, neutral accent:",
        "british": "Speak with a British English accent:",
        "american": "Speak with an American English accent:"
    }

    prompt_text = f"{accent_prompts.get(accent_type, accent_prompts['hindi'])}\n\n{text}"

    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=prompt_text),
            ],
        ),
    ]

    generate_content_config = types.GenerateContentConfig(
        temperature=0.5,
        seed=42,
        response_modalities=["audio"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    voice_name=voice
                )
            )
        ),
    )

    audio_chunks = []
    mime_type = None

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # `not` covers both None and empty lists, so indexing [0] is safe.
        if not chunk.candidates:
            continue
        content = chunk.candidates[0].content
        if content is None or not content.parts:
            continue
        # Keep every audio part from every chunk: stopping at the first one
        # would drop any audio the stream delivers afterwards.
        for part in content.parts:
            inline_data = part.inline_data
            if inline_data and inline_data.data:
                audio_chunks.append(inline_data.data)
                if mime_type is None:
                    # NOTE(review): assumes later chunks use the same audio
                    # format as the first one - confirm against the API.
                    mime_type = inline_data.mime_type

    if not audio_chunks:
        return False

    # Convert to WAV format; an absent MIME type is passed as "" so the
    # parser falls back to its defaults instead of failing on None.
    latest_audio = convert_to_wav(b"".join(audio_chunks), mime_type or "")
    return True
140
+ return False
141
+
142
@app.route('/')
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
145
+
146
@app.route('/generate', methods=['POST'])
def generate():
    """Handle POST /generate: synthesize speech and return it as base64.

    Expects a JSON object with a required ``text`` string and optional
    ``voice`` (default ``'Zephyr'``) and ``accent`` (default ``'hindi'``).

    Returns:
        200 with ``{'success': True, 'audio': <base64 WAV>}`` on success;
        400 with an ``error`` message when the body is not a JSON object
        or ``text`` is missing, blank or not a string;
        500 with an ``error`` message when generation fails or raises.
    """
    # silent=True yields None for a missing or malformed JSON body instead
    # of raising, so client mistakes are reported as 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text', '')
    voice = data.get('voice', 'Zephyr')
    accent = data.get('accent', 'hindi')

    if not isinstance(text, str) or not text.strip():
        return jsonify({'error': 'Text is required'}), 400

    try:
        # Generate audio
        success = generate_audio(text, voice, accent)
    except Exception as e:
        # Top-level boundary: record the traceback server-side and keep
        # replying with JSON so the frontend can display the failure.
        app.logger.exception("Audio generation failed")
        return jsonify({'error': str(e)}), 500

    if success and latest_audio:
        # Convert to base64 for sending to frontend
        audio_base64 = base64.b64encode(latest_audio).decode('utf-8')
        return jsonify({
            'success': True,
            'audio': audio_base64
        })

    return jsonify({'error': 'Failed to generate audio'}), 500
172
+
173
@app.route('/download')
def download():
    """Send the cached ``latest_audio`` as a timestamped WAV attachment.

    Responds with a 404 JSON error when no audio is currently stored.
    """
    if not latest_audio:
        return jsonify({'error': 'No audio available'}), 404

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    wav_stream = io.BytesIO(latest_audio)
    return send_file(
        wav_stream,
        mimetype='audio/wav',
        as_attachment=True,
        download_name=f'generated_audio_{timestamp}.wav',
    )
183
+
184
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which allows
    # code execution from the browser. It is therefore opt-in through the
    # FLASK_DEBUG environment variable instead of being always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, port=5000)