SohomToom committed on
Commit
96c6f9a
·
verified ·
1 Parent(s): f4a6ae0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -32
app.py CHANGED
@@ -137,36 +137,45 @@ def generate_sample_audio(sample_text, speaker_label):
137
 
138
  def generate_audio(docx_file, speaker_label):
139
  speaker_id = get_speaker_id_from_label(speaker_label)
140
- model = TTS("tts_models/en/vctk/vits")
141
-
142
- paragraphs = extract_paragraphs_from_docx(docx_file)
143
- combined_audio = AudioSegment.empty()
144
- temp_files = []
145
-
146
- try:
147
- for idx, para in enumerate(paragraphs):
148
- tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
149
- model.tts_to_file(text=para, speaker="p"+speaker_id, file_path=tmp.name)
150
- audio_chunk = AudioSegment.from_wav(tmp.name)
151
- combined_audio += audio_chunk
152
- temp_files.append(tmp.name)
153
- tmp.close()
154
-
155
- except Exception as e:
156
- print("Generation interrupted. Saving partial output.", e)
157
-
158
- output_dir = tempfile.mkdtemp()
159
- final_output_path = os.path.join(output_dir, "final_output.wav")
160
- combined_audio.export(final_output_path, format="wav")
161
-
162
- zip_path = os.path.join(output_dir, "output.zip")
163
- with zipfile.ZipFile(zip_path, 'w') as zipf:
164
- zipf.write(final_output_path, arcname="final_output.wav")
165
-
166
- for f in temp_files:
167
- os.remove(f)
168
-
169
- return zip_path
 
 
 
 
 
 
 
 
 
170
 
171
  # --- UI ---
172
  speaker_choices = list_speaker_choices()
@@ -177,14 +186,22 @@ with gr.Blocks() as demo:
177
  with gr.Row():
178
  speaker_dropdown = gr.Dropdown(label="Select Voice", choices=speaker_choices)
179
 
 
180
  with gr.Row():
181
  sample_textbox = gr.Textbox(label="Enter Sample Text (Max 500 characters)", max_lines=5)
182
  sample_button = gr.Button("Generate Sample")
183
  clear_button = gr.Button("Clear Sample")
184
 
 
 
 
185
  sample_audio = gr.Audio(label="Sample Output", type="filepath")
186
 
187
- sample_button.click(fn=generate_sample_audio, inputs=[sample_textbox, speaker_dropdown], outputs=[sample_audio])
 
 
 
 
188
  clear_button.click(fn=lambda: None, inputs=[], outputs=[sample_audio])
189
 
190
  with gr.Row():
@@ -192,7 +209,11 @@ with gr.Blocks() as demo:
192
  generate_button = gr.Button("Generate Full Audio")
193
  download_output = gr.File(label="Download Output Zip")
194
 
195
- generate_button.click(fn=generate_audio, inputs=[docx_input, speaker_dropdown], outputs=[download_output])
 
 
 
 
196
 
197
  if __name__ == "__main__":
198
  demo.launch()
 
137
 
138
  def generate_audio(docx_file, speaker_label):
139
  speaker_id = get_speaker_id_from_label(speaker_label)
140
+
141
+ if engine_choice == "Bark":
142
+ from bark import generate_audio
143
+ from bark.generation import preload_models
144
+ preload_models()
145
+ audio_array = generate_audio(sample_text)
146
+ tmp_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
147
+ AudioSegment(audio_array.tobytes(), frame_rate=24000, sample_width=2, channels=1).export(tmp_path, format="wav")
148
+ return tmp_path
149
+ else:
150
+ model = TTS("tts_models/en/vctk/vits")
151
+ paragraphs = extract_paragraphs_from_docx(docx_file)
152
+ combined_audio = AudioSegment.empty()
153
+ temp_files = []
154
+
155
+ try:
156
+ for idx, para in enumerate(paragraphs):
157
+ tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
158
+ model.tts_to_file(text=para, speaker="p"+speaker_id, file_path=tmp.name)
159
+ audio_chunk = AudioSegment.from_wav(tmp.name)
160
+ combined_audio += audio_chunk
161
+ temp_files.append(tmp.name)
162
+ tmp.close()
163
+
164
+ except Exception as e:
165
+ print("Generation interrupted. Saving partial output.", e)
166
+
167
+ output_dir = tempfile.mkdtemp()
168
+ final_output_path = os.path.join(output_dir, "final_output.wav")
169
+ combined_audio.export(final_output_path, format="wav")
170
+
171
+ zip_path = os.path.join(output_dir, "output.zip")
172
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
173
+ zipf.write(final_output_path, arcname="final_output.wav")
174
+
175
+ for f in temp_files:
176
+ os.remove(f)
177
+
178
+ return zip_path
179
 
180
  # --- UI ---
181
  speaker_choices = list_speaker_choices()
 
186
  with gr.Row():
187
  speaker_dropdown = gr.Dropdown(label="Select Voice", choices=speaker_choices)
188
 
189
+
190
  with gr.Row():
191
  sample_textbox = gr.Textbox(label="Enter Sample Text (Max 500 characters)", max_lines=5)
192
  sample_button = gr.Button("Generate Sample")
193
  clear_button = gr.Button("Clear Sample")
194
 
195
+ tts_engine_dropdown = gr.Dropdown(label="TTS Engine", choices=["Coqui (XTTS)", "Bark"], value="Coqui (XTTS)")
196
+
197
+
198
  sample_audio = gr.Audio(label="Sample Output", type="filepath")
199
 
200
+ sample_button.click(
201
+ fn=generate_sample_audio,
202
+ inputs=[sample_textbox, speaker_dropdown, tts_engine_dropdown],
203
+ outputs=[sample_audio]
204
+ )
205
  clear_button.click(fn=lambda: None, inputs=[], outputs=[sample_audio])
206
 
207
  with gr.Row():
 
209
  generate_button = gr.Button("Generate Full Audio")
210
  download_output = gr.File(label="Download Output Zip")
211
 
212
+ generate_button.click(
213
+ fn=generate_audio,
214
+ inputs=[docx_input, speaker_dropdown, tts_engine_dropdown],
215
+ outputs=[download_output]
216
+ )
217
 
218
  if __name__ == "__main__":
219
  demo.launch()