ramimu committed on
Commit
01fd073
·
verified ·
1 Parent(s): deb04b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -19
app.py CHANGED
@@ -179,9 +179,9 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
179
  return None, "Error: Please upload a reference audio file (.wav or .mp3)."
180
 
181
  try:
182
- print(f"Received request:")
183
  print(f" Text: '{text_to_speak}'")
184
- print(f" Audio: '{reference_audio_path}'")
185
  print(f" Exaggeration: {exaggeration}")
186
  print(f" CFG/Pace: {cfg_pace}")
187
  print(f" Random Seed: {random_seed}")
@@ -206,7 +206,7 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
206
  except:
207
  sample_rate = 24000
208
 
209
- print(f"Audio generated successfully. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
210
 
211
  if isinstance(output_wav_data, str):
212
  return output_wav_data, "Success: Audio generated successfully!"
@@ -219,11 +219,12 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
219
  return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
220
 
221
  except Exception as e:
222
- print(f"ERROR: Failed during audio generation: {e}")
223
- print("Detailed error trace for audio generation:")
224
  traceback.print_exc()
225
  return None, f"Error during audio generation: {str(e)}. Check logs for more details."
226
 
 
227
  def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
228
  import requests
229
  import tempfile
@@ -232,19 +233,33 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
232
 
233
  temp_audio_path = None
234
  try:
235
- if reference_audio_url.startswith('data:audio'):
 
 
 
 
 
 
 
236
  header, encoded = reference_audio_url.split(',', 1)
237
  audio_data = base64.b64decode(encoded)
 
 
238
  if 'mp3' in header:
239
  ext = '.mp3'
240
  elif 'wav' in header:
241
  ext = '.wav'
242
  else:
243
  ext = '.wav'
 
244
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
245
  temp_file.write(audio_data)
246
  temp_audio_path = temp_file.name
247
- elif reference_audio_url.startswith('http'):
 
 
 
 
248
  response = requests.get(reference_audio_url)
249
  response.raise_for_status()
250
  if reference_audio_url.endswith('.mp3'):
@@ -252,33 +267,62 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
252
  elif reference_audio_url.endswith('.wav'):
253
  ext = '.wav'
254
  else:
255
- ext = '.wav'
256
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
257
  temp_file.write(response.content)
258
  temp_audio_path = temp_file.name
 
 
 
 
259
  else:
260
- temp_audio_path = reference_audio_url
 
 
 
 
 
 
 
 
 
261
 
 
262
  audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
 
263
 
264
- if temp_audio_path and temp_audio_path != reference_audio_url:
 
 
265
  try:
266
  os.unlink(temp_audio_path)
267
- except:
268
- pass
 
 
269
  return audio_output, status
 
270
  except Exception as e:
271
- if temp_audio_path and temp_audio_path != reference_audio_url:
 
 
 
 
 
 
272
  try:
273
- os.unlink(temp_audio_path)
274
- except:
275
- pass
 
 
276
  return None, f"API Error: {str(e)}"
277
 
 
278
  def main():
279
  print("Starting Advanced Gradio interface...")
280
  iface = gr.Interface(
281
- fn=clone_voice_api,
282
  inputs=[
283
  gr.Textbox(
284
  label="Text to Speak",
@@ -286,7 +330,7 @@ def main():
286
  lines=3
287
  ),
288
  gr.Audio(
289
- type="filepath",
290
  label="Reference Audio (Upload a short .wav or .mp3 clip)",
291
  sources=["upload", "microphone"]
292
  ),
@@ -339,8 +383,10 @@ def main():
339
  show_error=True,
340
  quiet=False,
341
  favicon_path=None,
342
- share=False,
343
  auth=None
 
 
344
  )
345
 
346
  if __name__ == "__main__":
 
179
  return None, "Error: Please upload a reference audio file (.wav or .mp3)."
180
 
181
  try:
182
+ print(f"clone_voice function called:")
183
  print(f" Text: '{text_to_speak}'")
184
+ print(f" Audio Path: '{reference_audio_path}'")
185
  print(f" Exaggeration: {exaggeration}")
186
  print(f" CFG/Pace: {cfg_pace}")
187
  print(f" Random Seed: {random_seed}")
 
206
  except:
207
  sample_rate = 24000
208
 
209
+ print(f"Audio generated successfully by clone_voice. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
210
 
211
  if isinstance(output_wav_data, str):
212
  return output_wav_data, "Success: Audio generated successfully!"
 
219
  return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
220
 
221
  except Exception as e:
222
+ print(f"ERROR: Failed during audio generation in clone_voice: {e}")
223
+ print("Detailed error trace for audio generation in clone_voice:")
224
  traceback.print_exc()
225
  return None, f"Error during audio generation: {str(e)}. Check logs for more details."
226
 
227
+ # Updated clone_voice_api function with detailed logging
228
  def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
229
  import requests
230
  import tempfile
 
233
 
234
  temp_audio_path = None
235
  try:
236
+ print(f"API call received by clone_voice_api:")
237
+ print(f" Text: {text_to_speak}")
238
+ print(f" Audio URL type: {type(reference_audio_url)}")
239
+ print(f" Audio URL preview: {str(reference_audio_url)[:100]}...")
240
+ print(f" Parameters: exag={exaggeration}, cfg={cfg_pace}, seed={random_seed}, temp={temperature}")
241
+
242
+ if isinstance(reference_audio_url, str) and reference_audio_url.startswith('data:audio'):
243
+ print("Processing base64 audio data...")
244
  header, encoded = reference_audio_url.split(',', 1)
245
  audio_data = base64.b64decode(encoded)
246
+ print(f"Decoded audio data size: {len(audio_data)} bytes")
247
+
248
  if 'mp3' in header:
249
  ext = '.mp3'
250
  elif 'wav' in header:
251
  ext = '.wav'
252
  else:
253
  ext = '.wav'
254
+
255
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
256
  temp_file.write(audio_data)
257
  temp_audio_path = temp_file.name
258
+
259
+ print(f"Created temporary audio file from base64: {temp_audio_path}")
260
+
261
+ elif isinstance(reference_audio_url, str) and reference_audio_url.startswith('http'):
262
+ print("Processing HTTP audio URL...")
263
  response = requests.get(reference_audio_url)
264
  response.raise_for_status()
265
  if reference_audio_url.endswith('.mp3'):
 
267
  elif reference_audio_url.endswith('.wav'):
268
  ext = '.wav'
269
  else:
270
+ ext = '.wav' # Default
271
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
272
  temp_file.write(response.content)
273
  temp_audio_path = temp_file.name
274
+ print(f"Created temporary audio file from URL: {temp_audio_path}")
275
+ elif isinstance(reference_audio_url, str) and os.path.exists(reference_audio_url):
276
+ print("Using direct file path provided as string...")
277
+ temp_audio_path = reference_audio_url
278
  else:
279
+ # This case might occur if Gradio passes a TemporaryFileWrapper or similar
280
+ if hasattr(reference_audio_url, 'name'): # Check if it's a file-like object from Gradio
281
+ temp_audio_path = reference_audio_url.name
282
+ print(f"Using file path from Gradio object: {temp_audio_path}")
283
+ else:
284
+ print(f"Warning: Unrecognized audio input type or path: {reference_audio_url}. Assuming it's a direct path.")
285
+ temp_audio_path = str(reference_audio_url) # Fallback, attempt to use as path
286
+
287
+ if not temp_audio_path or not os.path.exists(temp_audio_path):
288
+ raise ValueError(f"Failed to obtain a valid audio file path from input: {reference_audio_url}")
289
 
290
+ print(f"Calling core clone_voice function with audio path: {temp_audio_path}")
291
  audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
292
+ print(f"clone_voice returned: {type(audio_output)}, {status}")
293
 
294
+ # Clean up temporary file only if we created one from base64 or URL
295
+ if temp_audio_path and isinstance(reference_audio_url, str) and \
296
+ (reference_audio_url.startswith('data:audio') or reference_audio_url.startswith('http')):
297
  try:
298
  os.unlink(temp_audio_path)
299
+ print(f"Cleaned up temporary file: {temp_audio_path}")
300
+ except Exception as e:
301
+ print(f"Failed to clean up temp file {temp_audio_path}: {e}")
302
+
303
  return audio_output, status
304
+
305
  except Exception as e:
306
+ print(f"ERROR in clone_voice_api: {e}")
307
+ import traceback # Ensure traceback is imported here if not globally
308
+ traceback.print_exc()
309
+
310
+ # Attempt to clean up temporary file in case of error too
311
+ if temp_audio_path and isinstance(reference_audio_url, str) and \
312
+ (reference_audio_url.startswith('data:audio') or reference_audio_url.startswith('http')):
313
  try:
314
+ if os.path.exists(temp_audio_path): # Check existence before unlinking
315
+ os.unlink(temp_audio_path)
316
+ print(f"Cleaned up temporary file after error: {temp_audio_path}")
317
+ except Exception as e_clean:
318
+ print(f"Failed to clean up temp file {temp_audio_path} after error: {e_clean}")
319
  return None, f"API Error: {str(e)}"
320
 
321
+
322
  def main():
323
  print("Starting Advanced Gradio interface...")
324
  iface = gr.Interface(
325
+ fn=clone_voice, # The UI and default Gradio API will use clone_voice directly
326
  inputs=[
327
  gr.Textbox(
328
  label="Text to Speak",
 
330
  lines=3
331
  ),
332
  gr.Audio(
333
+ type="filepath", # Gradio handles file upload/mic and provides a filepath
334
  label="Reference Audio (Upload a short .wav or .mp3 clip)",
335
  sources=["upload", "microphone"]
336
  ),
 
383
  show_error=True,
384
  quiet=False,
385
  favicon_path=None,
386
+ share=False, # Set to True if you want a public link from your local machine
387
  auth=None
388
+ # app_kwargs for FastAPI specific settings are not directly used by gr.Interface.launch
389
+ # but if you were embedding in FastAPI, you'd pass them to FastAPI app.
390
  )
391
 
392
  if __name__ == "__main__":