Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -179,9 +179,9 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
|
|
179 |
return None, "Error: Please upload a reference audio file (.wav or .mp3)."
|
180 |
|
181 |
try:
|
182 |
-
print(f"
|
183 |
print(f" Text: '{text_to_speak}'")
|
184 |
-
print(f" Audio: '{reference_audio_path}'")
|
185 |
print(f" Exaggeration: {exaggeration}")
|
186 |
print(f" CFG/Pace: {cfg_pace}")
|
187 |
print(f" Random Seed: {random_seed}")
|
@@ -206,7 +206,7 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
|
|
206 |
except:
|
207 |
sample_rate = 24000
|
208 |
|
209 |
-
print(f"Audio generated successfully. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
|
210 |
|
211 |
if isinstance(output_wav_data, str):
|
212 |
return output_wav_data, "Success: Audio generated successfully!"
|
@@ -219,11 +219,12 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
|
|
219 |
return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
|
220 |
|
221 |
except Exception as e:
|
222 |
-
print(f"ERROR: Failed during audio generation: {e}")
|
223 |
-
print("Detailed error trace for audio generation:")
|
224 |
traceback.print_exc()
|
225 |
return None, f"Error during audio generation: {str(e)}. Check logs for more details."
|
226 |
|
|
|
227 |
def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
|
228 |
import requests
|
229 |
import tempfile
|
@@ -232,19 +233,33 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
|
|
232 |
|
233 |
temp_audio_path = None
|
234 |
try:
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
header, encoded = reference_audio_url.split(',', 1)
|
237 |
audio_data = base64.b64decode(encoded)
|
|
|
|
|
238 |
if 'mp3' in header:
|
239 |
ext = '.mp3'
|
240 |
elif 'wav' in header:
|
241 |
ext = '.wav'
|
242 |
else:
|
243 |
ext = '.wav'
|
|
|
244 |
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
|
245 |
temp_file.write(audio_data)
|
246 |
temp_audio_path = temp_file.name
|
247 |
-
|
|
|
|
|
|
|
|
|
248 |
response = requests.get(reference_audio_url)
|
249 |
response.raise_for_status()
|
250 |
if reference_audio_url.endswith('.mp3'):
|
@@ -252,33 +267,62 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
|
|
252 |
elif reference_audio_url.endswith('.wav'):
|
253 |
ext = '.wav'
|
254 |
else:
|
255 |
-
ext = '.wav'
|
256 |
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
|
257 |
temp_file.write(response.content)
|
258 |
temp_audio_path = temp_file.name
|
|
|
|
|
|
|
|
|
259 |
else:
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
|
|
|
262 |
audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
|
|
|
263 |
|
264 |
-
if
|
|
|
|
|
265 |
try:
|
266 |
os.unlink(temp_audio_path)
|
267 |
-
|
268 |
-
|
|
|
|
|
269 |
return audio_output, status
|
|
|
270 |
except Exception as e:
|
271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
try:
|
273 |
-
os.
|
274 |
-
|
275 |
-
|
|
|
|
|
276 |
return None, f"API Error: {str(e)}"
|
277 |
|
|
|
278 |
def main():
|
279 |
print("Starting Advanced Gradio interface...")
|
280 |
iface = gr.Interface(
|
281 |
-
fn=
|
282 |
inputs=[
|
283 |
gr.Textbox(
|
284 |
label="Text to Speak",
|
@@ -286,7 +330,7 @@ def main():
|
|
286 |
lines=3
|
287 |
),
|
288 |
gr.Audio(
|
289 |
-
type="filepath",
|
290 |
label="Reference Audio (Upload a short .wav or .mp3 clip)",
|
291 |
sources=["upload", "microphone"]
|
292 |
),
|
@@ -339,8 +383,10 @@ def main():
|
|
339 |
show_error=True,
|
340 |
quiet=False,
|
341 |
favicon_path=None,
|
342 |
-
share=False,
|
343 |
auth=None
|
|
|
|
|
344 |
)
|
345 |
|
346 |
if __name__ == "__main__":
|
|
|
179 |
return None, "Error: Please upload a reference audio file (.wav or .mp3)."
|
180 |
|
181 |
try:
|
182 |
+
print(f"clone_voice function called:")
|
183 |
print(f" Text: '{text_to_speak}'")
|
184 |
+
print(f" Audio Path: '{reference_audio_path}'")
|
185 |
print(f" Exaggeration: {exaggeration}")
|
186 |
print(f" CFG/Pace: {cfg_pace}")
|
187 |
print(f" Random Seed: {random_seed}")
|
|
|
206 |
except:
|
207 |
sample_rate = 24000
|
208 |
|
209 |
+
print(f"Audio generated successfully by clone_voice. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
|
210 |
|
211 |
if isinstance(output_wav_data, str):
|
212 |
return output_wav_data, "Success: Audio generated successfully!"
|
|
|
219 |
return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
|
220 |
|
221 |
except Exception as e:
|
222 |
+
print(f"ERROR: Failed during audio generation in clone_voice: {e}")
|
223 |
+
print("Detailed error trace for audio generation in clone_voice:")
|
224 |
traceback.print_exc()
|
225 |
return None, f"Error during audio generation: {str(e)}. Check logs for more details."
|
226 |
|
227 |
+
# Updated clone_voice_api function with detailed logging
|
228 |
def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
|
229 |
import requests
|
230 |
import tempfile
|
|
|
233 |
|
234 |
temp_audio_path = None
|
235 |
try:
|
236 |
+
print(f"API call received by clone_voice_api:")
|
237 |
+
print(f" Text: {text_to_speak}")
|
238 |
+
print(f" Audio URL type: {type(reference_audio_url)}")
|
239 |
+
print(f" Audio URL preview: {str(reference_audio_url)[:100]}...")
|
240 |
+
print(f" Parameters: exag={exaggeration}, cfg={cfg_pace}, seed={random_seed}, temp={temperature}")
|
241 |
+
|
242 |
+
if isinstance(reference_audio_url, str) and reference_audio_url.startswith('data:audio'):
|
243 |
+
print("Processing base64 audio data...")
|
244 |
header, encoded = reference_audio_url.split(',', 1)
|
245 |
audio_data = base64.b64decode(encoded)
|
246 |
+
print(f"Decoded audio data size: {len(audio_data)} bytes")
|
247 |
+
|
248 |
if 'mp3' in header:
|
249 |
ext = '.mp3'
|
250 |
elif 'wav' in header:
|
251 |
ext = '.wav'
|
252 |
else:
|
253 |
ext = '.wav'
|
254 |
+
|
255 |
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
|
256 |
temp_file.write(audio_data)
|
257 |
temp_audio_path = temp_file.name
|
258 |
+
|
259 |
+
print(f"Created temporary audio file from base64: {temp_audio_path}")
|
260 |
+
|
261 |
+
elif isinstance(reference_audio_url, str) and reference_audio_url.startswith('http'):
|
262 |
+
print("Processing HTTP audio URL...")
|
263 |
response = requests.get(reference_audio_url)
|
264 |
response.raise_for_status()
|
265 |
if reference_audio_url.endswith('.mp3'):
|
|
|
267 |
elif reference_audio_url.endswith('.wav'):
|
268 |
ext = '.wav'
|
269 |
else:
|
270 |
+
ext = '.wav' # Default
|
271 |
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
|
272 |
temp_file.write(response.content)
|
273 |
temp_audio_path = temp_file.name
|
274 |
+
print(f"Created temporary audio file from URL: {temp_audio_path}")
|
275 |
+
elif isinstance(reference_audio_url, str) and os.path.exists(reference_audio_url):
|
276 |
+
print("Using direct file path provided as string...")
|
277 |
+
temp_audio_path = reference_audio_url
|
278 |
else:
|
279 |
+
# This case might occur if Gradio passes a TemporaryFileWrapper or similar
|
280 |
+
if hasattr(reference_audio_url, 'name'): # Check if it's a file-like object from Gradio
|
281 |
+
temp_audio_path = reference_audio_url.name
|
282 |
+
print(f"Using file path from Gradio object: {temp_audio_path}")
|
283 |
+
else:
|
284 |
+
print(f"Warning: Unrecognized audio input type or path: {reference_audio_url}. Assuming it's a direct path.")
|
285 |
+
temp_audio_path = str(reference_audio_url) # Fallback, attempt to use as path
|
286 |
+
|
287 |
+
if not temp_audio_path or not os.path.exists(temp_audio_path):
|
288 |
+
raise ValueError(f"Failed to obtain a valid audio file path from input: {reference_audio_url}")
|
289 |
|
290 |
+
print(f"Calling core clone_voice function with audio path: {temp_audio_path}")
|
291 |
audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
|
292 |
+
print(f"clone_voice returned: {type(audio_output)}, {status}")
|
293 |
|
294 |
+
# Clean up temporary file only if we created one from base64 or URL
|
295 |
+
if temp_audio_path and isinstance(reference_audio_url, str) and \
|
296 |
+
(reference_audio_url.startswith('data:audio') or reference_audio_url.startswith('http')):
|
297 |
try:
|
298 |
os.unlink(temp_audio_path)
|
299 |
+
print(f"Cleaned up temporary file: {temp_audio_path}")
|
300 |
+
except Exception as e:
|
301 |
+
print(f"Failed to clean up temp file {temp_audio_path}: {e}")
|
302 |
+
|
303 |
return audio_output, status
|
304 |
+
|
305 |
except Exception as e:
|
306 |
+
print(f"ERROR in clone_voice_api: {e}")
|
307 |
+
import traceback # Ensure traceback is imported here if not globally
|
308 |
+
traceback.print_exc()
|
309 |
+
|
310 |
+
# Attempt to clean up temporary file in case of error too
|
311 |
+
if temp_audio_path and isinstance(reference_audio_url, str) and \
|
312 |
+
(reference_audio_url.startswith('data:audio') or reference_audio_url.startswith('http')):
|
313 |
try:
|
314 |
+
if os.path.exists(temp_audio_path): # Check existence before unlinking
|
315 |
+
os.unlink(temp_audio_path)
|
316 |
+
print(f"Cleaned up temporary file after error: {temp_audio_path}")
|
317 |
+
except Exception as e_clean:
|
318 |
+
print(f"Failed to clean up temp file {temp_audio_path} after error: {e_clean}")
|
319 |
return None, f"API Error: {str(e)}"
|
320 |
|
321 |
+
|
322 |
def main():
|
323 |
print("Starting Advanced Gradio interface...")
|
324 |
iface = gr.Interface(
|
325 |
+
fn=clone_voice, # The UI and default Gradio API will use clone_voice directly
|
326 |
inputs=[
|
327 |
gr.Textbox(
|
328 |
label="Text to Speak",
|
|
|
330 |
lines=3
|
331 |
),
|
332 |
gr.Audio(
|
333 |
+
type="filepath", # Gradio handles file upload/mic and provides a filepath
|
334 |
label="Reference Audio (Upload a short .wav or .mp3 clip)",
|
335 |
sources=["upload", "microphone"]
|
336 |
),
|
|
|
383 |
show_error=True,
|
384 |
quiet=False,
|
385 |
favicon_path=None,
|
386 |
+
share=False, # Set to True if you want a public link from your local machine
|
387 |
auth=None
|
388 |
+
# app_kwargs for FastAPI specific settings are not directly used by gr.Interface.launch
|
389 |
+
# but if you were embedding in FastAPI, you'd pass them to FastAPI app.
|
390 |
)
|
391 |
|
392 |
if __name__ == "__main__":
|