Spaces:

ramimu
/

voice_cloning

Sleeping

App Files Community

ramimu committed on May 31

Commit

01fd073

verified ·

1 Parent(s): deb04b6

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -19

app.py CHANGED Viewed

@@ -179,9 +179,9 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
         return None, "Error: Please upload a reference audio file (.wav or .mp3)."
     try:
-        print(f"Received request:")
         print(f"  Text: '{text_to_speak}'")
-        print(f"  Audio: '{reference_audio_path}'")
         print(f"  Exaggeration: {exaggeration}")
         print(f"  CFG/Pace: {cfg_pace}")
         print(f"  Random Seed: {random_seed}")
@@ -206,7 +206,7 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
         except:
             sample_rate = 24000
-        print(f"Audio generated successfully. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
         if isinstance(output_wav_data, str):
             return output_wav_data, "Success: Audio generated successfully!"
@@ -219,11 +219,12 @@ def clone_voice(text_to_speak, reference_audio_path, exaggeration=0.6, cfg_pace=
             return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
     except Exception as e:
-        print(f"ERROR: Failed during audio generation: {e}")
-        print("Detailed error trace for audio generation:")
         traceback.print_exc()
         return None, f"Error during audio generation: {str(e)}. Check logs for more details."
 def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
     import requests
     import tempfile
@@ -232,19 +233,33 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
     temp_audio_path = None
     try:
-        if reference_audio_url.startswith('data:audio'):
             header, encoded = reference_audio_url.split(',', 1)
             audio_data = base64.b64decode(encoded)
             if 'mp3' in header:
                 ext = '.mp3'
             elif 'wav' in header:
                 ext = '.wav'
             else:
                 ext = '.wav'
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(audio_data)
                 temp_audio_path = temp_file.name
-        elif reference_audio_url.startswith('http'):
             response = requests.get(reference_audio_url)
             response.raise_for_status()
             if reference_audio_url.endswith('.mp3'):
@@ -252,33 +267,62 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
             elif reference_audio_url.endswith('.wav'):
                 ext = '.wav'
             else:
-                ext = '.wav'
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(response.content)
                 temp_audio_path = temp_file.name
         else:
-            temp_audio_path = reference_audio_url
         audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
-        if temp_audio_path and temp_audio_path != reference_audio_url:
             try:
                 os.unlink(temp_audio_path)
-            except:
-                pass
         return audio_output, status
     except Exception as e:
-        if temp_audio_path and temp_audio_path != reference_audio_url:
             try:
-                os.unlink(temp_audio_path)
-            except:
-                pass
         return None, f"API Error: {str(e)}"
 def main():
     print("Starting Advanced Gradio interface...")
     iface = gr.Interface(
-        fn=clone_voice_api,
         inputs=[
             gr.Textbox(
                 label="Text to Speak",
@@ -286,7 +330,7 @@ def main():
                 lines=3
             ),
             gr.Audio(
-                type="filepath",
                 label="Reference Audio (Upload a short .wav or .mp3 clip)",
                 sources=["upload", "microphone"]
             ),
@@ -339,8 +383,10 @@ def main():
         show_error=True,
         quiet=False,
         favicon_path=None,
-        share=False,
         auth=None
     )
 if __name__ == "__main__":

         return None, "Error: Please upload a reference audio file (.wav or .mp3)."
     try:
+        print(f"clone_voice function called:")
         print(f"  Text: '{text_to_speak}'")
+        print(f"  Audio Path: '{reference_audio_path}'")
         print(f"  Exaggeration: {exaggeration}")
         print(f"  CFG/Pace: {cfg_pace}")
         print(f"  Random Seed: {random_seed}")
         except:
             sample_rate = 24000
+        print(f"Audio generated successfully by clone_voice. Output data type: {type(output_wav_data)}, Sample rate: {sample_rate}")
         if isinstance(output_wav_data, str):
             return output_wav_data, "Success: Audio generated successfully!"
             return (sample_rate, output_wav_data), "Success: Audio generated successfully!"
     except Exception as e:
+        print(f"ERROR: Failed during audio generation in clone_voice: {e}")
+        print("Detailed error trace for audio generation in clone_voice:")
         traceback.print_exc()
         return None, f"Error during audio generation: {str(e)}. Check logs for more details."
+# Updated clone_voice_api function with detailed logging
 def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pace=0.3, random_seed=0, temperature=0.6):
     import requests
     import tempfile
     temp_audio_path = None
     try:
+        print(f"API call received by clone_voice_api:")
+        print(f"  Text: {text_to_speak}")
+        print(f"  Audio URL type: {type(reference_audio_url)}")
+        print(f"  Audio URL preview: {str(reference_audio_url)[:100]}...")
+        print(f"  Parameters: exag={exaggeration}, cfg={cfg_pace}, seed={random_seed}, temp={temperature}")
+        if isinstance(reference_audio_url, str) and reference_audio_url.startswith('data:audio'):
+            print("Processing base64 audio data...")
             header, encoded = reference_audio_url.split(',', 1)
             audio_data = base64.b64decode(encoded)
+            print(f"Decoded audio data size: {len(audio_data)} bytes")
             if 'mp3' in header:
                 ext = '.mp3'
             elif 'wav' in header:
                 ext = '.wav'
             else:
                 ext = '.wav'
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(audio_data)
                 temp_audio_path = temp_file.name
+            print(f"Created temporary audio file from base64: {temp_audio_path}")
+        elif isinstance(reference_audio_url, str) and reference_audio_url.startswith('http'):
+            print("Processing HTTP audio URL...")
             response = requests.get(reference_audio_url)
             response.raise_for_status()
             if reference_audio_url.endswith('.mp3'):
             elif reference_audio_url.endswith('.wav'):
                 ext = '.wav'
             else:
+                ext = '.wav' # Default
             with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                 temp_file.write(response.content)
                 temp_audio_path = temp_file.name
+            print(f"Created temporary audio file from URL: {temp_audio_path}")
+        elif isinstance(reference_audio_url, str) and os.path.exists(reference_audio_url):
+             print("Using direct file path provided as string...")
+             temp_audio_path = reference_audio_url
         else:
+            # This case might occur if Gradio passes a TemporaryFileWrapper or similar
+            if hasattr(reference_audio_url, 'name'): # Check if it's a file-like object from Gradio
+                 temp_audio_path = reference_audio_url.name
+                 print(f"Using file path from Gradio object: {temp_audio_path}")
+            else:
+                print(f"Warning: Unrecognized audio input type or path: {reference_audio_url}. Assuming it's a direct path.")
+                temp_audio_path = str(reference_audio_url) # Fallback, attempt to use as path
+        if not temp_audio_path or not os.path.exists(temp_audio_path):
+            raise ValueError(f"Failed to obtain a valid audio file path from input: {reference_audio_url}")
+        print(f"Calling core clone_voice function with audio path: {temp_audio_path}")
         audio_output, status = clone_voice(text_to_speak, temp_audio_path, exaggeration, cfg_pace, random_seed, temperature)
+        print(f"clone_voice returned: {type(audio_output)}, {status}")
+        # Clean up temporary file only if we created one from base64 or URL
+        if temp_audio_path and isinstance(reference_audio_url, str) and \
+           (reference_audio_url.startswith('data:audio') or reference_audio_url.startswith('http')):
             try:
                 os.unlink(temp_audio_path)
+                print(f"Cleaned up temporary file: {temp_audio_path}")
+            except Exception as e:
+                print(f"Failed to clean up temp file {temp_audio_path}: {e}")
         return audio_output, status
     except Exception as e:
+        print(f"ERROR in clone_voice_api: {e}")
+        import traceback # Ensure traceback is imported here if not globally
+        traceback.print_exc()
+        # Attempt to clean up temporary file in case of error too
+        if temp_audio_path and isinstance(reference_audio_url, str) and \
+           (reference_audio_url.startswith('data:audio') or reference_audio_url.startswith('http')):
             try:
+                if os.path.exists(temp_audio_path): # Check existence before unlinking
+                    os.unlink(temp_audio_path)
+                    print(f"Cleaned up temporary file after error: {temp_audio_path}")
+            except Exception as e_clean:
+                 print(f"Failed to clean up temp file {temp_audio_path} after error: {e_clean}")
         return None, f"API Error: {str(e)}"
 def main():
     print("Starting Advanced Gradio interface...")
     iface = gr.Interface(
+        fn=clone_voice, # The UI and default Gradio API will use clone_voice directly
         inputs=[
             gr.Textbox(
                 label="Text to Speak",
                 lines=3
             ),
             gr.Audio(
+                type="filepath", # Gradio handles file upload/mic and provides a filepath
                 label="Reference Audio (Upload a short .wav or .mp3 clip)",
                 sources=["upload", "microphone"]
             ),
         show_error=True,
         quiet=False,
         favicon_path=None,
+        share=False, # Set to True if you want a public link from your local machine
         auth=None
+        # app_kwargs for FastAPI specific settings are not directly used by gr.Interface.launch
+        # but if you were embedding in FastAPI, you'd pass them to FastAPI app.
     )
 if __name__ == "__main__":