shukdevdatta123 committed on
Commit
4760b00
·
verified ·
1 Parent(s): af961e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -39
app.py CHANGED
@@ -1,57 +1,104 @@
1
- import gradio as gr
 
2
  from TTS.api import TTS
 
3
  import numpy as np
4
- from scipy.io import wavfile
 
5
  import tempfile
6
  import os
7
 
8
# Load the YourTTS model once at startup so generate_speech can reuse it on every call
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
# Output sample rate reported by the model's synthesizer; generate_speech
# passes it to wavfile.write when saving the generated audio.
sample_rate = tts.synthesizer.output_sample_rate
11
 
12
def generate_speech(reference_audio, text):
    """
    Synthesize ``text`` in the voice heard in ``reference_audio``.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample.
        text (str): Text to convert to speech.

    Returns:
        str: Path to a temporary ``.wav`` file holding the generated audio.
             The file is created with ``delete=False``, so it is not removed
             automatically.
    """
    # The model output is converted to a float32 array before it is written.
    samples = tts.tts(text=text, speaker_wav=reference_audio, language="en")
    waveform = np.array(samples, dtype=np.float32)

    # Reserve a named temporary file and write the waveform to its path;
    # leaving the ``with`` block closes the handle afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name
        wavfile.write(output_path, sample_rate, waveform)

    return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
# Build the Gradio interface
# NOTE(review): the nesting below is reconstructed from a flattened diff view;
# confirm which components sit inside the Row against the real app.py.
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    # Inputs: the reference voice clip (type="filepath", so the handler
    # receives a path) and the text to be spoken.
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")

    generate_btn = gr.Button("Generate Speech")
    # Non-interactive audio component that shows the generated result.
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output
    )

# Launch the application
app.launch()
 
1
+ # !pip install TTS gradio numpy librosa torch
2
+
3
  from TTS.api import TTS
4
+ import gradio as gr
5
  import numpy as np
6
+ import librosa
7
+ import torch
8
  import tempfile
9
  import os
10
 
11
# Check device availability: use CUDA when torch reports it, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize TTS model once at module import and move it to the chosen device;
# generate_speech uses this shared instance for every request.
model_name = "tts_models/multilingual/multi-dataset/your_tts"
tts = TTS(model_name=model_name).to(device)
17
+
18
def process_audio(audio_path, max_duration=10):
    """Load a clip as 16 kHz mono audio, keeping at most ``max_duration`` seconds.

    Args:
        audio_path: Path to the audio file to load.
        max_duration: Maximum clip length in seconds. Must be positive.

    Returns:
        A ``(samples, sample_rate)`` tuple from ``librosa.load``, with
        ``samples`` holding no more than ``max_duration * sample_rate`` entries.

    Raises:
        ValueError: If ``max_duration`` is not positive.
    """
    if max_duration <= 0:
        raise ValueError("max_duration must be positive")

    # Ask librosa to stop reading after max_duration seconds so a long upload
    # is not decoded and resampled in full only to be discarded.
    y, sr = librosa.load(audio_path, sr=16000, mono=True, duration=max_duration)

    # Guard against an extra sample or two from rounding during resampling.
    max_samples = int(max_duration * sr)
    return y[:max_samples], sr
25
+
26
def generate_speech(audio_file, text):
    """Synthesize ``text`` in the voice of the reference clip ``audio_file``.

    Args:
        audio_file: Path to the uploaded or recorded reference audio.
        text: Text to speak.

    Returns:
        Path to the generated ``.wav`` file, or ``None`` when an input is
        missing/blank or when loading the reference or synthesis fails (the
        error is printed).
    """
    # Nothing to do without both a reference clip and some text.
    if not audio_file or not text or not text.strip():
        return None

    # Imported locally because the module does not import scipy at the top.
    from scipy.io import wavfile

    # Reserve both paths and close the handles immediately, so the files can
    # be reopened by path for writing (an open handle blocks that on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref_file:
        ref_path = ref_file.name
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        out_path = out_file.name

    succeeded = False
    try:
        # Process reference audio. librosa.output.write_wav no longer exists
        # in librosa >= 0.8, so the trimmed clip is written with scipy.
        y, sr = process_audio(audio_file)
        wavfile.write(ref_path, int(sr), y)

        # Generate speech
        tts.tts_to_file(
            text=text,
            speaker_wav=ref_path,
            language="en",
            file_path=out_path,
        )
        succeeded = True
        return out_path
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        # Best-effort cleanup: the reference copy is always removed, and the
        # output file is removed too when nothing usable was produced.
        stale_paths = [ref_path] if succeeded else [ref_path, out_path]
        for path in stale_paths:
            try:
                os.unlink(path)
            except OSError:
                pass
53
 
54
# Gradio interface
# NOTE(review): nesting is reconstructed from a flattened diff view; confirm
# the layout against the real app.py.
with gr.Blocks(title="Voice Clone TTS") as demo:
    gr.Markdown("""
    # 🎤 Voice Clone Text-to-Speech
    1. Upload a short English voice sample (5-10 seconds)
    2. Enter text you want to speak
    3. Generate audio in your voice!
    """)

    with gr.Row():
        # Left column: reference clip, text, and the trigger button.
        with gr.Column():
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",  # the handler receives a path, not raw samples
                label="Upload Voice Sample",
                interactive=True
            )
            text_input = gr.Textbox(
                label="Text to Speak",
                placeholder="Enter English text here...",
                lines=4
            )
            btn = gr.Button("Generate Speech", variant="primary")

        # Right column: the generated audio.
        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Speech",
                interactive=False
            )
            # Hidden textbox; no event in this file writes to it.
            error_output = gr.Textbox(label="Processing Info", visible=False)

    # Example inputs. With cache_examples=True the examples are run through
    # generate_speech, which loads the audio path, so a sample file that is
    # absent from the deployment would raise. Register only the examples
    # whose audio file exists.
    _example_rows = [
        ["examples/sample_voice.wav", "Hello! Welcome to the future of voice cloning technology"],
        ["examples/sample_voice2.wav", "This text is spoken in a completely cloned voice"]
    ]
    _available_examples = [row for row in _example_rows if os.path.isfile(row[0])]
    if _available_examples:
        gr.Examples(
            examples=_available_examples,
            inputs=[audio_input, text_input],
            outputs=audio_output,
            fn=generate_speech,
            cache_examples=True
        )

    btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output
    )
102
 
103
# Start the server only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # Serve on port 7860; share=True additionally asks Gradio for a share link.
    demo.launch(server_port=7860, share=True)