shukdevdatta123 committed on
Commit
4999708
·
verified ·
1 Parent(s): f75668a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -89
app.py CHANGED
@@ -1,105 +1,52 @@
1
- # !pip install TTS gradio numpy librosa torch soundfile
2
-
3
- from TTS.api import TTS
4
  import gradio as gr
5
- import numpy as np
6
- import librosa
7
- import torch
8
  import tempfile
9
- import os
10
- import soundfile as sf # Added for better audio handling
11
-
12
- # Check device availability
13
- device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
- # Initialize TTS model with device parameter
16
- model_name = "tts_models/multilingual/multi-dataset/your_tts"
17
- tts = TTS(model_name=model_name).to(device) # This line is the problem
18
 
19
- def process_audio(audio_path, max_duration=10):
20
- """Load and trim audio to specified duration"""
21
- y, sr = librosa.load(audio_path, sr=16000, mono=True)
22
- max_samples = max_duration * sr
23
- if len(y) > max_samples:
24
- y = y[:int(max_samples)]
25
- return y, sr
26
-
27
- def generate_speech(audio_file, text):
28
- # Create temp files
29
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref_file, \
30
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
31
-
32
- ref_path = ref_file.name
33
- out_path = out_file.name
34
 
35
- # Process reference audio
36
- y, sr = process_audio(audio_file)
37
- sf.write(ref_path, y, sr) # Using soundfile instead of librosa for writing
 
 
38
 
39
- # Generate speech
40
- try:
41
- tts.tts_to_file(
42
- text=text,
43
- speaker_wav=ref_path,
44
- language="en",
45
- file_path=out_path
46
- )
47
-
48
- # Clean up temporary files
49
- os.unlink(ref_path)
50
- return out_path
51
- except Exception as e:
52
- print(f"Error: {e}")
53
- return None
54
 
55
- # Gradio interface
56
- with gr.Blocks(title="Voice Clone TTS") as demo:
57
- gr.Markdown("""
58
- # 🎤 Voice Clone Text-to-Speech
59
- 1. Upload a short English voice sample (5-10 seconds)
60
- 2. Enter text you want to speak
61
- 3. Generate audio in your voice!
62
- """)
63
 
64
  with gr.Row():
65
- with gr.Column():
66
- audio_input = gr.Audio(
67
- sources=["upload", "microphone"],
68
- type="filepath",
69
- label="Upload Voice Sample",
70
- interactive=True
71
- )
72
- text_input = gr.Textbox(
73
- label="Text to Speak",
74
- placeholder="Enter English text here...",
75
- lines=4
76
- )
77
- btn = gr.Button("Generate Speech", variant="primary")
78
-
79
- with gr.Column():
80
- audio_output = gr.Audio(
81
- label="Generated Speech",
82
- interactive=False
83
- )
84
- error_output = gr.Textbox(label="Processing Info", visible=False)
85
 
86
- # Example inputs
87
- gr.Examples(
88
- examples=[
89
- ["examples/sample_voice.wav", "Hello! Welcome to the future of voice cloning technology"],
90
- ["examples/sample_voice2.wav", "This text is spoken in a completely cloned voice"]
91
- ],
92
- inputs=[audio_input, text_input],
93
- outputs=audio_output,
94
- fn=generate_speech,
95
- cache_examples=False # Disabled cache to avoid potential issues
96
- )
97
 
98
- btn.click(
 
99
  fn=generate_speech,
100
  inputs=[audio_input, text_input],
101
  outputs=audio_output
102
  )
103
 
104
- if __name__ == "__main__":
105
- demo.launch(server_port=7860, share=True)
 
 
 
 
1
  import gradio as gr
2
+ from bark import SAMPLE_RATE, generate_audio, preload_models
3
+ from scipy.io.wavfile import write as write_wav
 
4
  import tempfile
 
 
 
 
 
5
 
6
+ # Preload the models at startup
7
+ preload_models()
 
8
 
9
def generate_speech(reference_audio, text):
    """
    Generate speech audio for ``text`` with Bark and save it as a WAV file.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample. It is
            forwarded unchanged to Bark as ``history_prompt``.
        text (str): Text to convert to speech.

    Returns:
        str: Path to the generated ``.wav`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    # NOTE(review): Bark documents ``history_prompt`` as a speaker preset name
    # or a saved ``.npz`` prompt; a raw audio filepath may be rejected --
    # confirm that uploaded samples are actually accepted here.
    audio_array = generate_audio(text, history_prompt=reference_audio)

    # Reserve a unique path and close the handle *before* writing. The file is
    # reopened by name below, which is not portable while the
    # NamedTemporaryFile handle is still open (it fails on Windows), and
    # closing first means the handle cannot leak if the write raises.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    # Save the audio at Bark's sample rate.
    write_wav(temp_file_path, SAMPLE_RATE, audio_array)

    return temp_file_path
 
 
 
 
 
 
 
31
 
32
# Build the Gradio interface.
# NOTE(review): component nesting below is reconstructed from a diff view that
# dropped indentation -- confirm the intended layout.
with gr.Blocks(title="Voice Cloning TTS with Bark") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech with Bark")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output,
    )

# Launch only when executed as a script, so importing this module (for tests
# or reuse) does not start a web server as a side effect.
if __name__ == "__main__":
    app.launch()