File size: 1,982 Bytes
f41d3a4
c05c224
0bcf9d7
1177ab1
725bb8a
88a9a72
 
55d6b51
4f47fc6
 
c05c224
55d6b51
 
725bb8a
55d6b51
 
 
 
 
 
 
 
 
 
 
 
 
 
c68d673
0bcf9d7
 
 
 
c05c224
0bcf9d7
 
 
4f47fc6
f41d3a4
f86b0c1
774ebda
f41d3a4
774ebda
 
 
 
f41d3a4
 
f6fc20c
 
f41d3a4
 
 
 
 
774ebda
 
f41d3a4
 
 
774ebda
f41d3a4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import tempfile

import gradio as gr
import torch
from TTS.api import TTS
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsArgs


# Accept the Coqui license non-interactively. The variable is set before the
# model is constructed further down; presumably the TTS library reads it when
# it fetches the model -- confirm against the TTS documentation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Allow-list the XTTS classes for torch's weights-only unpickler.
# `add_safe_globals` registers them for the whole process. The previous
# `safe_globals([...])` call only built a context manager and, without a
# `with` statement, registered nothing.
# NOTE(review): if `torch.load` still reports an unsupported global while the
# checkpoint is read, add the class it names to this list.
torch.serialization.add_safe_globals([XttsArgs, XttsConfig])

def safe_load_checkpoint(model_path):
    """Load the checkpoint stored at ``model_path`` onto the CPU.

    The file is read with ``weights_only=False``, i.e. with full pickle
    deserialization rather than torch's restricted weights-only mode.

    SECURITY: full unpickling can execute arbitrary code embedded in the
    file, so only call this on checkpoints from a trusted source.

    Args:
        model_path: Path (or file-like object) accepted by ``torch.load``.

    Returns:
        Whatever object was serialized into the checkpoint.
    """
    load_options = {"map_location": "cpu", "weights_only": False}
    return torch.load(model_path, **load_options)

# Build the multilingual XTTS v2 synthesiser, then move it to the CPU.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name=model_name)
tts = tts.to("cpu")


# License acceptance, safe-globals registration, `safe_load_checkpoint`,
# `model_name` and `tts` are all set up once, earlier in this file. They are
# intentionally not repeated here so the XTTS model is constructed only once.



def generate_cloned_voice(text, reference_audio, language):
    """Synthesise ``text`` in the voice of ``reference_audio``.

    Args:
        text: The text to speak.
        reference_audio: Path to the reference recording, passed to the model
            as ``speaker_wav``.
        language: Language code passed to the model as ``language``.

    Returns:
        Path of the generated WAV file. A fresh file is created for every
        call, so concurrent requests never overwrite each other's output.

    Raises:
        gr.Error: If ``text`` is blank or no reference audio was supplied.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesise.")
    if not reference_audio:
        raise gr.Error("Please upload a reference audio clip.")

    # Reserve a unique output path instead of a shared "output.wav"; only the
    # name is needed, so the descriptor is closed straight away.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    # Generate cloned speech with language specification
    tts.tts_to_file(
        text=text,
        speaker_wav=reference_audio,
        file_path=output_path,
        language=language,
    )

    return output_path

# Print the language list reported by the loaded model.
print(tts.languages)

# Input widgets, in the order `generate_cloned_voice` takes its arguments:
# text, reference audio path, language code.
_input_components = [
    gr.Textbox(label="Enter Translated Text"),
    gr.Audio(label="Upload Reference Audio", type="filepath"),
    gr.Dropdown(
        choices=["en", "fr", "de", "es", "it"],
        label="Select Target Language",
        value="en",
    ),
]

# Player for the synthesised result.
_output_component = gr.Audio(label="Generated Cloned Voice")

# Assemble the Gradio interface around the synthesis function.
interface = gr.Interface(
    fn=generate_cloned_voice,
    inputs=_input_components,
    outputs=_output_component,
    title="Free Voice Cloning API",
    description="Upload a sample voice and input text. Select a language, and the system will generate the text in the same voice.",
)

# Start serving the app.
interface.launch()