import os
import tempfile

import torch
import gradio as gr
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
# ✅ Import missing class
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.api import TTS

# ✅ Accept Coqui License Automatically
os.environ["COQUI_TOS_AGREED"] = "1"

# ✅ Allow-list the XTTS classes for safe (weights_only) deserialization.
# Every class is registered through add_safe_globals(): safe_globals() is a
# context manager, so calling it without a ``with`` block registers nothing.
torch.serialization.add_safe_globals(
    [XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs]
)

# ✅ Force full checkpoint loading
def safe_load_checkpoint(model_path):
    """Load the checkpoint at ``model_path`` onto the CPU with full unpickling.

    Args:
        model_path: Path (or file-like object) accepted by ``torch.load``.

    Returns:
        Whatever object was serialized into the checkpoint.
    """
    # NOTE(security): weights_only=False lets the pickle stream construct
    # arbitrary objects, so only use this on checkpoints from a trusted source.
    load_options = {"map_location": "cpu", "weights_only": False}
    return torch.load(model_path, **load_options)

# ✅ Build the XTTS v2 model once at start-up, then pin it to the CPU
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name=model_name)
tts = tts.to("cpu")  # ✅ Ensure CPU usage


def generate_cloned_voice(text, reference_audio, language):
    """Synthesize ``text`` in the voice heard in ``reference_audio``.

    Args:
        text: The text to speak.
        reference_audio: Path to the voice sample, passed to the model as
            ``speaker_wav``.
        language: Language code passed through to the model.

    Returns:
        Path of the generated WAV file.

    Raises:
        ValueError: If ``text`` is empty/blank or no reference audio is given.
    """
    # Fail early with a readable message instead of an error raised from
    # somewhere inside the synthesis call.
    if not text or not text.strip():
        raise ValueError("Please enter some text to synthesize.")
    if not reference_audio:
        raise ValueError("Please upload a reference audio sample.")

    # Each call gets its own output file; a single fixed "output.wav" would be
    # overwritten when several requests are served by the same process.
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can read it; the files are not cleaned up here.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # Generate cloned speech with language specification
    tts.tts_to_file(
        text=text,
        speaker_wav=reference_audio,
        file_path=output_path,
        language=language,
    )

    return output_path

# Show the value of the model's ``languages`` attribute on stdout at start-up.
print(tts.languages)

# Create the Gradio interface: the components are built first (inputs, then
# the output player) and wired to the synthesis function afterwards.
_text_box = gr.Textbox(label="Enter Translated Text")
_reference_clip = gr.Audio(label="Upload Reference Audio", type="filepath")
_language_picker = gr.Dropdown(
    ["en", "fr", "de", "es", "it"],
    label="Select Target Language",
    value="en",
)
_result_player = gr.Audio(label="Generated Cloned Voice")

interface = gr.Interface(
    fn=generate_cloned_voice,
    inputs=[_text_box, _reference_clip, _language_picker],
    outputs=_result_player,
    title="Free Voice Cloning API",
    description="Upload a sample voice and input text. Select a language, and the system will generate the text in the same voice.",
)

# Launch the Gradio app
interface.launch()