# =============================================================
# Hugging Face Space – Lecture → Multilingual Podcast Generator
# =============================================================
# * Text generation: SmolAgents HfApiModel (Qwen/Qwen2.5‑Coder‑32B)
# * Speech synthesis: **Coqui XTTS‑v2** open model via the TTS lib
#   (no private / gated repo, so it runs without a HF token).
# * Outputs five WAV files: English, Bangla, Chinese, Urdu, Nepali.
# -----------------------------------------------------------------

import os
import tempfile
import uuid
import textwrap
from typing import List, Dict

import gradio as gr
from PyPDF2 import PdfReader
from smolagents import HfApiModel
from TTS.api import TTS  # ↳ Coqui TTS

# ------------------------------------------------------------------
# LLM configuration (SmolAgents wrapper for HF Inference API)
# ------------------------------------------------------------------
llm = HfApiModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

# ------------------------------------------------------------------
# XTTS‑v2 multilingual text‑to‑speech (≈ 1.2 GB, CPU OK)
# ------------------------------------------------------------------
TTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

tts = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)
# Automatically downloads and caches the model on first run.

LANG_INFO: Dict[str, Dict[str, str]] = {
    "en": {"name": "English"},
    "bn": {"name": "Bangla"},
    "zh": {"name": "Chinese"},
    "ur": {"name": "Urdu"},
    "ne": {"name": "Nepali"},
}

PROMPT_TEMPLATE = textwrap.dedent(
    """
    You are producing a lively two‑host educational podcast in {lang_name}.
    Summarize the following lecture content into a dialogue of about 1200 words.
    Use an engaging style: hosts ask each other questions, clarify ideas, add
    simple analogies, and conclude with a short recap. Keep technical accuracy.

    ### Lecture Content
    {content}
    """
)

# ------------------------------------------------------------------
# Utility: extract & truncate PDF text to fit the LLM token budget
# ------------------------------------------------------------------

def extract_pdf_text(pdf_file) -> str:
    reader = PdfReader(pdf_file)
    return "\n".join(p.extract_text() or "" for p in reader.pages)

TOKEN_LIMIT = 6000  # ≈ tokens (safe margin for prompt + response)

def truncate_text(text: str, limit: int = TOKEN_LIMIT) -> str:
    words = text.split()
    return " ".join(words[:limit])

# ------------------------------------------------------------------
# Main generation routine
# ------------------------------------------------------------------

def generate_podcast(pdf: gr.File) -> List[gr.Audio]:
    with tempfile.TemporaryDirectory() as tmpdir:
        lecture_text = truncate_text(extract_pdf_text(pdf.name))
        audio_outputs = []

        for lang_code, info in LANG_INFO.items():
            # 1️⃣  Create prompt + generate dialogue
            prompt = PROMPT_TEMPLATE.format(lang_name=info["name"], content=lecture_text)
            dialogue = llm(prompt)

            # 2️⃣  Save raw dialogue text (for reference)
            txt_path = os.path.join(tmpdir, f"podcast_{lang_code}.txt")
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(dialogue)

            # 3️⃣  Synthesise speech with XTTS‑v2
            wav_path = os.path.join(tmpdir, f"podcast_{lang_code}.wav")
            # ► xtts_v2 accepts ISO‑639‑1 language codes directly
            tts.tts_to_file(text=dialogue, language=lang_code, file_path=wav_path)

            audio_outputs.append((wav_path, None))  # (file, label) for Gradio Audio

        return audio_outputs

# ------------------------------------------------------------------
# Gradio UI
# ------------------------------------------------------------------

audio_components = [
    gr.Audio(label=f"{info['name']} Podcast", type="filepath") for info in LANG_INFO.values()
]

iface = gr.Interface(
    fn=generate_podcast,
    inputs=gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
    outputs=audio_components,
    title="Lecture → Multilingual Podcast Generator",
    description=(
        "Upload a lecture PDF and receive a two‑host audio podcast in English, "
        "Bangla, Chinese, Urdu, and Nepali. Generation uses Qwen‑32B for the "
        "dialogue and Coqui XTTS‑v2 for speech synthesis — no private repos "
        "or API keys needed."
    ),
)

if __name__ == "__main__":
    iface.launch()