# =============================================================
# Hugging Face Space – Lecture → Podcast Generator
# (User-selectable Languages)
# =============================================================
# • **Text generation** – SmolAgents `HfApiModel` (Qwen/Qwen2.5-Coder-32B-Instruct)
# • **Speech synthesis** – `InferenceClient.text_to_speech`, chunk-safe
#   (facebook MMS-TTS checkpoints for en/bn/zh/ur/ne). Long texts are split
#   into ≤280-char chunks to stay within HF endpoint limits.
# -----------------------------------------------------------------

import os
import re
import tempfile
import textwrap
from pathlib import Path
from typing import List, Dict, Optional, Any

import gradio as gr
from huggingface_hub import InferenceClient
from huggingface_hub.utils import HfHubHTTPError  # Raised on failed Inference API requests
from PyPDF2 import PdfReader  # For PDF processing
from smolagents import HfApiModel  # For LLM interaction
from pydub import AudioSegment  # For robust audio concatenation
from pydub.exceptions import CouldntDecodeError  # Specific pydub error

# ------------------------------------------------------------------
# LLM setup – remote Qwen model via SmolAgents
# ------------------------------------------------------------------
llm = HfApiModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2048,  # Max tokens for the generated output dialogue
    temperature=0.5,
)

# ------------------------------------------------------------------
# Hugging Face Inference API client (uses HF_TOKEN secret if provided)
# ------------------------------------------------------------------
client = InferenceClient(token=os.getenv("HF_TOKEN", None))

# ------------------------------------------------------------------
# Language metadata and corresponding open TTS model IDs
# ------------------------------------------------------------------
LANG_INFO: Dict[str, Dict[str, str]] = {
    "en": {"name": "English", "tts_model": "facebook/mms-tts-eng"},
    "bn": {"name": "Bangla", "tts_model": "facebook/mms-tts-ben"},
    "zh": {"name": "Chinese", "tts_model": "facebook/mms-tts-zho"},
    "ur": {"name": "Urdu", "tts_model": "facebook/mms-tts-urd"},
    "ne": {"name": "Nepali", "tts_model": "facebook/mms-tts-npi"},
}

# For reverse lookup: language name → language code
LANG_CODE_BY_NAME = {info["name"]: code for code, info in LANG_INFO.items()}

# ------------------------------------------------------------------
# Prompt template (target ~300 words for LLM output)
# ------------------------------------------------------------------
PROMPT_TEMPLATE = textwrap.dedent(
    """
    You are producing a lively two-host educational podcast in {lang_name}.
    Summarize the following lecture content into a dialogue of **approximately 300 words**.
    Make it engaging: hosts ask questions, clarify ideas with analogies, and
    wrap up with a concise recap. Preserve technical accuracy.
    Use Markdown for host names (e.g., **Host 1:**).

    ### Lecture Content
    {content}
    """
)
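# ------------------------------------------------------------------
# Illustrative only (not called by the app): a minimal sketch of how
# the template above is rendered for one language. The placeholder
# content string is hypothetical.
# ------------------------------------------------------------------
def _example_rendered_prompt() -> str:
    """Render PROMPT_TEMPLATE for English with stand-in lecture text."""
    return PROMPT_TEMPLATE.format(
        lang_name="English",
        content="(text extracted from the uploaded lecture PDF goes here)",
    )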
# ------------------------------------------------------------------
# PDF helpers
# ------------------------------------------------------------------
def extract_pdf_text(pdf_path: str) -> str:
    """Extract plain text from every page of the PDF."""
    try:
        reader = PdfReader(pdf_path)
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        raise gr.Error(f"Failed to process PDF: {e}")

TOKEN_LIMIT = 8000  # Approximate context guard, counted in words rather than true tokens

def truncate_text(text: str, limit: int = TOKEN_LIMIT) -> str:
    """Trim the input to `limit` words so the prompt fits the LLM context window."""
    words = text.split()
    if len(words) > limit:
        gr.Warning(
            f"Input text was truncated from {len(words)} to {limit} words "
            "to fit the LLM context window."
        )
        return " ".join(words[:limit])
    return text

# ------------------------------------------------------------------
# TTS helper – chunk long text safely (HF endpoint limit ~30 s / 200–300 chars)
# ------------------------------------------------------------------
CHUNK_CHAR_LIMIT = 280

def _split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
    """Split text into sentence-aligned chunks of at most `limit` characters."""
    sentences_raw = re.split(r"(?<=[.!?])\s+", text.strip())
    sentences = [s.strip() for s in sentences_raw if s.strip()]
    if not sentences:
        return []
    chunks, current_chunk = [], ""
    for sent in sentences:
        if current_chunk and (len(current_chunk) + len(sent) + 1 > limit):
            chunks.append(current_chunk)
            current_chunk = sent
        else:
            current_chunk += (" " + sent) if current_chunk else sent
    if current_chunk:
        chunks.append(current_chunk)
    return [chunk for chunk in chunks if chunk.strip()]

def synthesize_speech(text: str, model_id: str, lang_tmpdir: Path) -> Path:
    """Synthesize `text` chunk by chunk and concatenate the parts into one FLAC file."""
    chunks = _split_to_chunks(text)
    if not chunks:
        raise ValueError("Text resulted in no speakable chunks after splitting.")

    audio_segments: List[AudioSegment] = []
    for idx, chunk in enumerate(chunks):
        gr.Info(f"Synthesizing audio for chunk {idx + 1}/{len(chunks)}...")
        try:
            audio_bytes = client.text_to_speech(chunk, model=model_id)
        except HfHubHTTPError as e:
            error_message = (
                f"TTS request failed for chunk {idx + 1}/{len(chunks)} "
                f"('{chunk[:30]}...'): {e}"
            )
            if "Input validation error: `inputs` must be non-empty" in str(e) and not chunk.strip():
                gr.Warning(f"Skipping an apparently empty chunk for TTS: Chunk {idx + 1}")
                continue
            raise RuntimeError(error_message) from e

        part_path = lang_tmpdir / f"part_{idx}.flac"
        part_path.write_bytes(audio_bytes)
        try:
            segment = AudioSegment.from_file(part_path, format="flac")
            audio_segments.append(segment)
        except CouldntDecodeError as e:
            raise RuntimeError(
                f"Failed to decode audio chunk {idx + 1} from {part_path}.\nTTS Error: {e}"
            ) from e

    if not audio_segments:
        raise RuntimeError("No audio segments were successfully synthesized or decoded.")

    combined_audio = sum(audio_segments, AudioSegment.empty())
    final_path = lang_tmpdir / "podcast_audio.flac"
    combined_audio.export(final_path, format="flac")
    return final_path
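# ------------------------------------------------------------------
# Illustrative only (not called by the app): an offline sanity check
# for the chunker, assuming the 280-char limit above. Run it manually
# to see how a long dialogue is split into sentence-aligned pieces.
# ------------------------------------------------------------------
def _demo_chunking() -> None:
    sample = "This sentence is about fifty characters long, roughly. " * 12
    for i, chunk in enumerate(_split_to_chunks(sample), start=1):
        print(f"chunk {i}: {len(chunk)} chars")  # every length stays ≤ 280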
# ------------------------------------------------------------------
# Main pipeline function for Gradio
# ------------------------------------------------------------------
def generate_podcast(
    pdf_file_obj: Optional[gr.File],
    selected_lang_names: List[str],
) -> List[Optional[Any]]:
    if not pdf_file_obj:
        raise gr.Error("Please upload a PDF file.")
    if not selected_lang_names:
        raise gr.Error("Please select at least one language for the podcast.")

    selected_codes = [LANG_CODE_BY_NAME[name] for name in selected_lang_names]

    # Initialize the results structure for all languages. Each language gets a
    # dict for audio, script_text (for display), and script_file (for download).
    results_data: Dict[str, Dict[str, Optional[str]]] = {
        code: {"audio": None, "script_text": None, "script_file": None}
        for code in LANG_INFO.keys()
    }

    try:
        with tempfile.TemporaryDirectory() as td:
            tmpdir_base = Path(td)

            gr.Info("Extracting text from PDF...")
            lecture_raw = extract_pdf_text(pdf_file_obj.name)
            lecture_text = truncate_text(lecture_raw)

            if not lecture_text.strip():
                raise gr.Error("Could not extract any text from the PDF, or the PDF content is empty.")

            for code in selected_codes:  # Iterate only through user-selected languages
                info = LANG_INFO[code]
                lang_name = info["name"]
                tts_model = info["tts_model"]

                gr.Info(f"Processing for {lang_name}...")
                lang_tmpdir = tmpdir_base / code
                lang_tmpdir.mkdir(parents=True, exist_ok=True)

                dialogue: Optional[str] = None  # Dialogue for the current language

                # 1️⃣ Generate dialogue using the LLM
                gr.Info(f"Generating dialogue for {lang_name}...")
                prompt = PROMPT_TEMPLATE.format(lang_name=lang_name, content=lecture_text)
                try:
                    dialogue_raw: str = llm(prompt)
                    if not dialogue_raw or not dialogue_raw.strip():
                        gr.Warning(f"LLM returned empty dialogue for {lang_name}. Skipping this language.")
                        continue  # Skip to the next selected language; results_data[code] stays all None

                    dialogue = dialogue_raw  # Keep the generated dialogue

                    # Store the script text and save it to a downloadable file
                    results_data[code]["script_text"] = dialogue
                    script_file_path = lang_tmpdir / f"podcast_script_{code}.txt"
                    script_file_path.write_text(dialogue, encoding="utf-8")
                    results_data[code]["script_file"] = str(script_file_path)
                except Exception as e:
                    gr.Error(f"Error generating dialogue for {lang_name}: {e}")
                    # If dialogue generation fails, skip TTS for this language;
                    # its outputs remain None (or partially filled).
                    continue

                # 2️⃣ Synthesize speech (only if dialogue was successfully generated)
                if dialogue:
                    gr.Info(f"Synthesizing speech for {lang_name}...")
                    try:
                        tts_path = synthesize_speech(dialogue, tts_model, lang_tmpdir)
                        results_data[code]["audio"] = str(tts_path)
                    except ValueError as e:
                        gr.Warning(f"Could not synthesize speech for {lang_name} (ValueError): {e}")
                        # Audio remains None for this language
                    except RuntimeError as e:
                        gr.Error(f"Error synthesizing speech for {lang_name} (RuntimeError): {e}")
                        # Audio remains None
                    except Exception as e:
                        gr.Error(f"Unexpected error during speech synthesis for {lang_name}: {e}")
                        # Audio remains None

            # Flatten results_data (dict of dicts) into the ordered list Gradio expects
            final_ordered_results: List[Optional[Any]] = []
            for code_key in LANG_INFO.keys():  # Iterate in the defined order of LANG_INFO
                lang_output_data = results_data[code_key]
                final_ordered_results.append(lang_output_data["audio"])
                final_ordered_results.append(lang_output_data["script_text"])
                final_ordered_results.append(lang_output_data["script_file"])

            gr.Info("Podcast generation complete!")
            return final_ordered_results

    except gr.Error as e:
        raise e
    except Exception as e:
        import traceback
        print("An unexpected error occurred in generate_podcast:")
        traceback.print_exc()
        raise gr.Error(f"An unexpected server error occurred. Details: {str(e)[:100]}...")
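# Illustrative note: with the five languages defined above, generate_podcast
# returns a flat 15-element list (3 outputs × 5 languages), ordered as
#   [en_audio, en_script_text, en_script_file,
#    bn_audio, bn_script_text, bn_script_file,
#    ... and so on in LANG_INFO order]
# which must line up one-to-one with the `outputs` component list built below.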
# ------------------------------------------------------------------
# Gradio Interface Setup
# ------------------------------------------------------------------
language_names_ordered = [LANG_INFO[code]["name"] for code in LANG_INFO.keys()]

inputs = [
    gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
    gr.CheckboxGroup(
        choices=language_names_ordered,
        value=["English"],
        label="Select podcast language(s) to generate",
    ),
]

# Output components: Audio, script display (Markdown), and script download
# (File) for each language, in the consistent order of LANG_INFO
outputs = []
for code in LANG_INFO.keys():
    info = LANG_INFO[code]
    lang_name = info["name"]
    outputs.append(gr.Audio(label=f"{lang_name} Podcast", type="filepath"))
    outputs.append(gr.Markdown(label=f"{lang_name} Script"))
    outputs.append(gr.File(label=f"Download {lang_name} Script (.txt)", type="filepath"))

iface = gr.Interface(
    fn=generate_podcast,
    inputs=inputs,
    outputs=outputs,
    title="Lecture → Podcast & Script Generator (Multi-Language)",
    description=(
        "Upload a lecture PDF, choose language(s), and receive an audio podcast "
        "and its script for each selected language. Dialogue by Qwen-32B, "
        "speech by MMS-TTS. Scripts are viewable and downloadable."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()
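# Illustrative only: when running locally (outside a Hugging Face Space) you
# might expose the app on a specific host/port with standard launch() options,
# e.g. iface.launch(server_name="0.0.0.0", server_port=7860).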