import os
import re
import tempfile

import gradio as gr
import openai
from fpdf import FPDF
from keybert import KeyBERT
from langdetect import detect
from transformers import pipeline

# The OpenAI key comes from the environment (configure it as a secret in the HF Space).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Both models are instantiated once, when the module is imported.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
kw_model = KeyBERT()

# Sample brand list for detection (customize as needed)
BRANDS = [
    "Zerodha",
    "Motilal",
    "ICICI",
    "HDFC",
    "ShareKhan",
    "IND Money",
    "Samsung",
    "Nike",
    "Adidas",
]

def extract_brands(text, brands=None):
    """Return the known brand names that occur in *text*.

    Matching is a case-insensitive substring test, so a brand is reported
    whenever its name appears anywhere in the text.

    Args:
        text: Text to scan. ``None`` or an empty string yields no matches.
        brands: Optional iterable of brand names to look for. When omitted,
            the module-level ``BRANDS`` list is used.

    Returns:
        The matching brand names in candidate order, or
        ``["None detected"]`` when nothing matches.
    """
    candidates = BRANDS if brands is None else brands
    # Lower-case the text once instead of once per candidate brand.
    haystack = (text or "").lower()
    found = [brand for brand in candidates if brand.lower() in haystack]
    return found or ["None detected"]

def extract_topics(text, top_n=5):
    """Extract up to *top_n* keyword topics from *text*.

    Delegates to the module-level ``kw_model`` with English stop words
    removed and keeps only the keyword of each (keyword, score) pair.

    Returns:
        The list of keywords, or ``["None extracted"]`` when the model
        produced none.
    """
    ranked = kw_model.extract_keywords(text, top_n=top_n, stop_words='english')
    phrases = [phrase for phrase, _ in ranked]
    if not phrases:
        return ["None extracted"]
    return phrases

def make_bullets(summary):
    """Turn a summary paragraph into a newline-separated bullet list.

    Newlines are flattened to spaces, then the text is split after each
    sentence terminator (``.``, ``!`` or ``?``) that is followed by
    whitespace. The terminator stays with its sentence, so every bullet
    keeps its own punctuation.

    Args:
        summary: Summary text; ``None`` or empty text yields ``""``.

    Returns:
        One ``"- sentence"`` line per non-empty sentence, joined with
        newlines.
    """
    if not summary:
        return ""
    flattened = summary.replace("\n", " ")
    # The look-behind splits on the whitespace *after* the terminator, so
    # the punctuation is not consumed by the split.
    sentences = re.split(r"(?<=[.!?])\s+", flattened)
    return "\n".join(f"- {s.strip()}" for s in sentences if s.strip())

def make_str(val):
    """Coerce *val* to a display string.

    ``None`` becomes ``""``; a list is converted item by item (recursively)
    and joined with newlines; every other value goes through ``str()``.
    If the conversion raises an ``Exception``, ``""`` is returned instead.
    """
    try:
        if val is None:
            return ""
        if isinstance(val, list):
            return "\n".join(map(make_str, val))
        # Scalars, dicts and any other object share the same str() path.
        return str(val)
    except Exception:
        return ""

def _latin1_safe(text):
    """Return *text* as a string restricted to Latin-1 ('?' replaces the rest)."""
    if text is None:
        return ""
    return str(text).encode("latin-1", "replace").decode("latin-1")


def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
    """Render the analysis results into a PDF file and return its path.

    Args:
        language: Display name of the detected language.
        transcript: Transcript in the original language.
        transcript_en: English transcript.
        summary: Accepted for interface compatibility; not rendered. The
            "Summary (Bulleted)" section is built from *key_takeaways*.
        brands: List of brand names, written comma-separated.
        topics: List of topic keywords, written comma-separated.
        key_takeaways: Newline-separated bullet lines.

    Returns:
        Path of the generated ``analysis_report.pdf``, placed in a freshly
        created temporary directory.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
    pdf.set_font("Arial", size=12)
    pdf.ln(5)
    # Every dynamic string goes through _latin1_safe: the built-in Arial font
    # cannot encode characters outside Latin-1 (e.g. Hindi or Tamil script),
    # and writing them unfiltered makes PDF generation raise.
    pdf.cell(0, 10, _latin1_safe(f"Detected Language: {language}"), ln=True)
    pdf.ln(5)
    pdf.multi_cell(0, 8, "Original Transcript:\n" + _latin1_safe(transcript))
    pdf.ln(3)
    pdf.multi_cell(0, 8, "English Transcript:\n" + _latin1_safe(transcript_en))
    pdf.ln(3)
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Brands Detected:", ln=True)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 8, _latin1_safe(", ".join(brands)))
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Key Topics:", ln=True)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 8, _latin1_safe(", ".join(topics)))
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
    pdf.set_font("Arial", size=12)
    for takeaway in _latin1_safe(key_takeaways).split('\n'):
        pdf.multi_cell(0, 8, takeaway)
    # A per-call directory keeps simultaneous requests from overwriting each
    # other's report while preserving the download file name.
    report_dir = tempfile.mkdtemp(prefix="analysis_report_")
    pdf_file = os.path.join(report_dir, "analysis_report.pdf")
    pdf.output(pdf_file)
    return pdf_file

def _error_outputs(message):
    """Return the 7-value output tuple carrying *message* in the first slot."""
    return (message, "", "", "", "", "", None)


def process_audio(audio_path):
    """Transcribe an audio file and build every value shown in the UI.

    Steps: Whisper transcription, language detection, Whisper translation
    to English (skipped only when the language is detected as English),
    summarization, brand and topic extraction, bulleted takeaways and PDF
    report generation.

    Args:
        audio_path: Filesystem path of the uploaded audio file.

    Returns:
        A 7-tuple: (language, original transcript, English transcript,
        comma-separated brands, comma-separated topics, bulleted
        takeaways, PDF path or ``None``). When the input is missing or
        transcription fails, the first element holds the message, the
        other text fields are empty and the PDF entry is ``None``.
    """
    if not audio_path or not isinstance(audio_path, str):
        return _error_outputs("No audio file provided.")

    try:
        with open(audio_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        transcript = make_str(transcript).strip()
    except Exception as e:
        return _error_outputs(make_str(f"Error in transcription: {e}"))

    # detected_lang must be bound even when detection raises, because the
    # translation decision below reads it.
    try:
        detected_lang = detect(transcript)
    except Exception:
        detected_lang = None
    if detected_lang is None:
        lang_text = "unknown"
    else:
        lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)

    transcript_en = transcript
    # An undetected language is treated like a non-English one: translate.
    if detected_lang != "en":
        try:
            with open(audio_path, "rb") as audio_file:
                transcript_en = openai.audio.translations.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            transcript_en = make_str(transcript_en).strip()
        except Exception as e:
            transcript_en = make_str(f"Error translating: {e}")

    try:
        # truncation=True clips transcripts longer than the model's maximum
        # input length instead of letting the summarizer fail on them.
        summary_obj = summarizer(
            transcript_en,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        if isinstance(summary_obj, list) and "summary_text" in summary_obj[0]:
            summary = summary_obj[0]["summary_text"]
        else:
            summary = make_str(summary_obj)
    except Exception as e:
        summary = make_str(f"Error summarizing: {e}")

    brands = extract_brands(transcript_en)
    topics = extract_topics(transcript_en)
    key_takeaways = make_bullets(summary)

    # A failed PDF export should not discard the text results; report it on
    # stdout and return no file.
    try:
        pdf_file = create_pdf_report(
            lang_text, transcript, transcript_en, summary, brands, topics, key_takeaways
        )
    except Exception as e:
        print(f"PDF generation failed: {e}")
        pdf_file = None

    return (
        lang_text,
        transcript,
        transcript_en,
        ", ".join(brands),
        ", ".join(topics),
        key_takeaways,
        pdf_file,
    )

# Web UI: a single audio upload feeds process_audio; its results fill six
# text boxes and one downloadable file.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload MP3/WAV Audio"),
    outputs=[
        # One textbox per text result, in display order.
        *(
            gr.Textbox(label=caption)
            for caption in (
                "Detected Language",
                "Original Transcript",
                "English Transcript (if translated)",
                "Brands Detected",
                "Key Topics",
                "Bulleted Key Takeaways",
            )
        ),
        gr.File(label="Download PDF Report"),
    ],
    title="Audio Transcript, Brand & Topic Analysis (OpenAI Whisper + PDF Download)",
    description="Upload your audio file (MP3/WAV). Get full transcript, summary, brand and topic detection, and download results as PDF.",
)

# Start the Gradio server.
iface.launch()