import gradio as gr
import openai
import os
from PyPDF2 import PdfReader
import json

# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
# os.getenv returns None when the variable is unset, so nothing is validated
# at import time.
openai.api_key = os.getenv("OPENAI_API_KEY")

def extract_text_from_pdf(file):
    """Return the text of every page of a PDF as one stripped string.

    Args:
        file: Whatever is handed straight to ``PdfReader``
            (presumably a path or a binary stream; confirm against the
            PyPDF2 version in use).

    Returns:
        The extracted page texts concatenated in page order with no separator
        between pages, with leading/trailing whitespace removed. Pages for
        which ``extract_text()`` yields nothing (``None`` or ``""``) are
        skipped, so the result is ``""`` when no page has extractable text.
    """
    pdf = PdfReader(file)
    # Extract lazily, then keep only pages that produced some text.
    extracted = (pg.extract_text() for pg in pdf.pages)
    return "".join(chunk for chunk in extracted if chunk).strip()

def _parse_summary_json(raw_output):
    """Decode the model reply into a dict, tolerating text around the JSON.

    Chat models often wrap JSON in Markdown code fences or add a sentence
    before/after it even when told not to. A strict parse is tried first; if
    that fails, the span from the first "{" to the last "}" is parsed instead.

    Args:
        raw_output: The reply text from the model (``None`` is treated as "").

    Returns:
        The decoded JSON object as a ``dict``.

    Raises:
        json.JSONDecodeError: If no JSON object can be decoded from the reply,
            or the decoded value is not a JSON object.
    """
    cleaned = (raw_output or "").strip()
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start == -1 or end < start:
            raise
        data = json.loads(cleaned[start:end + 1])

    # A list/string/number is valid JSON but has none of the expected fields;
    # report it as a parse failure instead of crashing on ``.get`` later.
    if not isinstance(data, dict):
        raise json.JSONDecodeError("Expected a JSON object", cleaned, 0)
    return data


def summarize_pdf(file):
    """Summarize an uploaded PDF into four text fields via the OpenAI chat API.

    Args:
        file: The uploaded PDF as delivered by the Gradio ``File`` input, or
            ``None`` when nothing was uploaded. It is forwarded unchanged to
            ``extract_text_from_pdf``.

    Returns:
        A 4-tuple of strings ``(full_summary, eli5, why_it_matters, tldr)``.
        A field missing from the model's JSON is replaced by "❌ Not found".
        On any failure the first element carries a "❌ ..." message and the
        remaining three are empty strings; this function never raises.
    """
    # Submitting the form without a file passes None; answer with a clear
    # message rather than surfacing a PDF-library error.
    if file is None:
        return "❌ Please upload a PDF file.", "", "", ""

    try:
        text = extract_text_from_pdf(file)

        if not text:
            return "❌ Could not extract text from the PDF.", "", "", ""

        prompt = f"""
You are a helpful research assistant. Read the paper below and return a JSON object with the following 4 fields:

- "full_summary" (max 200 words)
- "eli5"
- "why_it_matters"
- "tldr" (1-line summary)

Respond **only** with a valid JSON object.

Paper:
{text}
"""

        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
        # confirm the installed openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1200
        )

        raw_output = response["choices"][0]["message"]["content"]

        # Lenient parse: accepts fenced or prose-wrapped JSON objects.
        data = _parse_summary_json(raw_output)

        return (
            data.get("full_summary", "❌ Not found"),
            data.get("eli5", "❌ Not found"),
            data.get("why_it_matters", "❌ Not found"),
            data.get("tldr", "❌ Not found"),
        )

    except json.JSONDecodeError:
        return "❌ Failed to parse structured response.", "", "", ""
    except Exception as e:
        # UI boundary: show any other failure (PDF parsing, API error) to the
        # user instead of letting the request crash.
        return f"❌ Error: {str(e)}", "", "", ""

# Gradio UI: one PDF upload as input and four text boxes as output, listed in
# the same order as the tuple returned by summarize_pdf.
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload Research Paper PDF", file_types=[".pdf"]),
    outputs=[
        gr.Textbox(label="📘 Full Summary"),
        gr.Textbox(label="🧒 ELI5"),
        gr.Textbox(label="🎯 Why It Matters"),
        gr.Textbox(label="⚡ TL;DR")
    ],
    title="📄 The Arktika",
    description="Upload a research paper PDF and get structured summaries: Full, ELI5, Why it Matters, and TL;DR."
)

# Start the web server only when this file is run as a script, so importing
# the module (to reuse `demo` or the helper functions) has no side effects.
if __name__ == "__main__":
    demo.launch()