"""Gradio app that summarizes an uploaded research-paper PDF.

The PDF text is extracted with PyPDF2, sent to the OpenAI chat completion
API, and the JSON reply is split into four text boxes (full summary, ELI5,
why it matters, TL;DR).
"""

import json
import os

import gradio as gr
import openai
from PyPDF2 import PdfReader

openai.api_key = os.getenv("OPENAI_API_KEY")

# Upper bound on how much paper text is placed in the prompt. Without a cap,
# a long paper makes the request exceed the model's context window and the
# whole call fails.
# NOTE(review): 40k characters assumes a ~16k-token context for
# "gpt-3.5-turbo" with room left for the 1200-token reply — confirm against
# the deployed model and adjust.
MAX_PAPER_CHARS = 40_000

# Keys requested from the model, in the order of the Gradio output boxes.
_SUMMARY_FIELDS = ("full_summary", "eli5", "why_it_matters", "tldr")


def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Anything ``PyPDF2.PdfReader`` accepts as its source.

    Returns:
        The extracted text with surrounding whitespace stripped, or an empty
        string when no page yields any text.
    """
    reader = PdfReader(file)
    page_texts = (page.extract_text() for page in reader.pages)
    # Join with a newline so the last word of one page does not fuse with
    # the first word of the next; pages with no extractable text are skipped.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()


def _error_result(message):
    """Return the four-output tuple that shows *message* in the first box."""
    return message, "", "", ""


def _build_prompt(paper_text):
    """Return the user prompt asking for the four-field JSON summary."""
    return f"""
You are a helpful research assistant. Read the paper below and return a JSON object with the following 4 fields:

- "full_summary" (max 200 words)
- "eli5"
- "why_it_matters"
- "tldr" (1-line summary)

Respond **only** with a valid JSON object.

Paper:
{paper_text}
"""


def _parse_summary_json(raw_output):
    """Decode the model reply into a dict.

    The reply is first parsed as-is. If that fails, the span from the first
    ``{`` to the last ``}`` is parsed instead, which tolerates replies
    wrapped in Markdown code fences or surrounded by extra prose.

    Raises:
        json.JSONDecodeError: If no JSON object can be decoded, or the
            decoded value is not a JSON object.
    """
    try:
        data = json.loads(raw_output)
    except json.JSONDecodeError:
        start = raw_output.find("{")
        end = raw_output.rfind("}")
        if start == -1 or end <= start:
            raise
        data = json.loads(raw_output[start:end + 1])
    if not isinstance(data, dict):
        # A list/str/number is valid JSON but has no named fields to read.
        raise json.JSONDecodeError("expected a JSON object", raw_output, 0)
    return data


def summarize_pdf(file):
    """Summarize an uploaded PDF into four text fields.

    Args:
        file: The value Gradio passes for the ``gr.File`` input; ``None``
            when nothing was uploaded.

    Returns:
        A 4-tuple ``(full_summary, eli5, why_it_matters, tldr)``. On any
        failure the first element carries an error message and the other
        three are empty strings; this function does not raise.
    """
    if file is None:
        return _error_result("❌ Please upload a PDF file.")

    try:
        text = extract_text_from_pdf(file)
        if not text:
            return _error_result("❌ Could not extract text from the PDF.")

        # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
        # interface — confirm the pinned openai version supports it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": _build_prompt(text[:MAX_PAPER_CHARS])}
            ],
            temperature=0.7,
            max_tokens=1200,
        )
        raw_output = response["choices"][0]["message"]["content"]

        data = _parse_summary_json(raw_output)
        return tuple(data.get(field, "❌ Not found") for field in _SUMMARY_FIELDS)
    except json.JSONDecodeError:
        return _error_result("❌ Failed to parse structured response.")
    except Exception as e:
        # UI boundary: report the failure in the first box instead of
        # letting the exception propagate out of the handler.
        return _error_result(f"❌ Error: {e}")


# Gradio UI
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload Research Paper PDF", file_types=[".pdf"]),
    outputs=[
        gr.Textbox(label="📘 Full Summary"),
        gr.Textbox(label="🧒 ELI5"),
        gr.Textbox(label="🎯 Why It Matters"),
        gr.Textbox(label="⚡ TL;DR"),
    ],
    title="📄 The Arktika",
    description="Upload a research paper PDF and get structured summaries: Full, ELI5, Why it Matters, and TL;DR.",
)

demo.launch()