# Page-header residue from the hosting site (not part of the program):
# author Shuja1401; commit c9f734b (verified), message "Update app.py".
import gradio as gr
import openai
import os
from PyPDF2 import PdfReader
import json
# Read the OpenAI key from the environment so it is never committed with the
# source; the value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF as one string.

    Args:
        file: Whatever ``PdfReader`` accepts as its first argument.

    Returns:
        The text of all pages joined by newlines, with leading and trailing
        whitespace stripped. Pages for which ``extract_text()`` returns
        nothing are skipped, so the result is ``""`` when no page yields text.
    """
    reader = PdfReader(file)
    page_texts = (page.extract_text() for page in reader.pages)
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next page.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
# Keys requested from the model, in the order the UI outputs are declared.
_SUMMARY_FIELDS = ("full_summary", "eli5", "why_it_matters", "tldr")


def _parse_summary_json(raw_output):
    """Decode the JSON object contained in the model reply.

    The reply may carry text around the JSON (for example Markdown code
    fences or a lead-in sentence), so only the span from the first ``{`` to
    the last ``}`` is handed to ``json.loads``.

    Raises:
        json.JSONDecodeError: If the reply contains no decodable JSON object.
    """
    reply = raw_output or ""
    start = reply.find("{")
    end = reply.rfind("}")
    if start == -1 or end < start:
        raise json.JSONDecodeError("No JSON object found", reply, 0)
    return json.loads(reply[start:end + 1])


def summarize_pdf(file):
    """Summarize an uploaded PDF into four fields for the UI.

    Args:
        file: The uploaded PDF, forwarded to ``extract_text_from_pdf``; may be
            ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(full_summary, eli5, why_it_matters, tldr)`` with the
        values taken from the model's JSON reply ("❌ Not found" for a missing
        key). On any failure the first element is a "❌ ..." message and the
        remaining three are empty strings; no exception is raised.
    """
    if file is None:
        return "❌ Please upload a PDF file first.", "", "", ""

    try:
        text = extract_text_from_pdf(file)
        if not text:
            return "❌ Could not extract text from the PDF.", "", "", ""

        # NOTE(review): the whole paper is sent as-is; very long PDFs may
        # exceed the model's context window -- confirm and truncate or chunk.
        prompt = f"""
You are a helpful research assistant. Read the paper below and return a JSON object with the following 4 fields:
- "full_summary" (max 200 words)
- "eli5"
- "why_it_matters"
- "tldr" (1-line summary)
Respond **only** with a valid JSON object.
Paper:
{text}
"""
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1200,
        )
        raw_output = response["choices"][0]["message"]["content"]
        data = _parse_summary_json(raw_output)
    except json.JSONDecodeError:
        return "❌ Failed to parse structured response.", "", "", ""
    except Exception as e:
        # UI boundary: show the failure in the first textbox instead of
        # letting the exception escape.
        return f"❌ Error: {str(e)}", "", "", ""

    return tuple(data.get(field, "❌ Not found") for field in _SUMMARY_FIELDS)
# Gradio UI: a single PDF upload wired to summarize_pdf, with four text
# outputs declared in the same order as the tuple summarize_pdf returns.
# The labels and title contain emoji, so this file must be saved as UTF-8;
# decoding it with another codec turns them into mojibake in the UI.
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload Research Paper PDF", file_types=[".pdf"]),
    outputs=[
        gr.Textbox(label="📘 Full Summary"),
        gr.Textbox(label="🧒 ELI5"),
        gr.Textbox(label="🎯 Why It Matters"),
        gr.Textbox(label="⚡ TL;DR"),
    ],
    title="📄 The Arktika",
    description="Upload a research paper PDF and get structured summaries: Full, ELI5, Why it Matters, and TL;DR.",
)
demo.launch()