Spaces:
Sleeping
Sleeping
import gradio as gr | |
import openai | |
import os | |
from PyPDF2 import PdfReader | |
import json | |
# Pull the OpenAI credential from the process environment; the attribute is
# set to None when OPENAI_API_KEY is not defined.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: Passed unchanged to ``PdfReader``.

    Returns:
        The text of all pages that yielded any text, joined with newlines and
        stripped of leading/trailing whitespace. An empty string when no page
        produced text.
    """
    reader = PdfReader(file)
    page_texts = (page.extract_text() for page in reader.pages)
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next; pages that returned nothing are skipped.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
# Upper bound on the number of paper characters placed in the prompt. Sending
# an entire paper can exceed the model's context window, which makes the API
# call fail outright.
# NOTE(review): 40,000 characters assumes roughly a 16k-token context window
# for "gpt-3.5-turbo" -- confirm against the model actually deployed.
_MAX_PAPER_CHARS = 40_000

# User-facing messages shown in the first output box.
_NO_TEXT_MESSAGE = "β Could not extract text from the PDF."
_PARSE_FAILURE_MESSAGE = "β Failed to parse structured response."
_MISSING_FIELD = "β Not found"

# JSON keys requested from the model, in the order the outputs are returned.
_SUMMARY_FIELDS = ("full_summary", "eli5", "why_it_matters", "tldr")


def _failure(message):
    """Return the 4-tuple result shape carrying only an error message."""
    return message, "", "", ""


def _extract_json_object(raw_output):
    """Return the outermost ``{...}`` span of *raw_output*, or it unchanged."""
    start = raw_output.find("{")
    end = raw_output.rfind("}")
    if start == -1 or end < start:
        return raw_output
    return raw_output[start:end + 1]


def summarize_pdf(file):
    """Summarize an uploaded PDF into four text fields via the OpenAI chat API.

    Args:
        file: The uploaded PDF, forwarded to ``extract_text_from_pdf``. May be
            ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(full_summary, eli5, why_it_matters, tldr)``. On any
        failure the first element is an error message and the remaining three
        are empty strings; this function does not raise.
    """
    if file is None:
        # Nothing to read; report it the same way as a PDF without text.
        return _failure(_NO_TEXT_MESSAGE)

    try:
        text = extract_text_from_pdf(file)
        if not text:
            return _failure(_NO_TEXT_MESSAGE)

        # Keep the request within the context budget (see _MAX_PAPER_CHARS).
        paper = text[:_MAX_PAPER_CHARS]
        prompt = (
            "\n"
            "You are a helpful research assistant. Read the paper below and "
            "return a JSON object with the following 4 fields:\n"
            '- "full_summary" (max 200 words)\n'
            '- "eli5"\n'
            '- "why_it_matters"\n'
            '- "tldr" (1-line summary)\n'
            "Respond **only** with a valid JSON object.\n"
            "Paper:\n"
            f"{paper}\n"
        )

        # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
        # interface -- confirm the openai version pinned for this app.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1200,
        )
        raw_output = response["choices"][0]["message"]["content"]

        # The reply may wrap the JSON in Markdown fences or extra prose, so
        # parse only the outermost brace-delimited span.
        data = json.loads(_extract_json_object(raw_output))
        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. a list): nothing to look up.
            return _failure(_PARSE_FAILURE_MESSAGE)

        return tuple(data.get(field, _MISSING_FIELD) for field in _SUMMARY_FIELDS)
    except json.JSONDecodeError:
        return _failure(_PARSE_FAILURE_MESSAGE)
    except Exception as e:
        # UI boundary: surface the error text instead of raising.
        return _failure(f"β Error: {str(e)}")
# Gradio UI: a single PDF upload feeding summarize_pdf, whose four returned
# strings are shown in four text boxes in the same order.
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload Research Paper PDF", file_types=[".pdf"]),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "π Full Summary",
            "π§ ELI5",
            "π― Why It Matters",
            "β‘ TL;DR",
        )
    ],
    title="π The Arktika",
    description=(
        "Upload a research paper PDF and get structured summaries: "
        "Full, ELI5, Why it Matters, and TL;DR."
    ),
)

# Start the web app as soon as the module is executed.
demo.launch()