Spaces:

Shuja1401
/

paper-news-summarizer

Sleeping

App Files Files Community

paper-news-summarizer / app.py

Shuja1401

Update app.py

c9f734b verified 6 days ago

raw

history blame contribute delete

2.06 kB

	import gradio as gr
	import openai
	import os
	from PyPDF2 import PdfReader
	import json

	openai.api_key = os.getenv("OPENAI_API_KEY")

	def extract_text_from_pdf(file):
		"""Return the concatenated text of every page in a PDF, stripped.

		Pages for which ``extract_text()`` yields nothing (``None`` or an
		empty string) are skipped. Page texts are joined with no separator,
		and leading/trailing whitespace is removed from the final result.

		Args:
			file: The PDF source, passed straight to ``PdfReader``.

		Returns:
			The extracted text, or an empty string when no page produced any.
		"""
		pdf = PdfReader(file)
		# Lazily pull the text of each page, in page order.
		page_texts = (page.extract_text() for page in pdf.pages)
		return "".join(chunk for chunk in page_texts if chunk).strip()

	def _parse_summary_json(raw_output):
		"""Decode the model reply into a dict, tolerating text around the JSON.

		Chat models frequently wrap JSON in a Markdown code fence or add a short
		preamble, so only the span from the first ``{`` to the last ``}`` is
		decoded when such a span exists.

		Raises:
			json.JSONDecodeError: If the reply is not valid JSON or does not
				decode to a JSON object.
		"""
		candidate = raw_output.strip()
		start = candidate.find("{")
		end = candidate.rfind("}")
		if start != -1 and end > start:
			candidate = candidate[start:end + 1]
		data = json.loads(candidate)
		if not isinstance(data, dict):
			# A list/string/number has no named fields to read; treat it as a
			# parse failure rather than letting ``.get`` raise AttributeError.
			raise json.JSONDecodeError("Expected a JSON object", candidate, 0)
		return data


	def summarize_pdf(file):
		"""Summarize an uploaded PDF into four text fields via the OpenAI chat API.

		Args:
			file: The uploaded PDF as handed over by the UI, or ``None`` when
				nothing was uploaded.

		Returns:
			A 4-tuple of strings ``(full_summary, eli5, why_it_matters, tldr)``.
			A field missing from the model reply is reported as "❌ Not found".
			On any failure the first element holds a message starting with "❌"
			and the other three are empty strings; no exception propagates.
		"""
		# Submitting without a file would otherwise surface as an opaque
		# library error from the PDF reader.
		if file is None:
			return "❌ Please upload a PDF file first.", "", "", ""

		try:
			text = extract_text_from_pdf(file)

			if not text:
				return "❌ Could not extract text from the PDF.", "", "", ""

			# NOTE(review): the whole paper text is embedded uncapped; very long
			# papers may exceed the model's context window — confirm and consider
			# truncating or chunking.
			prompt = f"""
	You are a helpful research assistant. Read the paper below and return a JSON object with the following 4 fields:

	- "full_summary" (max 200 words)
	- "eli5"
	- "why_it_matters"
	- "tldr" (1-line summary)

	Respond only with a valid JSON object.

	Paper:
	{text}
	"""

			response = openai.ChatCompletion.create(
				model="gpt-3.5-turbo",
				messages=[{"role": "user", "content": prompt}],
				temperature=0.7,
				max_tokens=1200,
			)

			raw_output = response["choices"][0]["message"]["content"]
			data = _parse_summary_json(raw_output)

			return (
				data.get("full_summary", "❌ Not found"),
				data.get("eli5", "❌ Not found"),
				data.get("why_it_matters", "❌ Not found"),
				data.get("tldr", "❌ Not found"),
			)

		except json.JSONDecodeError:
			return "❌ Failed to parse structured response.", "", "", ""
		except Exception as e:
			# UI boundary: report the failure in the first output box instead of
			# letting the request crash.
			return f"❌ Error: {str(e)}", "", "", ""

	# Gradio UI: one PDF upload in, four summary text boxes out.
	# Label order matches the tuple returned by summarize_pdf.
	_SUMMARY_LABELS = (
		"📘 Full Summary",
		"🧒 ELI5",
		"🎯 Why It Matters",
		"⚡ TL;DR",
	)

	demo = gr.Interface(
		fn=summarize_pdf,
		inputs=gr.File(label="Upload Research Paper PDF", file_types=[".pdf"]),
		outputs=[gr.Textbox(label=box_label) for box_label in _SUMMARY_LABELS],
		title="📄 The Arktika",
		description="Upload a research paper PDF and get structured summaries: Full, ELI5, Why it Matters, and TL;DR.",
	)

	# Start the web app.
	demo.launch()