File size: 2,063 Bytes
e900ee4
 
 
2e036a8
ab126fb
e900ee4
 
 
2e036a8
 
 
 
 
 
 
 
 
e900ee4
 
2e036a8
 
 
 
 
ab126fb
 
 
 
 
 
 
 
 
2e036a8
 
 
 
 
e900ee4
 
2e036a8
e900ee4
2e036a8
e900ee4
 
ab126fb
2e036a8
ab126fb
 
2e036a8
 
ab126fb
 
 
 
2e036a8
e900ee4
ab126fb
 
e900ee4
2e036a8
e900ee4
 
 
 
 
2e036a8
 
 
 
 
 
c9f734b
ab126fb
e900ee4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
import openai
import os
from PyPDF2 import PdfReader
import json

# Configure the OpenAI client from the environment. os.getenv returns None
# when OPENAI_API_KEY is not set, in which case no key is configured here.
openai.api_key = os.getenv("OPENAI_API_KEY")

def extract_text_from_pdf(file):
    """Return the text of every page of a PDF as one stripped string.

    Args:
        file: The PDF source, passed straight through to ``PdfReader``.

    Returns:
        The per-page text joined with no separator, with leading and
        trailing whitespace removed. Pages whose extraction yields nothing
        (``None`` or an empty string) are skipped.
    """
    per_page = (page.extract_text() for page in PdfReader(file).pages)
    return "".join(chunk for chunk in per_page if chunk).strip()

def _parse_summary_json(raw_output):
    """Decode the model reply into a dict, tolerating text around the JSON.

    The reply may be wrapped in a Markdown code fence or carry extra prose
    before or after the object, so only the span from the first "{" to the
    last "}" is handed to the JSON parser when such a span exists.

    Args:
        raw_output: The message content returned by the chat completion.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        json.JSONDecodeError: If the reply cannot be decoded, or decodes to
            something other than a JSON object.
    """
    candidate = (raw_output or "").strip()
    start = candidate.find("{")
    end = candidate.rfind("}")
    if start != -1 and end > start:
        candidate = candidate[start:end + 1]

    data = json.loads(candidate)
    if not isinstance(data, dict):
        # Reuse the decode error so the caller reports one consistent message.
        raise json.JSONDecodeError("Expected a JSON object", candidate, 0)
    return data


def summarize_pdf(file):
    """Summarize an uploaded research-paper PDF in four styles.

    Args:
        file: The uploaded PDF as delivered by the ``gr.File`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple of strings ``(full_summary, eli5, why_it_matters, tldr)``.
        On failure the first element holds an error message prefixed with
        "❌" and the other three are empty strings, so the tuple always
        matches the four output text boxes.
    """
    # Submitting without a file would otherwise surface a raw library error.
    if file is None:
        return "❌ Please upload a PDF file.", "", "", ""

    try:
        text = extract_text_from_pdf(file)

        if not text:
            return "❌ Could not extract text from the PDF.", "", "", ""

        # NOTE(review): the whole paper is sent in a single request; very long
        # PDFs may exceed the model's context limit -- confirm whether the
        # text should be truncated or chunked.
        prompt = f"""
You are a helpful research assistant. Read the paper below and return a JSON object with the following 4 fields:

- "full_summary" (max 200 words)
- "eli5"
- "why_it_matters"
- "tldr" (1-line summary)

Respond **only** with a valid JSON object.

Paper:
{text}
"""

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1200
        )

        raw_output = response["choices"][0]["message"]["content"]

        data = _parse_summary_json(raw_output)

        return (
            data.get("full_summary", "❌ Not found"),
            data.get("eli5", "❌ Not found"),
            data.get("why_it_matters", "❌ Not found"),
            data.get("tldr", "❌ Not found"),
        )

    except json.JSONDecodeError:
        return "❌ Failed to parse structured response.", "", "", ""
    except Exception as e:
        # UI boundary: report any other failure in the first text box
        # instead of letting the callback raise.
        return f"❌ Error: {str(e)}", "", "", ""

# Gradio UI: one PDF upload in, four text boxes out. The order of the outputs
# matches the 4-tuple returned by summarize_pdf.
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload Research Paper PDF", file_types=[".pdf"]),
    outputs=[
        # Labels previously held mis-decoded emoji bytes; restored to the
        # intended characters.
        gr.Textbox(label="📘 Full Summary"),
        gr.Textbox(label="🧒 ELI5"),
        gr.Textbox(label="🎯 Why It Matters"),
        gr.Textbox(label="⚡ TL;DR"),
    ],
    title="📄 The Arktika",
    description="Upload a research paper PDF and get structured summaries: Full, ELI5, Why it Matters, and TL;DR."
)

demo.launch()