File size: 2,575 Bytes
d2342f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import io
import tempfile

import gradio as gr
import PyPDF2
from transformers import pipeline

# Step 1: Summarizer class using HuggingFace directly
class TextSummarizer:
    """Thin wrapper around a HuggingFace ``summarization`` pipeline."""

    # Checkpoint loaded when the caller does not name one.
    DEFAULT_MODEL = "facebook/bart-large-cnn"
    # Inputs longer than this many characters are cut before summarizing.
    MAX_INPUT_CHARS = 1024

    def __init__(self, model_name=DEFAULT_MODEL):
        """Build the summarization pipeline.

        Args:
            model_name: Model identifier handed to ``pipeline(model=...)``.
                Defaults to ``facebook/bart-large-cnn``.
        """
        self.summarizer = pipeline("summarization", model=model_name)

    def summarize_text(self, article_text, max_length=150, min_length=30):
        """Summarize ``article_text`` and return the summary string.

        Args:
            article_text: Text to summarize. Surrounding whitespace is
                stripped and anything past ``MAX_INPUT_CHARS`` characters
                is discarded.
            max_length: Forwarded to the pipeline as ``max_length``.
            min_length: Forwarded to the pipeline as ``min_length``.

        Returns:
            The ``summary_text`` of the first pipeline result, or
            ``"No summary generated."`` when the input is empty or the
            pipeline returns nothing.
        """
        article_text = (article_text or "").strip()
        if not article_text:
            # Nothing to summarize; skip the model call entirely.
            return "No summary generated."

        # Cheap character-level cap for very long inputs.
        article_text = article_text[: self.MAX_INPUT_CHARS]

        summary = self.summarizer(
            article_text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            # A character cap does not bound the token count (one character
            # can encode to several tokens), so also let the tokenizer
            # truncate to the model's input limit.
            truncation=True,
        )
        return summary[0]["summary_text"] if summary else "No summary generated."

# Step 2: PDF text extraction
def pdf_to_text(pdf_file):
    """Extract the text of an uploaded PDF.

    Args:
        pdf_file: Raw bytes of the PDF. ``None`` or empty bytes means
            nothing was uploaded.

    Returns:
        The text of all pages joined by newlines, or a human-readable
        message when no file was given, the PDF has no extractable text,
        or reading it failed. Never raises; errors are reported in the
        returned string so they can be shown in the UI.
    """
    if not pdf_file:
        return "Please upload a PDF first."

    try:
        # Parse straight from memory: no temp file is left behind on disk
        # and nothing has to be reopened by name while still open.
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        # extract_text() may yield nothing for a page; treat that as "".
        page_texts = (page.extract_text() or "" for page in reader.pages)
        text = "\n".join(page_texts).strip()
    except Exception as e:
        # UI boundary: surface any parser failure as a message.
        return f"Error reading PDF: {e}"

    return text or "No extractable text found in the PDF."

# Step 3: Summarization function for Gradio
# One module-level TextSummarizer used by summarize_input below; creating it
# runs TextSummarizer.__init__, which builds the summarization pipeline, so
# this happens once when the module is loaded.
summarizer = TextSummarizer()

def summarize_input(text, max_words):
    """Gradio callback: summarize ``text`` with a cap of ``max_words``.

    Args:
        text: Text to summarize; ``None``, empty or whitespace-only input
            is rejected with a prompt.
        max_words: Requested upper bound, converted with ``int()`` and
            passed to the summarizer as ``max_length``.

    Returns:
        The summary, or a human-readable message when the input is
        missing, the length is invalid, or summarization fails. Never
        raises; errors are returned as strings so they can be displayed.
    """
    if not text or not text.strip():
        return "Please enter or extract some text first."

    try:
        max_length = int(max_words)
    except (TypeError, ValueError, OverflowError):
        # None (cleared field), non-numeric input, NaN or infinity.
        max_length = 0
    if max_length < 1:
        return "Please enter a valid maximum summary length."

    # Floor of 30 (or a quarter of the cap for long summaries), but never
    # above the cap itself: a min_length larger than max_length is an
    # unsatisfiable constraint for the generator.
    min_length = min(max(30, max_length // 4), max_length)

    try:
        return summarizer.summarize_text(text, max_length=max_length, min_length=min_length)
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app.
        return f"Error during summarization: {e}"

# Step 4: Gradio UI setup
with gr.Blocks() as demo:
    # Page heading. The emoji is written as a proper Unicode character;
    # the previous literal was a mis-decoded byte sequence.
    gr.Markdown("## 📝 Text & PDF Summarizer")

    # Inputs: free text on the left, optional PDF upload on the right.
    with gr.Row():
        text_input = gr.Textbox(label="Enter text to summarize", lines=15, placeholder="Paste your text here...")
        # type="binary" so the upload reaches pdf_to_text as raw bytes.
        pdf_file = gr.File(label="Or upload a PDF", file_types=[".pdf"], type="binary")

    # Upper bound passed to summarize_input; precision=0 keeps it integral.
    max_words = gr.Number(label="Max summary word count", value=150, precision=0)

    with gr.Row():
        convert_btn = gr.Button("Convert PDF to Text")
        summarize_btn = gr.Button("Summarize Text")

    output_text = gr.Textbox(label="Summary", lines=10)

    # "Convert" writes the extracted PDF text into the text box so it can be
    # reviewed or edited; "Summarize" then reads from that same text box.
    convert_btn.click(fn=pdf_to_text, inputs=pdf_file, outputs=text_input)
    summarize_btn.click(fn=summarize_input, inputs=[text_input, max_words], outputs=output_text)

# Step 5: Launch the app
# Only launch when this file is run as a script; importing the module
# builds `demo` but does not call launch().
if __name__ == "__main__":
    demo.launch()