rahimizadeh committed on
Commit
d2342f2
·
verified ·
1 Parent(s): b9e5c01

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import PyPDF2
3
+ import tempfile
4
+ from transformers import pipeline
5
+
6
+ # Step 1: Summarizer class using HuggingFace directly
7
class TextSummarizer:
    """Summarizer backed by a Hugging Face ``summarization`` pipeline.

    The pipeline (``facebook/bart-large-cnn``) is created once in
    ``__init__`` and reused by every call to ``summarize_text``.
    """

    # Inputs longer than this many characters are cut before summarizing.
    MAX_INPUT_CHARS = 1024

    def __init__(self):
        """Create the summarization pipeline."""
        self.summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
        )

    def summarize_text(self, article_text, max_length=150, min_length=30):
        """Return a summary of *article_text*.

        Args:
            article_text: Text to summarize. Surrounding whitespace is
                stripped and only the first ``MAX_INPUT_CHARS`` characters
                are used.
            max_length: Forwarded to the pipeline as ``max_length``.
            min_length: Forwarded to the pipeline as ``min_length`` after
                being clamped into ``[0, max_length]``.

        Returns:
            The generated summary, or ``"No summary generated."`` when the
            input is blank or the pipeline returns nothing.
        """
        article_text = (article_text or "").strip()
        if not article_text:
            # Nothing to summarize; do not send an empty string to the model.
            return "No summary generated."
        article_text = article_text[: self.MAX_INPUT_CHARS]

        # A lower bound above the upper bound is an unsatisfiable request.
        min_length = max(0, min(min_length, max_length))

        summary = self.summarizer(
            article_text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            # The character cap above does not bound the token count, so
            # also let the tokenizer truncate to the model's input limit.
            truncation=True,
        )
        return summary[0]["summary_text"] if summary else "No summary generated."
27
+
28
+ # Step 2: PDF text extraction
29
def pdf_to_text(pdf_file):
    """Extract the text of an uploaded PDF.

    Args:
        pdf_file: Raw PDF content as ``bytes``; ``None`` or empty when no
            file has been uploaded.

    Returns:
        The text of all pages joined with newlines, or a human-readable
        message when nothing was uploaded, no text could be extracted, or
        the PDF could not be read.
    """
    import io  # local import keeps this block self-contained

    if not pdf_file:
        return "Please upload a PDF file first."
    try:
        # Parse straight from memory. Writing the upload to a
        # NamedTemporaryFile(delete=False) left one orphaned file on disk
        # per call and reopened the file by name while it was still open.
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        text = "\n".join(page.extract_text() or "" for page in reader.pages).strip()
    except Exception as e:
        # UI boundary: report any parsing failure as text instead of raising.
        return f"Error reading PDF: {e}"
    return text or "No extractable text found in the PDF."
39
+
40
+ # Step 3: Summarization function for Gradio
41
# Single shared instance: constructing it builds the pipeline, so this runs
# once at module load and is reused by every summarization request.
summarizer = TextSummarizer()
42
+
43
def summarize_input(text, max_words):
    """Summarize *text* for the Gradio UI, returning a message on bad input.

    Args:
        text: Text to summarize; ``None`` or blank text is rejected.
        max_words: Requested maximum summary length as entered in the UI.
            It is converted with ``int()`` and forwarded to the summarizer
            as ``max_length``.

    Returns:
        The summary produced by the module-level ``summarizer``, or a
        human-readable message when the input is invalid or summarization
        fails.
    """
    if not text or not text.strip():
        return "Please enter or extract some text first."

    try:
        max_length = int(max_words)
    except (TypeError, ValueError, OverflowError):
        # Covers an emptied number field (None) and non-finite values.
        return "Please enter a valid max summary word count."
    if max_length < 1:
        return "Max summary word count must be at least 1."

    min_length = max(30, max_length // 4)
    if min_length >= max_length:
        # Small limits would otherwise ask for a minimum at or above the
        # maximum; fall back to half of the requested maximum.
        min_length = max_length // 2

    try:
        return summarizer.summarize_text(
            text, max_length=max_length, min_length=min_length
        )
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"Error during summarization: {e}"
52
+
53
+ # Step 4: Gradio UI setup
54
# Gradio UI: a text box and a PDF upload side by side, a length control,
# two action buttons, and a box that shows the resulting summary.
with gr.Blocks() as demo:
    gr.Markdown("## 📝 Text & PDF Summarizer")

    # Input row: free text on the left, PDF upload on the right.
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter text to summarize",
            lines=15,
            placeholder="Paste your text here...",
        )
        pdf_file = gr.File(
            label="Or upload a PDF",
            file_types=[".pdf"],
            type="binary",
        )

    max_words = gr.Number(
        label="Max summary word count",
        value=150,
        precision=0,
    )

    # Action row.
    with gr.Row():
        convert_btn = gr.Button("Convert PDF to Text")
        summarize_btn = gr.Button("Summarize Text")

    output_text = gr.Textbox(label="Summary", lines=10)

    # The extracted PDF text is written into the text box, from which the
    # summarize button then reads.
    convert_btn.click(
        fn=pdf_to_text,
        inputs=pdf_file,
        outputs=text_input,
    )
    summarize_btn.click(
        fn=summarize_input,
        inputs=[text_input, max_words],
        outputs=output_text,
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()