# Hugging Face Space (status shown when this page was captured: Sleeping)
import io
import tempfile

import gradio as gr
import PyPDF2
from transformers import pipeline
# Step 1: Summarizer class using HuggingFace directly
class TextSummarizer:
    """Wrap a Hugging Face summarization pipeline behind one method.

    The pipeline is created once in ``__init__`` with the
    ``facebook/bart-large-cnn`` model and reused for every call to
    ``summarize_text``.
    """

    # Longest input, in characters, that is handed to the pipeline.
    # NOTE(review): this is a character cap; the model's own input limit is
    # presumably counted in tokens -- confirm whether token-level truncation
    # was the real intent.
    MAX_INPUT_CHARS = 1024

    def __init__(self):
        self.summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
        )

    def summarize_text(self, article_text, max_length=150, min_length=30):
        """Return a summary of ``article_text``.

        Args:
            article_text: Text to summarize. Surrounding whitespace is
                stripped and anything beyond ``MAX_INPUT_CHARS`` characters
                is dropped.
            max_length: Forwarded to the pipeline as ``max_length``.
            min_length: Forwarded to the pipeline as ``min_length`` after
                being capped at ``max_length``.

        Returns:
            The ``summary_text`` of the first pipeline result, or
            ``"No summary generated."`` when the input is empty or the
            pipeline returns nothing.
        """
        article_text = (article_text or "").strip()
        if not article_text:
            # Nothing to summarize: skip the model call entirely.
            return "No summary generated."

        # Truncate very long inputs.
        article_text = article_text[: self.MAX_INPUT_CHARS]

        # A minimum above the maximum is an unsatisfiable length constraint.
        min_length = min(min_length, max_length)

        summary = self.summarizer(
            article_text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )
        return summary[0]["summary_text"] if summary else "No summary generated."
# Step 2: PDF text extraction
def pdf_to_text(pdf_file):
    """Extract the text of an uploaded PDF as a single string.

    Args:
        pdf_file: Raw bytes of the PDF, or ``None``/empty when nothing has
            been uploaded.

    Returns:
        The text of every page joined with newlines. A short human-readable
        message is returned instead when no file was supplied, when the PDF
        holds no extractable text, or when reading it fails.
    """
    if not pdf_file:
        return "Please upload a PDF file first."

    try:
        # Parse straight from memory: no temporary file is created, so
        # nothing is left behind on disk after the conversion.
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        # extract_text() may yield nothing for a page; treat that as "".
        text = "\n".join(page.extract_text() or "" for page in reader.pages).strip()
    except Exception as e:
        # This is a UI callback: report the failure as text rather than
        # letting the exception propagate.
        return f"Error reading PDF: {e}"

    return text or "No extractable text found in the PDF."
# Step 3: Summarization function for Gradio
# One shared TextSummarizer is built when the module is imported, so the
# pipeline it creates is constructed once and reused by summarize_input.
summarizer = TextSummarizer()
def summarize_input(text, max_words):
    """Summarize ``text`` for the UI, always returning a displayable string.

    Args:
        text: Text to summarize; ``None``, empty or whitespace-only input is
            rejected with a prompt.
        max_words: Requested summary length. It is converted with ``int()``
            and passed to the summarizer as ``max_length``.
            NOTE(review): the underlying pipeline presumably measures length
            in tokens rather than words -- confirm and relabel if so.

    Returns:
        The generated summary, or a human-readable message when the input is
        missing, the length is invalid, or summarization fails.
    """
    if not text or not text.strip():
        return "Please enter or extract some text first."

    try:
        max_length = int(max_words)
    except (TypeError, ValueError, OverflowError):
        # Covers a cleared number field (None) and non-numeric/NaN/inf values.
        return "Max summary word count must be a positive whole number."
    if max_length < 1:
        return "Max summary word count must be a positive whole number."

    # Aim for at least 30 (or a quarter of the maximum), but never let the
    # minimum exceed the maximum the user asked for.
    min_length = min(max_length, max(30, max_length // 4))

    try:
        return summarizer.summarize_text(
            text, max_length=max_length, min_length=min_length
        )
    except Exception as e:
        # UI callback boundary: show the failure instead of raising.
        return f"Error during summarization: {e}"
# Step 4: Gradio UI setup
# NOTE(review): the source this was recovered from had no indentation, so
# which widgets sit inside each gr.Row() is inferred (text box + file upload
# in the first row, the two buttons in the second) -- confirm the layout.
# NOTE(review): the "π" in the heading looks like a mis-encoded emoji; it is
# kept verbatim here -- confirm the intended character.
with gr.Blocks() as demo:
    gr.Markdown("## π Text & PDF Summarizer")

    with gr.Row():
        text_input = gr.Textbox(
            label="Enter text to summarize",
            lines=15,
            placeholder="Paste your text here...",
        )
        pdf_file = gr.File(
            label="Or upload a PDF",
            file_types=[".pdf"],
            type="binary",
        )

    max_words = gr.Number(
        label="Max summary word count",
        value=150,
        precision=0,
    )

    with gr.Row():
        convert_btn = gr.Button("Convert PDF to Text")
        summarize_btn = gr.Button("Summarize Text")

    output_text = gr.Textbox(label="Summary", lines=10)

    # The extracted PDF text is written into the input box.
    convert_btn.click(
        fn=pdf_to_text,
        inputs=pdf_file,
        outputs=text_input,
    )
    # The summary of the input box's text is shown in the output box.
    summarize_btn.click(
        fn=summarize_input,
        inputs=[text_input, max_words],
        outputs=output_text,
    )

# Step 5: Launch the app
if __name__ == "__main__":
    demo.launch()