rahimizadeh committed on
Commit
2e6f8b0
·
verified ·
1 Parent(s): 88390ff

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. UI.JPG +3 -0
  3. app.py +106 -0
  4. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ UI.JPG filter=lfs diff=lfs merge=lfs -text
UI.JPG ADDED

Git LFS Details

  • SHA256: 6cae29af48d943d5c1fd67e016c1f36a198f944d46defeb0aa5677ff84ef41b0
  • Pointer size: 131 Bytes
  • Size of remote file: 102 kB
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Running on local URL: http://127.0.0.1:7860
2
+
3
+ import gradio as gr # Gradio: for creating web-based user interfaces
4
+ import PyPDF2 # PyPDF2: for reading PDF files
5
+ import tempfile # tempfile: to safely handle temporary files
6
+ from langchain.prompts import PromptTemplate # LangChain: for managing prompt templates
7
+ from langchain_huggingface.llms import HuggingFacePipeline # LangChain integration with HuggingFace models
8
+
9
+ # Define a summarization class
10
class TextSummarizer:
    """Summarize text with a Hugging Face summarization model through LangChain.

    The underlying pipeline is built lazily and reused for as long as the
    requested length settings stay the same, so the model is not reloaded
    on every call.
    """

    def __init__(self):
        # Define the model to use for summarization
        self.model_id = "facebook/bart-large-cnn"
        # Most recently built pipeline and the (max_length, min_length)
        # pair it was built with; both stay None until the first request.
        self._llm = None
        self._llm_lengths = None

    def _get_llm(self, max_length, min_length):
        """Return a pipeline for the given lengths, rebuilding only on change."""
        lengths = (max_length, min_length)
        if self._llm is None or self._llm_lengths != lengths:
            # Build into a local first so a failed load leaves the cache intact.
            llm = HuggingFacePipeline.from_model_id(
                model_id=self.model_id,
                task="summarization",
                pipeline_kwargs={
                    "max_length": max_length,
                    "min_length": min_length,
                    "do_sample": False,  # Deterministic output
                },
            )
            self._llm = llm
            self._llm_lengths = lengths
        return self._llm

    def summarize_text(self, article_text, max_length=150, min_length=30):
        """Return a summary of ``article_text``.

        Args:
            article_text: The text to summarize.
            max_length: Forwarded to the pipeline as ``max_length``
                (presumably measured in model tokens -- confirm against
                the transformers documentation).
            min_length: Forwarded to the pipeline as ``min_length``.

        Returns:
            The summary as a string.
        """
        llm = self._get_llm(max_length, min_length)

        # Create a basic prompt template that just passes the text through
        prompt = PromptTemplate(input_variables=["document"], template="{document}")
        prompt_input = prompt.format(document=article_text)

        # invoke() is the supported entry point; calling __call__ directly
        # is the legacy form of the same operation.
        summary = llm.invoke(prompt_input)

        # If the model returns a list of summaries, extract the summary text
        if isinstance(summary, list) and summary:
            first = summary[0]
            if isinstance(first, dict) and "summary_text" in first:
                return first["summary_text"]
            return str(first)
        return str(summary)  # Fallback for other formats
40
+
41
+ # Function to extract text from an uploaded PDF
42
def pdf_to_text(pdf_file):
    """Extract the text of an uploaded PDF.

    Args:
        pdf_file: Raw bytes of the PDF (the Gradio File component in this
            file is declared with ``type="binary"``), or ``None``/empty
            when nothing has been uploaded.

    Returns:
        The extracted text with surrounding whitespace removed, or a
        human-readable message when there is no upload, no extractable
        text, or the PDF cannot be read.
    """
    import io  # local import: only this function needs it

    if not pdf_file:
        return "Please upload a PDF file first."

    try:
        # Read straight from memory: no temporary file is created, so there
        # is nothing to clean up and no reopen-by-name issue on Windows.
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        text = "\n".join(page.extract_text() or "" for page in reader.pages).strip()
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing
        return f"Error reading PDF: {e}"

    return text or "No extractable text found in the PDF."
57
+
58
+ # Instantiate the summarizer class
59
# Single module-level summarizer instance used by the UI callbacks
summarizer: TextSummarizer = TextSummarizer()
60
+
61
+ # Summarize input with user-defined maximum length
62
def summarize_input(text, max_words):
    """Summarize ``text`` using the user-selected maximum length.

    Args:
        text: The article text to summarize; ``None`` or blank input is
            rejected with a prompt message.
        max_words: The requested maximum summary length; converted with
            ``int()`` and passed to the summarizer as ``max_length``.

    Returns:
        The summary, or a human-readable message when the input is missing,
        the limit is not positive, or summarization fails.
    """
    if not text or not text.strip():
        return "Please enter or extract some text first."

    try:
        # Convert max_words input to integer
        max_length = int(max_words)
        if max_length < 1:
            return "Max summary word count must be a positive number."

        # Aim for a reasonable minimum, but never let it exceed the maximum
        # (for small limits the floor of 30 used to overshoot max_length).
        min_length = min(max(30, max_length // 4), max_length)

        # Generate the summary
        return summarizer.summarize_text(text, max_length=max_length, min_length=min_length)
    except Exception as e:
        # UI boundary: report the failure in the output box
        return f"Error during summarization: {e}"
76
+
77
+ # Build the Gradio UI
78
with gr.Blocks() as demo:
    gr.Markdown("## 📝 Text & PDF Summarizer with Length Control")

    # Input row: free-text article on one side, PDF upload on the other
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter article text",
            lines=15,
            placeholder="Paste your article here...",
        )
        pdf_file = gr.File(
            label="Or upload PDF",
            file_types=[".pdf"],
            type="binary",
        )

    # Numeric control for the maximum summary length
    max_words = gr.Number(
        label="Max summary word count",
        value=150,
        precision=0,
    )

    # Action row: PDF extraction and summarization triggers
    with gr.Row():
        convert_btn = gr.Button("Convert PDF to Text")
        summary_btn = gr.Button("Summarize Text")

    # Where the generated summary is shown
    output_text = gr.Textbox(label="Summary", lines=10)

    # Wire each button to its callback
    convert_btn.click(
        fn=pdf_to_text,
        inputs=pdf_file,
        outputs=text_input,
    )
    summary_btn.click(
        fn=summarize_input,
        inputs=[text_input, max_words],
        outputs=output_text,
    )

# Start the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ PyPDF2
3
+ langchain
4
+ langchain-huggingface
5
+ transformers
6
+ torch