rahimizadeh committed on
Commit
145c622
·
verified ·
1 Parent(s): b6d6235

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -106
app.py CHANGED
@@ -1,106 +1,112 @@
1
- # Running on local URL: http://127.0.0.1:7860
2
-
3
- import gradio as gr # Gradio: for creating web-based user interfaces
4
- import PyPDF2 # PyPDF2: for reading PDF files
5
- import tempfile # tempfile: to safely handle temporary files
6
- from langchain.prompts import PromptTemplate # LangChain: for managing prompt templates
7
- from langchain_huggingface.llms import HuggingFacePipeline # LangChain integration with HuggingFace models
8
-
9
- # Define a summarization class
10
- class TextSummarizer:
11
- def __init__(self):
12
- # Define the model to use for summarization
13
- self.model_id = "facebook/bart-large-cnn"
14
-
15
- def summarize_text(self, article_text, max_length=150, min_length=30):
16
- # Load a summarization pipeline with custom length settings
17
- llm = HuggingFacePipeline.from_model_id(
18
- model_id=self.model_id,
19
- task="summarization",
20
- pipeline_kwargs={
21
- "max_length": max_length,
22
- "min_length": min_length,
23
- "do_sample": False # Deterministic output
24
- }
25
- )
26
-
27
- # Create a basic prompt template that just passes the text
28
- prompt = PromptTemplate(input_variables=["document"], template="""{document}""")
29
-
30
- # Format the article text into the prompt
31
- prompt_input = prompt.format(document=article_text)
32
-
33
- # Generate the summary using the model
34
- summary = llm.__call__(prompt_input)
35
-
36
- # If the model returns a list of summaries, extract the actual summary text
37
- if isinstance(summary, list):
38
- return summary[0]['summary_text'] if 'summary_text' in summary[0] else str(summary[0])
39
- return str(summary) # Fallback for other formats
40
-
41
- # Function to extract text from an uploaded PDF
42
- def pdf_to_text(pdf_file):
43
- try:
44
- # Create a temporary file to write the uploaded PDF bytes
45
- with tempfile.NamedTemporaryFile(delete=False) as tmp:
46
- tmp.write(pdf_file) # Write raw bytes directly
47
- tmp.flush() # Make sure data is written to disk
48
-
49
- # Use PyPDF2 to read and extract text
50
- reader = PyPDF2.PdfReader(tmp.name)
51
- text = "\n".join(page.extract_text() or "" for page in reader.pages)
52
-
53
- # Return cleaned-up text or a message if extraction fails
54
- return text.strip() if text.strip() else "No extractable text found in the PDF."
55
- except Exception as e:
56
- return f"Error reading PDF: {str(e)}" # Return readable error message
57
-
58
- # Instantiate the summarizer class
59
- summarizer = TextSummarizer()
60
-
61
- # Summarize input with user-defined maximum length
62
- def summarize_input(text, max_words):
63
- if not text.strip():
64
- return "Please enter or extract some text first."
65
-
66
- try:
67
- # Convert max_words input to integer
68
- max_length = int(max_words)
69
- # Set a safe minimum length for quality summaries
70
- min_length = max(30, max_length // 4)
71
-
72
- # Generate the summary
73
- return summarizer.summarize_text(text, max_length=max_length, min_length=min_length)
74
- except Exception as e:
75
- return f"Error during summarization: {str(e)}"
76
-
77
- # Build the Gradio UI
78
- with gr.Blocks() as demo:
79
- gr.Markdown("## πŸ“ Text & PDF Summarizer with Length Control")
80
-
81
- with gr.Row():
82
- # Text input for manually entering article
83
- text_input = gr.Textbox(label="Enter article text", lines=15, placeholder="Paste your article here...")
84
-
85
- # Upload input for PDF files
86
- pdf_file = gr.File(label="Or upload PDF", file_types=[".pdf"], type="binary")
87
-
88
- # User input for controlling max summary length
89
- max_words = gr.Number(label="Max summary word count", value=150, precision=0)
90
-
91
- with gr.Row():
92
- # Button to convert PDF to text
93
- convert_btn = gr.Button("Convert PDF to Text")
94
- # Button to generate the summary
95
- summary_btn = gr.Button("Summarize Text")
96
-
97
- # Textbox to display the summary output
98
- output_text = gr.Textbox(label="Summary", lines=10)
99
-
100
- # Link buttons to their respective functions
101
- convert_btn.click(fn=pdf_to_text, inputs=pdf_file, outputs=text_input)
102
- summary_btn.click(fn=summarize_input, inputs=[text_input, max_words], outputs=output_text)
103
-
104
- # Launch the app if run directly
105
- if __name__ == "__main__":
106
- demo.launch()
 
 
 
 
 
 
 
1
+ # Running on local URL: http://127.0.0.1:7860
2
+
3
+ import gradio as gr # Gradio: for creating web-based user interfaces
4
+ import PyPDF2 # PyPDF2: for reading PDF files
5
+ import tempfile # tempfile: to safely handle temporary files
6
+ from langchain.prompts import PromptTemplate # LangChain: for managing prompt templates
7
+ from langchain_huggingface.llms import HuggingFacePipeline # LangChain integration with HuggingFace models
8
+
9
+ # Define a summarization class
10
class TextSummarizer:
    """Summarize text with the ``facebook/bart-large-cnn`` model via LangChain.

    The Hugging Face pipeline is created lazily on the first request and then
    reused, so the model is not reloaded for every summary.
    """

    def __init__(self):
        # Define the model to use for summarization
        self.model_id = "facebook/bart-large-cnn"
        # Cached HuggingFacePipeline; built on the first summarize_text() call.
        self._llm = None

    def _get_llm(self):
        """Return the cached summarization pipeline, loading it on first use."""
        # getattr keeps this safe for instances created without __init__.
        if getattr(self, "_llm", None) is None:
            self._llm = HuggingFacePipeline.from_model_id(
                model_id=self.model_id,
                task="summarization",
            )
        return self._llm

    def summarize_text(self, article_text, max_length=150, min_length=30):
        """Return a summary of ``article_text``.

        Args:
            article_text: The text to summarize.
            max_length: Upper bound on the generated summary length, passed
                to the pipeline as ``max_length``.
            min_length: Lower bound on the generated summary length, passed
                to the pipeline as ``min_length``.

        Returns:
            The summary as a string.
        """
        # Length settings are supplied per call so one loaded model can serve
        # requests with different limits.
        # For more varied output, sampling settings can be used instead, e.g.
        # do_sample=True, temperature=0.7, top_k=50, top_p=0.95.
        generation_kwargs = {
            "max_length": max_length,
            "min_length": min_length,
            "do_sample": False,  # Deterministic output
            "truncation": True,  # Clip inputs longer than the model accepts
        }

        # The text is passed straight through as the prompt; invoke() replaces
        # the deprecated llm.__call__().
        summary = self._get_llm().invoke(
            article_text, pipeline_kwargs=generation_kwargs
        )

        # If the model returns a list of summaries, extract the actual text
        if isinstance(summary, list):
            if not summary:
                return ""
            first = summary[0]
            if isinstance(first, dict) and "summary_text" in first:
                return first["summary_text"]
            return str(first)
        return str(summary)  # Fallback for other formats
46
+
47
+ # Function to extract text from an uploaded PDF
48
def pdf_to_text(pdf_file):
    """Extract the text of an uploaded PDF.

    Args:
        pdf_file: Raw PDF bytes as delivered by the ``type="binary"`` file
            input, or ``None``/empty when nothing has been uploaded.

    Returns:
        The extracted text with surrounding whitespace stripped, or a
        human-readable message when there is no upload, no extractable
        text, or the PDF cannot be read.
    """
    import io  # Local import keeps this function self-contained.

    if not pdf_file:
        return "Please upload a PDF file first."

    try:
        # Parse directly from memory: no temporary file is left on disk, and
        # nothing is reopened by name while still open.
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        # extract_text() may yield nothing for a page; treat that as empty.
        text = "\n".join(page.extract_text() or "" for page in reader.pages).strip()
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error reading PDF: {str(e)}"  # Return readable error message

    # Return cleaned-up text or a message if extraction found nothing
    return text if text else "No extractable text found in the PDF."
63
+
64
# Module-level TextSummarizer instance used by summarize_input().
summarizer = TextSummarizer()
66
+
67
+ # Summarize input with user-defined maximum length
68
def summarize_input(text, max_words):
    """Summarize ``text`` with a user-chosen maximum summary length.

    Args:
        text: The article text to summarize.
        max_words: The requested maximum summary length; converted with
            ``int()`` and passed to the summarizer as ``max_length``.

    Returns:
        The summary, or a human-readable message when the input is empty,
        the limit is invalid, or summarization fails.
    """
    if not text or not text.strip():
        return "Please enter or extract some text first."

    # Convert max_words input to integer; a cleared or non-numeric field
    # arrives as None or an unparsable value.
    try:
        max_length = int(max_words)
    except (TypeError, ValueError, OverflowError):
        return "Please enter a valid whole number for the max summary word count."
    if max_length < 1:
        return "Max summary word count must be at least 1."

    # Set a safe minimum length for quality summaries...
    min_length = max(30, max_length // 4)
    # ...but never above the maximum, which would make the constraints
    # impossible to satisfy for small limits.
    if min_length > max_length:
        min_length = max(1, max_length // 2)

    try:
        # Generate the summary
        return summarizer.summarize_text(
            text, max_length=max_length, min_length=min_length
        )
    except Exception as e:
        # UI boundary: show the failure in the output box instead of raising.
        return f"Error during summarization: {str(e)}"
82
+
83
+ # Build the Gradio UI
84
# Build the Gradio UI: text/PDF inputs, a length control, two action buttons
# and a summary output box.
with gr.Blocks() as demo:
    # Title emoji restored from mis-decoded bytes.
    gr.Markdown("## 📝 Text & PDF Summarizer with Length Control")

    with gr.Row():
        # Text input for manually entering article
        text_input = gr.Textbox(
            label="Enter article text",
            lines=15,
            placeholder="Paste your article here...",
        )

        # Upload input for PDF files; type="binary" hands the callback raw bytes
        pdf_file = gr.File(label="Or upload PDF", file_types=[".pdf"], type="binary")

    # User input for controlling max summary length
    max_words = gr.Number(label="Max summary word count", value=150, precision=0)

    with gr.Row():
        # Button to convert PDF to text
        convert_btn = gr.Button("Convert PDF to Text")
        # Button to generate the summary
        summary_btn = gr.Button("Summarize Text")

    # Textbox to display the summary output
    output_text = gr.Textbox(label="Summary", lines=10)

    # Link buttons to their respective functions: extracted PDF text lands in
    # the article textbox so it can then be summarized.
    convert_btn.click(fn=pdf_to_text, inputs=pdf_file, outputs=text_input)
    summary_btn.click(
        fn=summarize_input,
        inputs=[text_input, max_words],
        outputs=output_text,
    )

# Launch the app if run directly
if __name__ == "__main__":
    demo.launch()