fourth
Browse files- app.py +19 -118
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,141 +1,42 @@
|
|
1 |
import gradio as gr
|
2 |
import logging
|
3 |
import sys
|
4 |
-
import
|
5 |
|
6 |
# Configure logging
|
7 |
-
# Route INFO-and-above records to stdout using a timestamped format.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)
|
13 |
|
14 |
-
#
|
15 |
-
logger.info("
|
16 |
-
|
17 |
-
|
18 |
-
def extract_sentences(text):
    """Split text into sentences.

    A sentence boundary is any run of whitespace that follows ``.``, ``!``
    or ``?``; the terminating punctuation stays attached to its sentence.

    Args:
        text: The text to split. ``None`` or blank text yields ``[]``.

    Returns:
        A list of non-empty sentence strings in their original order.
    """
    import re  # imported locally so the function is self-contained

    if not text:
        return []
    # Strip first: whitespace after the final terminator would otherwise
    # produce an empty trailing "sentence" that callers count and score.
    parts = re.split(r'(?<=[.!?])\s+', text.strip())
    return [part for part in parts if part]
|
21 |
-
|
22 |
-
def calculate_word_frequency(sentences):
    """Count how often each word occurs across all sentences.

    Tokens are lower-cased and split on whitespace. Punctuation attached to
    either end of a token is stripped before counting, so "work." and
    "work," are both counted as "work". Tokens that still contain
    non-alphanumeric characters after stripping (e.g. "work-life") are
    ignored.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A dict mapping each counted word to its number of occurrences.
    """
    import string
    from collections import Counter

    word_freq = Counter()
    for sentence in sentences:
        for token in sentence.lower().split():
            # Without stripping, isalnum() rejects every sentence-final or
            # comma-adjacent word and it is never counted.
            word = token.strip(string.punctuation)
            if word.isalnum():
                word_freq[word] += 1
    # Return a plain dict, as before.
    return dict(word_freq)
|
30 |
|
31 |
-
def score_sentences(sentences, word_freq):
    """Score each sentence by the summed frequency of its words.

    Tokens are lower-cased, split on whitespace, and stripped of leading and
    trailing punctuation before lookup, so a word followed by "." or ","
    still contributes its frequency. The first and last sentences get a
    25% bonus.

    Args:
        sentences: List of sentence strings in document order.
        word_freq: Mapping of lower-cased word to its count.

    Returns:
        A list of ``(index, score, sentence)`` tuples in document order.
    """
    import string

    last_index = len(sentences) - 1
    sentence_scores = []
    for i, sentence in enumerate(sentences):
        words = (token.strip(string.punctuation) for token in sentence.lower().split())
        score = sum(word_freq.get(word, 0) for word in words if word.isalnum())
        # Give bonus to first and last sentences
        if i == 0 or i == last_index:
            score *= 1.25
        sentence_scores.append((i, score, sentence))
    return sentence_scores
|
44 |
-
|
45 |
-
def rule_based_summarize(text, max_length=150, min_length=30):
|
46 |
-
"""Intelligent rule-based extractive summarization"""
|
47 |
-
logger.info(f"Summarizing text of length {len(text)}")
|
48 |
-
|
49 |
-
# Handle short texts
|
50 |
-
if not text or len(text.strip()) < 100:
|
51 |
-
return text
|
52 |
-
|
53 |
-
# Extract sentences
|
54 |
-
sentences = extract_sentences(text)
|
55 |
-
if len(sentences) <= 5:
|
56 |
return text
|
57 |
-
|
58 |
-
# Calculate word frequencies
|
59 |
-
word_freq = calculate_word_frequency(sentences)
|
60 |
-
|
61 |
-
# Score sentences
|
62 |
-
sentence_scores = score_sentences(sentences, word_freq)
|
63 |
-
|
64 |
-
# Sort by score and select top sentences
|
65 |
-
sentence_scores.sort(key=lambda x: x[1], reverse=True)
|
66 |
-
|
67 |
-
summary_sentences = []
|
68 |
-
summary_length = 0
|
69 |
-
char_count = 0
|
70 |
-
|
71 |
-
# Always include first sentence for context
|
72 |
-
first_sentence = sentences[0]
|
73 |
-
last_sentence = sentences[-1]
|
74 |
-
summary_sentences.append((0, first_sentence))
|
75 |
-
char_count += len(first_sentence)
|
76 |
-
|
77 |
-
# Add highest scoring sentences until we reach minimum length
|
78 |
-
for i, score, sentence in sentence_scores:
|
79 |
-
# Skip first and last sentences (already included)
|
80 |
-
if i == 0 or i == len(sentences) - 1:
|
81 |
-
continue
|
82 |
-
|
83 |
-
summary_sentences.append((i, sentence))
|
84 |
-
summary_length += 1
|
85 |
-
char_count += len(sentence)
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
summary_sentences.sort(key=lambda x: x[0])
|
96 |
-
|
97 |
-
# Join sentences into summary
|
98 |
-
summary = " ".join(s for _, s in summary_sentences)
|
99 |
-
|
100 |
-
# Truncate if too long
|
101 |
-
if len(summary) > max_length:
|
102 |
-
summary = summary[:max_length].rsplit(' ', 1)[0] + '...'
|
103 |
-
|
104 |
-
logger.info(f"Generated summary of length {len(summary)}")
|
105 |
return summary
|
106 |
|
107 |
-
def summarize_text(text, max_length=150, min_length=30):
    """Summarize text, degrading to a simple extract if summarization fails.

    Args:
        text: The text to summarize.
        max_length: Forwarded to ``rule_based_summarize``.
        min_length: Forwarded to ``rule_based_summarize``.

    Returns:
        The result of ``rule_based_summarize``. If that raises, the fallback
        returns ``text`` unchanged when it is not a string or has three or
        fewer sentences, and otherwise the first, middle and last sentences
        joined by spaces.
    """
    try:
        return rule_based_summarize(text, max_length, min_length)
    except Exception as e:
        # Boundary for the UI callback: record the traceback, then degrade
        # instead of surfacing the error.
        logger.exception("Error during summarization: %s", e)
        # Splitting a non-string would raise again from inside this handler,
        # so hand such input back untouched.
        if not isinstance(text, str):
            return text
        # Simple fallback if algo fails
        sentences = extract_sentences(text)
        if len(sentences) <= 3:
            return text
        middle = sentences[len(sentences) // 2]
        return " ".join([sentences[0], middle, sentences[-1]])
|
118 |
-
|
119 |
# Create Gradio interface
|
120 |
demo = gr.Interface(
|
121 |
fn=summarize_text,
|
122 |
inputs=[
|
123 |
-
gr.Textbox(
|
124 |
-
lines=10,
|
125 |
-
label="Text to Summarize",
|
126 |
-
placeholder="Enter text to summarize (at least 100 characters)"
|
127 |
-
),
|
128 |
gr.Slider(50, 500, value=150, label="Max Length"),
|
129 |
gr.Slider(10, 200, value=30, label="Min Length")
|
130 |
],
|
131 |
outputs=gr.Textbox(label="Summary"),
|
132 |
title="StudAI Text Summarization",
|
133 |
-
description="
|
134 |
-
examples=[
|
135 |
-
["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
|
136 |
-
],
|
137 |
-
allow_flagging="never"
|
138 |
)
|
139 |
|
140 |
-
|
141 |
-
demo.launch(share=False, server_name="0.0.0.0")
|
|
|
1 |
import gradio as gr
|
2 |
import logging
|
3 |
import sys
|
4 |
+
from transformers import pipeline
|
5 |
|
6 |
# Configure logging
|
7 |
+
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Build the summarization pipeline at import time; summarize_text reuses
# this single module-level instance on every call.
logger.info("Loading bart-large-cnn model...")
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
logger.info("Model loaded successfully!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
+
def summarize_text(text, max_length=150, min_length=30):
    """Summarize text with the module-level ``summarizer`` pipeline.

    Args:
        text: Text to summarize. Empty input, or input shorter than 50
            characters once stripped, is returned unchanged.
        max_length: Forwarded to the pipeline as ``max_length`` after
            conversion to ``int``.
        min_length: Forwarded to the pipeline as ``min_length`` after
            conversion to ``int``; lowered to ``max_length`` when it
            exceeds it.

    Returns:
        The ``summary_text`` of the pipeline's first result, or ``text``
        itself when it is too short to summarize.
    """
    if not text or len(text.strip()) < 50:
        return text

    # The two UI sliders are independent (min goes up to 200, max down to
    # 50), so the user can ask for a minimum above the maximum. Coerce both
    # to int and clamp rather than forwarding a contradictory pair.
    max_length = int(max_length)
    min_length = min(int(min_length), max_length)

    logger.info("Summarizing text of length %d", len(text))
    result = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        truncation=True,
    )
    return result[0]["summary_text"]
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# Create Gradio interface
|
30 |
# Wire summarize_text to a text box and two length sliders, then serve it.
demo = gr.Interface(
    title="StudAI Text Summarization",
    description="Powered by facebook/bart-large-cnn model",
    fn=summarize_text,
    inputs=[
        gr.Textbox(label="Text to Summarize", lines=10),
        gr.Slider(minimum=50, maximum=500, value=150, label="Max Length"),
        gr.Slider(minimum=10, maximum=200, value=30, label="Min Length"),
    ],
    outputs=gr.Textbox(label="Summary"),
)

demo.launch()
|
|
requirements.txt
CHANGED
@@ -1 +1,3 @@
|
|
1 |
-
gradio==4.13.0
|
|
|
|
|
|
1 |
+
gradio==4.13.0
|
2 |
+
transformers==4.35.2
|
3 |
+
torch==2.0.1
|