third commit
Browse files- app.py +90 -77
- requirements.txt +1 -6
app.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import logging
|
3 |
import sys
|
4 |
-
import
|
5 |
-
import gc
|
6 |
|
7 |
# Configure logging
|
8 |
logging.basicConfig(
|
@@ -13,95 +12,109 @@ logging.basicConfig(
|
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
# Log startup information
|
16 |
-
logger.info("Starting StudAI Summarization Service with Gradio")
|
17 |
logger.info(f"Python version: {sys.version}")
|
18 |
|
19 |
-
|
20 |
-
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
# Take first, middle and last sentences
|
32 |
-
summary = [
|
33 |
-
sentences[0],
|
34 |
-
sentences[len(sentences) // 2],
|
35 |
-
sentences[-1]
|
36 |
-
]
|
37 |
-
return " ".join(summary)
|
38 |
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
-
|
|
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
|
53 |
-
#
|
54 |
-
|
55 |
-
logger.info("CUDA available, using GPU")
|
56 |
-
device = 0
|
57 |
-
else:
|
58 |
-
logger.info("CUDA not available, using CPU")
|
59 |
-
device = -1
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
model=model_name,
|
65 |
-
device=device,
|
66 |
-
framework="pt"
|
67 |
-
)
|
68 |
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
71 |
|
72 |
-
#
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
def summarize_text(text, max_length=150, min_length=30):
|
82 |
-
"""
|
83 |
-
if not text or len(text.strip()) < 50:
|
84 |
-
return text
|
85 |
-
|
86 |
try:
|
87 |
-
|
88 |
-
logger.info(f"Summarizing text of length {len(text)} with model")
|
89 |
-
result = summarizer(
|
90 |
-
text,
|
91 |
-
max_length=max_length,
|
92 |
-
min_length=min_length,
|
93 |
-
truncation=True
|
94 |
-
)
|
95 |
-
summary = result[0]["summary_text"]
|
96 |
-
else:
|
97 |
-
logger.info(f"Using simple summarization for text of length {len(text)}")
|
98 |
-
summary = simple_summarize(text, max_length, min_length)
|
99 |
-
|
100 |
-
return summary
|
101 |
except Exception as e:
|
102 |
logger.error(f"Error during summarization: {str(e)}")
|
103 |
-
#
|
104 |
-
|
|
|
|
|
|
|
105 |
|
106 |
# Create Gradio interface
|
107 |
demo = gr.Interface(
|
@@ -110,14 +123,14 @@ demo = gr.Interface(
|
|
110 |
gr.Textbox(
|
111 |
lines=10,
|
112 |
label="Text to Summarize",
|
113 |
-
placeholder="Enter text to summarize (at least
|
114 |
),
|
115 |
gr.Slider(50, 500, value=150, label="Max Length"),
|
116 |
gr.Slider(10, 200, value=30, label="Min Length")
|
117 |
],
|
118 |
outputs=gr.Textbox(label="Summary"),
|
119 |
title="StudAI Text Summarization",
|
120 |
-
description="This service provides text summarization for the StudAI Android app.",
|
121 |
examples=[
|
122 |
["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
|
123 |
],
|
@@ -125,4 +138,4 @@ demo = gr.Interface(
|
|
125 |
)
|
126 |
|
127 |
# Launch with parameters optimized for Spaces
|
128 |
-
demo.launch(share=False, server_name="0.0.0.0"
|
|
|
1 |
import gradio as gr
|
2 |
import logging
|
3 |
import sys
|
4 |
+
import re
|
|
|
5 |
|
6 |
# Configure logging
|
7 |
logging.basicConfig(
|
|
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
14 |
# Log startup information
|
15 |
+
logger.info("Starting StudAI Summarization Service with Gradio (Rule-based version)")
|
16 |
logger.info(f"Python version: {sys.version}")
|
17 |
|
18 |
+
def extract_sentences(text):
    """Split text into sentences.

    A sentence boundary is any run of whitespace that directly follows
    '.', '!' or '?'. The terminating punctuation stays attached to the
    sentence it ends.

    Args:
        text: The text to split. May be empty or None.

    Returns:
        A list of non-empty sentence strings in their original order.
        Empty, whitespace-only or None input yields an empty list.
    """
    if not text:
        return []
    # Strip first: trailing whitespace after the final terminator would
    # otherwise produce an empty trailing "sentence", and leading whitespace
    # would be glued onto the first one.
    parts = re.split(r'(?<=[.!?])\s+', text.strip())
    return [part for part in parts if part]
|
21 |
|
22 |
+
def calculate_word_frequency(sentences):
    """Count how often each word occurs across all sentences.

    Words are lower-cased runs of letters and digits. Punctuation is treated
    as a separator rather than as part of a token, so "work." and "work,"
    both count towards "work" instead of being discarded.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A dict mapping each lower-cased word to its number of occurrences.
        An empty iterable yields an empty dict.
    """
    word_freq = {}
    for sentence in sentences:
        # [^\W_]+ matches alphanumeric runs only (\w minus the underscore),
        # which keeps the per-character meaning of the str.isalnum() filter
        # while no longer rejecting words that touch punctuation.
        for word in re.findall(r"[^\W_]+", sentence.lower()):
            word_freq[word] = word_freq.get(word, 0) + 1
    return word_freq
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
def score_sentences(sentences, word_freq):
    """Score each sentence by the summed frequency of its words.

    Words are lower-cased runs of letters and digits, so a word followed by
    punctuation (for example the last word of a sentence) still contributes
    to the score. The first and the last sentence get a 25% bonus.

    Args:
        sentences: List of sentence strings in document order.
        word_freq: Mapping of lower-cased word to its frequency; words that
            are missing from the mapping contribute 0.

    Returns:
        A list of (index, score, sentence) tuples in the same order as
        ``sentences``.
    """
    last_index = len(sentences) - 1
    sentence_scores = []
    for i, sentence in enumerate(sentences):
        # [^\W_]+ matches alphanumeric runs only (\w minus the underscore).
        words = re.findall(r"[^\W_]+", sentence.lower())
        score = sum(word_freq.get(word, 0) for word in words)
        # Give bonus to first and last sentences
        if i == 0 or i == last_index:
            score *= 1.25
        sentence_scores.append((i, score, sentence))
    return sentence_scores
|
44 |
|
45 |
+
def rule_based_summarize(text, max_length=150, min_length=30):
    """Build an extractive summary from the highest-scoring sentences.

    The summary always contains the first and the last sentence of the text
    plus the best-scoring sentences in between, emitted in their original
    order. Texts that are empty, shorter than 100 characters (after
    stripping) or that have five sentences or fewer are returned unchanged.

    Args:
        text: The text to summarize. Empty or None input is returned as-is.
        max_length: Maximum length of the summary in characters. A longer
            summary is cut at a word boundary and suffixed with '...'.
        min_length: Minimum number of characters to collect before the
            selection of additional sentences may stop.

    Returns:
        The summary string, or ``text`` itself when it is too short to
        summarize.
    """
    # Guard before anything calls len(text): None must be handed back,
    # not turned into a TypeError by the log line below.
    if not text:
        return text

    logger.info(f"Summarizing text of length {len(text)}")

    # Handle short texts
    if len(text.strip()) < 100:
        return text

    # Extract sentences
    sentences = extract_sentences(text)
    if len(sentences) <= 5:
        return text

    # Rank sentences by word-frequency score, best first
    word_freq = calculate_word_frequency(sentences)
    sentence_scores = score_sentences(sentences, word_freq)
    sentence_scores.sort(key=lambda x: x[1], reverse=True)

    last_index = len(sentences) - 1

    # Always include first sentence for context
    summary_sentences = [(0, sentences[0])]
    char_count = len(sentences[0])
    picked = 0

    # Add the highest scoring middle sentences until at least three were
    # taken and the minimum length is reached.
    for i, _score, sentence in sentence_scores:
        # The first sentence is already in; the last one is added below.
        if i == 0 or i == last_index:
            continue

        summary_sentences.append((i, sentence))
        picked += 1
        char_count += len(sentence)

        if char_count >= min_length and picked >= 3:
            break

    # Always close with the last sentence. The loop never selects it, and
    # with more than five sentences it cannot coincide with the first.
    summary_sentences.append((last_index, sentences[last_index]))

    # Sort by original position to maintain flow
    summary_sentences.sort(key=lambda x: x[0])

    # Join sentences into summary
    summary = " ".join(s for _, s in summary_sentences)

    # Truncate if too long. Room for the ellipsis is reserved up front so
    # the result stays within max_length instead of overshooting it.
    # int() keeps the slice valid should the caller pass a float
    # (NOTE(review): UI sliders may deliver floats - confirm).
    max_length = int(max_length)
    if len(summary) > max_length:
        ellipsis = '...'
        budget = max(max_length - len(ellipsis), 0)
        summary = summary[:budget].rsplit(' ', 1)[0] + ellipsis

    logger.info(f"Generated summary of length {len(summary)}")
    return summary
|
106 |
|
107 |
def summarize_text(text, max_length=150, min_length=30):
    """Summarize text, falling back to a naive summary on failure.

    Delegates to ``rule_based_summarize``. If that raises, the error is
    logged with its traceback and a simple first/middle/last sentence
    summary is returned instead, so the caller always gets a string back.

    Args:
        text: The text to summarize.
        max_length: Maximum summary length in characters, forwarded to
            ``rule_based_summarize``.
        min_length: Minimum summary length in characters, forwarded to
            ``rule_based_summarize``.

    Returns:
        The summary; the original text when it has three sentences or
        fewer on the fallback path; an empty string when the fallback
        receives something that is not a string.
    """
    try:
        return rule_based_summarize(text, max_length, min_length)
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"Error during summarization: {str(e)}")

        # Simple fallback if algo fails. It must not raise on the very
        # input that caused the failure, so non-string input ends here.
        if not isinstance(text, str):
            return ""

        sentences = extract_sentences(text)
        if len(sentences) <= 3:
            return text
        middle = sentences[len(sentences) // 2]
        return " ".join([sentences[0], middle, sentences[-1]])
|
118 |
|
119 |
# Create Gradio interface
|
120 |
demo = gr.Interface(
|
|
|
123 |
gr.Textbox(
|
124 |
lines=10,
|
125 |
label="Text to Summarize",
|
126 |
+
placeholder="Enter text to summarize (at least 100 characters)"
|
127 |
),
|
128 |
gr.Slider(50, 500, value=150, label="Max Length"),
|
129 |
gr.Slider(10, 200, value=30, label="Min Length")
|
130 |
],
|
131 |
outputs=gr.Textbox(label="Summary"),
|
132 |
title="StudAI Text Summarization",
|
133 |
+
description="This service provides text summarization for the StudAI Android app using an intelligent rule-based approach.",
|
134 |
examples=[
|
135 |
["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
|
136 |
],
|
|
|
138 |
)
|
139 |
|
140 |
# Launch with parameters optimized for Spaces
|
141 |
+
demo.launch(share=False, server_name="0.0.0.0")
|
requirements.txt
CHANGED
@@ -1,6 +1 @@
|
|
1 |
-
gradio==4.13.0
|
2 |
-
transformers==4.35.2
|
3 |
-
torch==2.0.1
|
4 |
-
numpy<2.0.0
|
5 |
-
requests==2.31.0
|
6 |
-
accelerate==0.25.0
|
|
|
1 |
+
gradio==4.13.0
|
|
|
|
|
|
|
|
|
|