WolfeLeo2 committed on
Commit
fce3660
·
1 Parent(s): d4a9032
Files changed (2) hide show
  1. app.py +19 -118
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,141 +1,42 @@
1
  import gradio as gr
2
  import logging
3
  import sys
4
- import re
5
 
6
  # Configure logging
7
- logging.basicConfig(
8
- level=logging.INFO,
9
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
10
- handlers=[logging.StreamHandler(sys.stdout)]
11
- )
12
  logger = logging.getLogger(__name__)
13
 
14
- # Log startup information
15
- logger.info("Starting StudAI Summarization Service with Gradio (Rule-based version)")
16
- logger.info(f"Python version: {sys.version}")
17
-
18
- def extract_sentences(text):
19
- """Extract sentences from text"""
20
- return re.split(r'(?<=[.!?])\s+', text)
21
-
22
- def calculate_word_frequency(sentences):
23
- """Calculate word frequency across all sentences"""
24
- word_freq = {}
25
- for sentence in sentences:
26
- for word in sentence.lower().split():
27
- if word.isalnum():
28
- word_freq[word] = word_freq.get(word, 0) + 1
29
- return word_freq
30
 
31
- def score_sentences(sentences, word_freq):
32
- """Score sentences based on word importance"""
33
- sentence_scores = []
34
- for i, sentence in enumerate(sentences):
35
- score = 0
36
- for word in sentence.lower().split():
37
- if word.isalnum():
38
- score += word_freq.get(word, 0)
39
- # Give bonus to first and last sentences
40
- if i == 0 or i == len(sentences) - 1:
41
- score *= 1.25
42
- sentence_scores.append((i, score, sentence))
43
- return sentence_scores
44
-
45
- def rule_based_summarize(text, max_length=150, min_length=30):
46
- """Intelligent rule-based extractive summarization"""
47
- logger.info(f"Summarizing text of length {len(text)}")
48
-
49
- # Handle short texts
50
- if not text or len(text.strip()) < 100:
51
- return text
52
-
53
- # Extract sentences
54
- sentences = extract_sentences(text)
55
- if len(sentences) <= 5:
56
  return text
57
-
58
- # Calculate word frequencies
59
- word_freq = calculate_word_frequency(sentences)
60
-
61
- # Score sentences
62
- sentence_scores = score_sentences(sentences, word_freq)
63
-
64
- # Sort by score and select top sentences
65
- sentence_scores.sort(key=lambda x: x[1], reverse=True)
66
-
67
- summary_sentences = []
68
- summary_length = 0
69
- char_count = 0
70
-
71
- # Always include first sentence for context
72
- first_sentence = sentences[0]
73
- last_sentence = sentences[-1]
74
- summary_sentences.append((0, first_sentence))
75
- char_count += len(first_sentence)
76
-
77
- # Add highest scoring sentences until we reach minimum length
78
- for i, score, sentence in sentence_scores:
79
- # Skip first and last sentences (already included)
80
- if i == 0 or i == len(sentences) - 1:
81
- continue
82
-
83
- summary_sentences.append((i, sentence))
84
- summary_length += 1
85
- char_count += len(sentence)
86
 
87
- if char_count >= min_length and summary_length >= 3:
88
- break
89
-
90
- # Make sure last sentence is included
91
- if not any(i == len(sentences) - 1 for i, _ in summary_sentences):
92
- summary_sentences.append((len(sentences) - 1, last_sentence))
93
-
94
- # Sort by original position to maintain flow
95
- summary_sentences.sort(key=lambda x: x[0])
96
-
97
- # Join sentences into summary
98
- summary = " ".join(s for _, s in summary_sentences)
99
-
100
- # Truncate if too long
101
- if len(summary) > max_length:
102
- summary = summary[:max_length].rsplit(' ', 1)[0] + '...'
103
-
104
- logger.info(f"Generated summary of length {len(summary)}")
105
  return summary
106
 
107
- def summarize_text(text, max_length=150, min_length=30):
108
- """Wrapper function for summarization"""
109
- try:
110
- return rule_based_summarize(text, max_length, min_length)
111
- except Exception as e:
112
- logger.error(f"Error during summarization: {str(e)}")
113
- # Simple fallback if algo fails
114
- sentences = extract_sentences(text)
115
- if len(sentences) <= 3:
116
- return text
117
- return " ".join([sentences[0], sentences[len(sentences)//2], sentences[-1]])
118
-
119
  # Create Gradio interface
120
  demo = gr.Interface(
121
  fn=summarize_text,
122
  inputs=[
123
- gr.Textbox(
124
- lines=10,
125
- label="Text to Summarize",
126
- placeholder="Enter text to summarize (at least 100 characters)"
127
- ),
128
  gr.Slider(50, 500, value=150, label="Max Length"),
129
  gr.Slider(10, 200, value=30, label="Min Length")
130
  ],
131
  outputs=gr.Textbox(label="Summary"),
132
  title="StudAI Text Summarization",
133
- description="This service provides text summarization for the StudAI Android app using an intelligent rule-based approach.",
134
- examples=[
135
- ["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
136
- ],
137
- allow_flagging="never"
138
  )
139
 
140
- # Launch with parameters optimized for Spaces
141
- demo.launch(share=False, server_name="0.0.0.0")
 
1
  import gradio as gr
2
  import logging
3
  import sys
4
+ from transformers import pipeline
5
 
6
  # Configure logging
7
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
 
 
 
8
  logger = logging.getLogger(__name__)
9
 
10
+ # Load the model
11
+ logger.info("Loading bart-large-cnn model...")
12
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
13
+ logger.info("Model loaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ def summarize_text(text, max_length=150, min_length=30):
16
+ if not text or len(text.strip()) < 50:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ logger.info(f"Summarizing text of length {len(text)}")
20
+ result = summarizer(
21
+ text,
22
+ max_length=max_length,
23
+ min_length=min_length,
24
+ truncation=True
25
+ )
26
+ summary = result[0]["summary_text"]
 
 
 
 
 
 
 
 
 
 
27
  return summary
28
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # Create Gradio interface
30
  demo = gr.Interface(
31
  fn=summarize_text,
32
  inputs=[
33
+ gr.Textbox(lines=10, label="Text to Summarize"),
 
 
 
 
34
  gr.Slider(50, 500, value=150, label="Max Length"),
35
  gr.Slider(10, 200, value=30, label="Min Length")
36
  ],
37
  outputs=gr.Textbox(label="Summary"),
38
  title="StudAI Text Summarization",
39
+ description="Powered by facebook/bart-large-cnn model"
 
 
 
 
40
  )
41
 
42
+ demo.launch()
 
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- gradio==4.13.0
 
 
 
1
+ gradio==4.13.0
2
+ transformers==4.35.2
3
+ torch==2.0.1