WolfeLeo2 committed on
Commit
d4a9032
·
1 Parent(s): 71754ec

third commit

Browse files
Files changed (2) hide show
  1. app.py +90 -77
  2. requirements.txt +1 -6
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import gradio as gr
2
  import logging
3
  import sys
4
- import os
5
- import gc
6
 
7
  # Configure logging
8
  logging.basicConfig(
@@ -13,95 +12,109 @@ logging.basicConfig(
13
  logger = logging.getLogger(__name__)
14
 
15
  # Log startup information
16
- logger.info("Starting StudAI Summarization Service with Gradio")
17
  logger.info(f"Python version: {sys.version}")
18
 
19
- # Force garbage collection
20
- gc.collect()
 
21
 
22
- # Create a simple function for summarization that doesn't use ML in case model loading fails
23
- def simple_summarize(text, max_length=150, min_length=30):
24
- """Simple extractive summarization as fallback"""
25
- import re
26
- sentences = re.split(r'(?<=[.!?])\s+', text)
27
-
28
- if len(sentences) <= 3:
29
- return text
30
-
31
- # Take first, middle and last sentences
32
- summary = [
33
- sentences[0],
34
- sentences[len(sentences) // 2],
35
- sentences[-1]
36
- ]
37
- return " ".join(summary)
38
 
39
- # Set a flag for model availability
40
- model_available = False
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- # Try to import and load the model with memory optimizations
43
- try:
44
- # Import and load only when needed
45
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
46
- import torch
 
 
 
 
 
 
 
47
 
48
- logger.info("Loading small model for summarization...")
 
49
 
50
- # Use a tiny model instead of t5-small
51
- model_name = "facebook/bart-large-cnn"
52
 
53
- # Enable memory optimization
54
- if torch.cuda.is_available():
55
- logger.info("CUDA available, using GPU")
56
- device = 0
57
- else:
58
- logger.info("CUDA not available, using CPU")
59
- device = -1
60
 
61
- # Enable memory-efficient loading
62
- summarizer = pipeline(
63
- "summarization",
64
- model=model_name,
65
- device=device,
66
- framework="pt"
67
- )
68
 
69
- logger.info("Model loaded successfully!")
70
- model_available = True
 
 
 
71
 
72
- # Force garbage collection after model loading
73
- gc.collect()
74
- if torch.cuda.is_available():
75
- torch.cuda.empty_cache()
 
 
 
 
 
76
 
77
- except Exception as e:
78
- logger.error(f"Failed to load model: {str(e)}")
79
- logger.info("Will use simple extractive summarization instead")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  def summarize_text(text, max_length=150, min_length=30):
82
- """Summarize the provided text"""
83
- if not text or len(text.strip()) < 50:
84
- return text
85
-
86
  try:
87
- if model_available:
88
- logger.info(f"Summarizing text of length {len(text)} with model")
89
- result = summarizer(
90
- text,
91
- max_length=max_length,
92
- min_length=min_length,
93
- truncation=True
94
- )
95
- summary = result[0]["summary_text"]
96
- else:
97
- logger.info(f"Using simple summarization for text of length {len(text)}")
98
- summary = simple_summarize(text, max_length, min_length)
99
-
100
- return summary
101
  except Exception as e:
102
  logger.error(f"Error during summarization: {str(e)}")
103
- # Fall back to simple summarization on error
104
- return simple_summarize(text, max_length, min_length)
 
 
 
105
 
106
  # Create Gradio interface
107
  demo = gr.Interface(
@@ -110,14 +123,14 @@ demo = gr.Interface(
110
  gr.Textbox(
111
  lines=10,
112
  label="Text to Summarize",
113
- placeholder="Enter text to summarize (at least 50 characters)"
114
  ),
115
  gr.Slider(50, 500, value=150, label="Max Length"),
116
  gr.Slider(10, 200, value=30, label="Min Length")
117
  ],
118
  outputs=gr.Textbox(label="Summary"),
119
  title="StudAI Text Summarization",
120
- description="This service provides text summarization for the StudAI Android app.",
121
  examples=[
122
  ["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
123
  ],
@@ -125,4 +138,4 @@ demo = gr.Interface(
125
  )
126
 
127
  # Launch with parameters optimized for Spaces
128
- demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  import logging
3
  import sys
4
+ import re
 
5
 
6
  # Configure logging
7
  logging.basicConfig(
 
12
  logger = logging.getLogger(__name__)
13
 
14
  # Log startup information
15
+ logger.info("Starting StudAI Summarization Service with Gradio (Rule-based version)")
16
  logger.info(f"Python version: {sys.version}")
17
 
18
+ def extract_sentences(text):
19
+ """Extract sentences from text"""
20
+ return re.split(r'(?<=[.!?])\s+', text)
21
 
22
def calculate_word_frequency(sentences):
    """Count how often each word occurs across *sentences*.

    Tokens are lower-cased and stripped of surrounding punctuation so that
    e.g. "work." and "work" count as the same word.  Tokens that are empty
    after stripping (pure punctuation) are ignored.

    Returns a dict mapping word -> occurrence count.
    """
    word_freq = {}
    for sentence in sentences:
        for token in sentence.lower().split():
            # Strip edge punctuation: the previous isalnum() filter silently
            # dropped every sentence-final token ("work."), so terminal words
            # were never counted at all.
            word = token.strip('.,!?;:"\'()[]-')
            if word:
                word_freq[word] = word_freq.get(word, 0) + 1
    return word_freq
 
 
 
 
 
 
 
 
30
 
31
def score_sentences(sentences, word_freq):
    """Score each sentence by the summed frequency of its words.

    The first and last sentences receive a 25% bonus, since openings and
    conclusions usually carry the key content.

    Args:
        sentences: list of sentence strings.
        word_freq: dict mapping word -> occurrence count.

    Returns a list of (index, score, sentence) tuples in original order.
    """
    sentence_scores = []
    last_index = len(sentences) - 1
    for i, sentence in enumerate(sentences):
        score = 0
        for token in sentence.lower().split():
            # Strip edge punctuation: the previous isalnum() filter meant a
            # punctuated token ("work.") contributed zero to the score.
            word = token.strip('.,!?;:"\'()[]-')
            if word:
                score += word_freq.get(word, 0)
        # Positional bonus for the opening and closing sentences.
        if i == 0 or i == last_index:
            score *= 1.25
        sentence_scores.append((i, score, sentence))
    return sentence_scores
44
 
45
def rule_based_summarize(text, max_length=150, min_length=30):
    """Extractive summarization by word-frequency sentence scoring.

    Keeps the first and last sentences for context, fills the middle with
    the highest-scoring interior sentences, then truncates the joined
    result to *max_length* characters at a word boundary.  Texts shorter
    than 100 characters or with 5 or fewer sentences are returned verbatim.
    """
    logger.info(f"Summarizing text of length {len(text)}")

    # Too short to be worth summarizing -- return unchanged.
    if not text or len(text.strip()) < 100:
        return text

    sentences = extract_sentences(text)
    if len(sentences) <= 5:
        return text

    # Rank every sentence by aggregate word frequency, best first.
    freq = calculate_word_frequency(sentences)
    ranked = sorted(score_sentences(sentences, freq),
                    key=lambda item: item[1], reverse=True)

    last_idx = len(sentences) - 1

    # The opening sentence is always kept for context.
    chosen = [(0, sentences[0])]
    chars_used = len(sentences[0])
    picked_middle = 0

    # Greedily add the best-scoring interior sentences until we have both
    # enough characters and at least three middle sentences.
    for idx, _score, sentence in ranked:
        if idx in (0, last_idx):
            continue  # first/last are handled separately
        chosen.append((idx, sentence))
        picked_middle += 1
        chars_used += len(sentence)
        if chars_used >= min_length and picked_middle >= 3:
            break

    # Guarantee the closing sentence is present.
    if all(idx != last_idx for idx, _ in chosen):
        chosen.append((last_idx, sentences[-1]))

    # Restore document order so the summary reads naturally.
    chosen.sort(key=lambda item: item[0])
    summary = " ".join(sentence for _, sentence in chosen)

    # Hard cap at max_length, cutting back to the last full word.
    if len(summary) > max_length:
        summary = summary[:max_length].rsplit(' ', 1)[0] + '...'

    logger.info(f"Generated summary of length {len(summary)}")
    return summary
106
 
107
def summarize_text(text, max_length=150, min_length=30):
    """Gradio entry point: summarize *text* with the rule-based algorithm.

    Falls back to a trivial first/middle/last extraction if the scoring
    algorithm raises, so the endpoint never errors out to the client.

    Args:
        text: input text to summarize.
        max_length: maximum summary length in characters.
        min_length: minimum summary length in characters.
    """
    try:
        return rule_based_summarize(text, max_length, min_length)
    except Exception as e:
        logger.error(f"Error during summarization: {str(e)}")
        # Simple fallback if algo fails
        if not text:
            # Guard: extract_sentences(None) would raise TypeError and
            # escape this handler, crashing the request.
            return text
        sentences = extract_sentences(text)
        if len(sentences) <= 3:
            return text
        return " ".join([sentences[0], sentences[len(sentences)//2], sentences[-1]])
118
 
119
  # Create Gradio interface
120
  demo = gr.Interface(
 
123
  gr.Textbox(
124
  lines=10,
125
  label="Text to Summarize",
126
+ placeholder="Enter text to summarize (at least 100 characters)"
127
  ),
128
  gr.Slider(50, 500, value=150, label="Max Length"),
129
  gr.Slider(10, 200, value=30, label="Min Length")
130
  ],
131
  outputs=gr.Textbox(label="Summary"),
132
  title="StudAI Text Summarization",
133
+ description="This service provides text summarization for the StudAI Android app using an intelligent rule-based approach.",
134
  examples=[
135
  ["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
136
  ],
 
138
  )
139
 
140
  # Launch with parameters optimized for Spaces
141
+ demo.launch(share=False, server_name="0.0.0.0")
requirements.txt CHANGED
@@ -1,6 +1 @@
1
- gradio==4.13.0
2
- transformers==4.35.2
3
- torch==2.0.1
4
- numpy<2.0.0
5
- requests==2.31.0
6
- accelerate==0.25.0
 
1
+ gradio==4.13.0