WolfeLeo2 committed on
Commit
d4a9032
·
1 Parent(s): 71754ec

third commit

Browse files
Files changed (2) hide show
  1. app.py +90 -77
  2. requirements.txt +1 -6
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import gradio as gr
2
  import logging
3
  import sys
4
- import os
5
- import gc
6
 
7
  # Configure logging
8
  logging.basicConfig(
@@ -13,95 +12,109 @@ logging.basicConfig(
13
  logger = logging.getLogger(__name__)
14
 
15
  # Log startup information
16
- logger.info("Starting StudAI Summarization Service with Gradio")
17
  logger.info(f"Python version: {sys.version}")
18
 
19
- # Force garbage collection
20
- gc.collect()
 
21
 
22
- # Create a simple function for summarization that doesn't use ML in case model loading fails
23
- def simple_summarize(text, max_length=150, min_length=30):
24
- """Simple extractive summarization as fallback"""
25
- import re
26
- sentences = re.split(r'(?<=[.!?])\s+', text)
27
-
28
- if len(sentences) <= 3:
29
- return text
30
-
31
- # Take first, middle and last sentences
32
- summary = [
33
- sentences[0],
34
- sentences[len(sentences) // 2],
35
- sentences[-1]
36
- ]
37
- return " ".join(summary)
38
 
39
- # Set a flag for model availability
40
- model_available = False
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- # Try to import and load the model with memory optimizations
43
- try:
44
- # Import and load only when needed
45
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
46
- import torch
 
 
 
 
 
 
 
47
 
48
- logger.info("Loading small model for summarization...")
 
49
 
50
- # Use a tiny model instead of t5-small
51
- model_name = "facebook/bart-large-cnn"
52
 
53
- # Enable memory optimization
54
- if torch.cuda.is_available():
55
- logger.info("CUDA available, using GPU")
56
- device = 0
57
- else:
58
- logger.info("CUDA not available, using CPU")
59
- device = -1
60
 
61
- # Enable memory-efficient loading
62
- summarizer = pipeline(
63
- "summarization",
64
- model=model_name,
65
- device=device,
66
- framework="pt"
67
- )
68
 
69
- logger.info("Model loaded successfully!")
70
- model_available = True
 
 
 
71
 
72
- # Force garbage collection after model loading
73
- gc.collect()
74
- if torch.cuda.is_available():
75
- torch.cuda.empty_cache()
 
 
 
 
 
76
 
77
- except Exception as e:
78
- logger.error(f"Failed to load model: {str(e)}")
79
- logger.info("Will use simple extractive summarization instead")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  def summarize_text(text, max_length=150, min_length=30):
82
- """Summarize the provided text"""
83
- if not text or len(text.strip()) < 50:
84
- return text
85
-
86
  try:
87
- if model_available:
88
- logger.info(f"Summarizing text of length {len(text)} with model")
89
- result = summarizer(
90
- text,
91
- max_length=max_length,
92
- min_length=min_length,
93
- truncation=True
94
- )
95
- summary = result[0]["summary_text"]
96
- else:
97
- logger.info(f"Using simple summarization for text of length {len(text)}")
98
- summary = simple_summarize(text, max_length, min_length)
99
-
100
- return summary
101
  except Exception as e:
102
  logger.error(f"Error during summarization: {str(e)}")
103
- # Fall back to simple summarization on error
104
- return simple_summarize(text, max_length, min_length)
 
 
 
105
 
106
  # Create Gradio interface
107
  demo = gr.Interface(
@@ -110,14 +123,14 @@ demo = gr.Interface(
110
  gr.Textbox(
111
  lines=10,
112
  label="Text to Summarize",
113
- placeholder="Enter text to summarize (at least 50 characters)"
114
  ),
115
  gr.Slider(50, 500, value=150, label="Max Length"),
116
  gr.Slider(10, 200, value=30, label="Min Length")
117
  ],
118
  outputs=gr.Textbox(label="Summary"),
119
  title="StudAI Text Summarization",
120
- description="This service provides text summarization for the StudAI Android app.",
121
  examples=[
122
  ["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
123
  ],
@@ -125,4 +138,4 @@ demo = gr.Interface(
125
  )
126
 
127
  # Launch with parameters optimized for Spaces
128
- demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  import logging
3
  import sys
4
+ import re
 
5
 
6
  # Configure logging
7
  logging.basicConfig(
 
12
  logger = logging.getLogger(__name__)
13
 
14
  # Log startup information
15
+ logger.info("Starting StudAI Summarization Service with Gradio (Rule-based version)")
16
  logger.info(f"Python version: {sys.version}")
17
 
18
+ def extract_sentences(text):
19
+ """Extract sentences from text"""
20
+ return re.split(r'(?<=[.!?])\s+', text)
21
 
22
def calculate_word_frequency(sentences):
    """Count how often each word occurs across *sentences*.

    Tokens are lower-cased and stripped of surrounding punctuation so that
    e.g. "work." and "work" count as the same word.  Tokens that are empty
    after stripping (pure punctuation) are ignored.

    Returns a dict mapping word -> occurrence count.
    """
    word_freq = {}
    for sentence in sentences:
        for token in sentence.lower().split():
            # Strip edge punctuation: the previous isalnum() filter silently
            # dropped every sentence-final token ("work."), so terminal words
            # were never counted at all.
            word = token.strip('.,!?;:"\'()[]-')
            if word:
                word_freq[word] = word_freq.get(word, 0) + 1
    return word_freq
 
 
 
 
 
 
 
 
30
 
31
def score_sentences(sentences, word_freq):
    """Score each sentence by the summed frequency of its words.

    The first and last sentences receive a 25% bonus, since openings and
    conclusions usually carry the key content.

    Args:
        sentences: list of sentence strings.
        word_freq: dict mapping word -> occurrence count.

    Returns a list of (index, score, sentence) tuples in original order.
    """
    sentence_scores = []
    last_index = len(sentences) - 1
    for i, sentence in enumerate(sentences):
        score = 0
        for token in sentence.lower().split():
            # Strip edge punctuation: the previous isalnum() filter meant a
            # punctuated token ("work.") contributed zero to the score.
            word = token.strip('.,!?;:"\'()[]-')
            if word:
                score += word_freq.get(word, 0)
        # Positional bonus for the opening and closing sentences.
        if i == 0 or i == last_index:
            score *= 1.25
        sentence_scores.append((i, score, sentence))
    return sentence_scores
44
 
45
def rule_based_summarize(text, max_length=150, min_length=30):
    """Extractive summarization by word-frequency sentence scoring.

    Keeps the first and last sentences for context, fills the middle with
    the highest-scoring interior sentences, then truncates the joined
    result to *max_length* characters at a word boundary.  Texts shorter
    than 100 characters or with 5 or fewer sentences are returned verbatim.
    """
    logger.info(f"Summarizing text of length {len(text)}")

    # Too short to be worth summarizing -- return unchanged.
    if not text or len(text.strip()) < 100:
        return text

    sentences = extract_sentences(text)
    if len(sentences) <= 5:
        return text

    # Rank every sentence by aggregate word frequency, best first.
    freq = calculate_word_frequency(sentences)
    ranked = sorted(score_sentences(sentences, freq),
                    key=lambda item: item[1], reverse=True)

    last_idx = len(sentences) - 1

    # The opening sentence is always kept for context.
    chosen = [(0, sentences[0])]
    chars_used = len(sentences[0])
    picked_middle = 0

    # Greedily add the best-scoring interior sentences until we have both
    # enough characters and at least three middle sentences.
    for idx, _score, sentence in ranked:
        if idx in (0, last_idx):
            continue  # first/last are handled separately
        chosen.append((idx, sentence))
        picked_middle += 1
        chars_used += len(sentence)
        if chars_used >= min_length and picked_middle >= 3:
            break

    # Guarantee the closing sentence is present.
    if all(idx != last_idx for idx, _ in chosen):
        chosen.append((last_idx, sentences[-1]))

    # Restore document order so the summary reads naturally.
    chosen.sort(key=lambda item: item[0])
    summary = " ".join(sentence for _, sentence in chosen)

    # Hard cap at max_length, cutting back to the last full word.
    if len(summary) > max_length:
        summary = summary[:max_length].rsplit(' ', 1)[0] + '...'

    logger.info(f"Generated summary of length {len(summary)}")
    return summary
106
 
107
def summarize_text(text, max_length=150, min_length=30):
    """Gradio entry point: summarize *text* with the rule-based algorithm.

    Falls back to a trivial first/middle/last extraction if the scoring
    algorithm raises, so the endpoint never errors out to the client.

    Args:
        text: input text to summarize.
        max_length: maximum summary length in characters.
        min_length: minimum summary length in characters.
    """
    try:
        return rule_based_summarize(text, max_length, min_length)
    except Exception as e:
        logger.error(f"Error during summarization: {str(e)}")
        # Simple fallback if algo fails
        if not text:
            # Guard: extract_sentences(None) would raise TypeError and
            # escape this handler, crashing the request.
            return text
        sentences = extract_sentences(text)
        if len(sentences) <= 3:
            return text
        return " ".join([sentences[0], sentences[len(sentences)//2], sentences[-1]])
118
 
119
  # Create Gradio interface
120
  demo = gr.Interface(
 
123
  gr.Textbox(
124
  lines=10,
125
  label="Text to Summarize",
126
+ placeholder="Enter text to summarize (at least 100 characters)"
127
  ),
128
  gr.Slider(50, 500, value=150, label="Max Length"),
129
  gr.Slider(10, 200, value=30, label="Min Length")
130
  ],
131
  outputs=gr.Textbox(label="Summary"),
132
  title="StudAI Text Summarization",
133
+ description="This service provides text summarization for the StudAI Android app using an intelligent rule-based approach.",
134
  examples=[
135
  ["The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work.", 150, 30]
136
  ],
 
138
  )
139
 
140
  # Launch with parameters optimized for Spaces
141
+ demo.launch(share=False, server_name="0.0.0.0")
requirements.txt CHANGED
@@ -1,6 +1 @@
1
- gradio==4.13.0
2
- transformers==4.35.2
3
- torch==2.0.1
4
- numpy<2.0.0
5
- requests==2.31.0
6
- accelerate==0.25.0
 
1
+ gradio==4.13.0