|
import gradio as gr |
|
import logging |
|
import sys |
|
import os |
|
import gc |
|
|
|
|
|
# Route every logger in the process to stdout so container platforms
# (which capture stdout) pick up all records.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

logger.info("Starting StudAI Summarization Service with Gradio")
logger.info(f"Python version: {sys.version}")

# Reclaim any startup slack before the (potentially large) model load below.
gc.collect()
|
def simple_summarize(text, max_length=150, min_length=30):
    """Simple extractive summarization as fallback.

    Picks the first, middle, and last sentences of the text. Texts of
    three sentences or fewer are returned unchanged.

    Args:
        text: The text to summarize.
        max_length: Accepted only for signature compatibility with the
            model-based path; this extractive strategy does not use it.
        min_length: Accepted only for signature compatibility; unused.

    Returns:
        The original text, or the three selected sentences joined by
        single spaces.
    """
    import re
    # Split on whitespace that follows sentence-ending punctuation.
    # Drop empty/whitespace-only fragments (e.g. produced by trailing
    # whitespace) so they can never be picked as a "sentence".
    sentences = [s for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]

    if len(sentences) <= 3:
        return text

    # Cheap heuristic: lead sentence, middle sentence, closing sentence.
    summary = [
        sentences[0],
        sentences[len(sentences) // 2],
        sentences[-1]
    ]
    return " ".join(summary)
|
|
|
|
|
# Flag consumed by summarize_text(): True only after the transformers
# pipeline below loads successfully.
model_available = False

try:
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
    import torch

    logger.info("Loading small model for summarization...")

    model_name = "facebook/bart-large-cnn"

    # pipeline() device convention: CUDA ordinal (0) for GPU, -1 for CPU.
    cuda_ok = torch.cuda.is_available()
    if cuda_ok:
        logger.info("CUDA available, using GPU")
    else:
        logger.info("CUDA not available, using CPU")

    summarizer = pipeline(
        "summarization",
        model=model_name,
        device=0 if cuda_ok else -1,
        framework="pt",
    )

    logger.info("Model loaded successfully!")
    model_available = True

    # Release load-time scratch memory immediately.
    gc.collect()
    if cuda_ok:
        torch.cuda.empty_cache()

except Exception as e:
    # Best-effort load: any failure (missing deps, download error, OOM)
    # leaves model_available False and the app on the extractive fallback.
    logger.error(f"Failed to load model: {str(e)}")
    logger.info("Will use simple extractive summarization instead")
|
|
|
def summarize_text(text, max_length=150, min_length=30):
    """Summarize the provided text.

    Uses the transformers pipeline when it loaded successfully at startup;
    otherwise — or on any runtime failure — falls back to the simple
    extractive summarizer so the UI always gets a result.

    Args:
        text: Input text; returned unchanged when empty or shorter than
            50 characters after stripping.
        max_length: Upper bound for the generated summary length.
        min_length: Lower bound for the generated summary length; clamped
            to max_length if the UI sliders are set inconsistently.

    Returns:
        The summary string, or the original text for very short input.
    """
    if not text or len(text.strip()) < 50:
        return text

    # Gradio sliders deliver floats; the generation API expects ints.
    max_length = int(max_length)
    # Keep the bounds consistent even if min is slid above max.
    min_length = min(int(min_length), max_length)

    try:
        if model_available:
            logger.info(f"Summarizing text of length {len(text)} with model")
            result = summarizer(
                text,
                max_length=max_length,
                min_length=min_length,
                truncation=True
            )
            summary = result[0]["summary_text"]
        else:
            logger.info(f"Using simple summarization for text of length {len(text)}")
            summary = simple_summarize(text, max_length, min_length)

        return summary
    except Exception as e:
        logger.error(f"Error during summarization: {str(e)}")
        # Last-resort fallback: never surface an exception to the UI.
        return simple_summarize(text, max_length, min_length)
|
|
|
|
|
# --- Gradio UI ---------------------------------------------------------

text_input = gr.Textbox(
    lines=10,
    label="Text to Summarize",
    placeholder="Enter text to summarize (at least 50 characters)"
)
max_length_slider = gr.Slider(50, 500, value=150, label="Max Length")
min_length_slider = gr.Slider(10, 200, value=30, label="Min Length")

# Pre-filled example shown below the interface.
EXAMPLE_TEXT = "The coronavirus pandemic has led to a surge in remote work. Companies around the world have had to adapt to new ways of working, with many employees setting up home offices. This shift has led to changes in productivity, work-life balance, and communication patterns. Some studies suggest that remote work can increase productivity, while others point to challenges in collaboration and team cohesion. Organizations are now considering hybrid models for the future of work."

demo = gr.Interface(
    fn=summarize_text,
    inputs=[text_input, max_length_slider, min_length_slider],
    outputs=gr.Textbox(label="Summary"),
    title="StudAI Text Summarization",
    description="This service provides text summarization for the StudAI Android app.",
    examples=[[EXAMPLE_TEXT, 150, 30]],
    # NOTE(review): allow_flagging is deprecated in Gradio 4.x in favor of
    # flagging_mode — confirm the installed Gradio version before changing.
    allow_flagging="never"
)

# Bind to all interfaces on the conventional Gradio port so container
# port mapping can reach the server; share=False keeps it local-only.
demo.launch(share=False, server_name="0.0.0.0", server_port=7860)