Spaces:
Sleeping
Sleeping
# ============================================================================= | |
# AI DOCUMENT SUMMARIZER - BULLETPROOF HF SPACES VERSION | |
# This version handles all dependency issues gracefully | |
# ============================================================================= | |
import gradio as gr | |
import sys | |
import time | |
import warnings | |
from typing import Tuple, Optional | |
print("π Starting AI Document Summarizer...")
print(f"Python version: {sys.version}")

# Handle dependencies with proper error messages.
# Each heavy dependency is imported inside its own try/except so that a
# missing package downgrades the app instead of crashing it at import time.
dependencies_available = True  # flipped to False by any failed import below
error_messages = []            # human-readable reason for each failed import

# Bound before the torch probe so the name exists even when the import fails;
# previously it was only assigned on the success path.
device_info = "Device: unavailable (PyTorch not found)"

try:
    import torch
    print("β PyTorch imported successfully")
    TORCH_AVAILABLE = True
    device_info = f"π₯οΈ Device: {'GPU' if torch.cuda.is_available() else 'CPU'}"
except ImportError as e:
    print(f"β PyTorch import failed: {e}")
    TORCH_AVAILABLE = False
    dependencies_available = False
    error_messages.append("PyTorch not found")

try:
    from transformers import pipeline
    print("β Transformers imported successfully")
    TRANSFORMERS_AVAILABLE = True
except ImportError as e:
    print(f"β Transformers import failed: {e}")
    TRANSFORMERS_AVAILABLE = False
    dependencies_available = False
    error_messages.append("Transformers not found")

# Suppress warnings if libraries are available.
# NOTE: this installs a blanket "ignore" filter for every warning category.
if dependencies_available:
    warnings.filterwarnings("ignore")
class DocumentSummarizer:
    """Summarize text with lazily loaded Hugging Face summarization pipelines.

    Pipelines are created on first use and cached per model name in
    ``self.models``. When the module-level ``dependencies_available`` flag is
    False the instance runs in "demo mode": no model is ever loaded and
    ``summarize`` returns setup instructions instead of a summary.
    """

    def __init__(self):
        """Set up the pipeline cache, the availability flag and the model registry."""
        self.models = {}
        self.available = dependencies_available
        # Defined unconditionally so the attribute exists in demo mode as well.
        self.configs = {
            "BART": {
                "model_id": "facebook/bart-large-cnn",
                "description": "π° Great for news and general text",
            },
            "T5-Small": {
                "model_id": "t5-small",
                "description": "β‘ Fastest processing",
            },
            "DistilBART": {
                "model_id": "sshleifer/distilbart-cnn-12-6",
                "description": "π Lightweight and fast",
            },
        }
        if not self.available:
            print("β Dependencies not available - running in demo mode")
            return
        print("β Document Summarizer initialized!")

    def load_model(self, model_name: str):
        """Return the pipeline for ``model_name``, creating and caching it on first use.

        Args:
            model_name: Key into ``self.configs``. Unknown names fall back to
                the "T5-Small" configuration (and are cached under the
                requested name).

        Returns:
            The cached or newly created pipeline, or ``None`` when
            dependencies are missing or the pipeline could not be created.
        """
        if not self.available:
            return None
        if model_name in self.models:
            return self.models[model_name]

        try:
            config = self.configs.get(model_name, self.configs["T5-Small"])
            print(f"π Loading {model_name}...")
            # No `return_tensors` here: any truthy value makes the pipeline
            # return token ids instead of text, so the 'summary_text' key
            # read by `summarize` would not exist.
            model = pipeline(
                "summarization",
                model=config["model_id"],
                device=-1,  # Force CPU for HF Spaces stability
            )
        except Exception as e:
            # Best-effort boundary: report the failure and let the caller
            # show a friendly message instead of crashing the UI callback.
            print(f"β Failed to load {model_name}: {str(e)}")
            return None

        self.models[model_name] = model
        print(f"β {model_name} loaded successfully!")
        return model

    def summarize(self, text: str, model_name: str, max_length: int, min_length: int) -> Tuple[str, str, str, str]:
        """Generate a summary, or return an explanatory message on failure.

        Args:
            text: The document to summarize; must contain at least 10
                whitespace-separated words.
            model_name: Which configured model to use.
            max_length: Upper bound passed to the pipeline. Floats are
                truncated to int.
            min_length: Lower bound passed to the pipeline. Floats are
                truncated to int and the value is clamped below ``max_length``.

        Returns:
            A 4-tuple ``(summary_or_message, metrics_markdown,
            original_word_count, summary_word_count)``. On any failure the
            first element carries the message and the counts are empty strings.
        """
        # Check if dependencies are available
        if not self.available:
            error_msg = f"""
β **Dependencies Missing**
The following packages are not installed:
{', '.join(error_messages)}
**To fix this issue:**
1. Make sure your `requirements.txt` contains:
```
torch
transformers
gradio
accelerate
sentencepiece
```
2. Or try the corrected requirements.txt below.
This is likely a Hugging Face Spaces configuration issue.
"""
            return error_msg, "Please check the requirements.txt file", "", ""

        # Input validation
        if not text or not text.strip():
            return "β οΈ Please enter some text to summarize.", "", "", ""
        text = text.strip()
        word_count = len(text.split())
        if word_count < 10:
            return "β οΈ Text too short. Please provide at least 10 words.", "", "", ""

        # Load model
        model = self.load_model(model_name)
        if model is None:
            return f"β Could not load {model_name} model. Try T5-Small.", "", "", ""

        try:
            # UI sliders may deliver floats and allow min > max; generation
            # expects integers with min_length below max_length.
            max_length = max(2, int(max_length))
            min_length = max(1, min(int(min_length), max_length - 1))

            start_time = time.time()
            # Handle T5 special case
            input_text = f"summarize: {text}" if "T5" in model_name else text
            result = model(
                input_text,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                truncation=True,
            )
            processing_time = time.time() - start_time
            summary = result[0]['summary_text']

            # Calculate metrics (word_count >= 10 here, so no division by zero)
            summary_words = len(summary.split())
            compression_ratio = (summary_words / word_count) * 100
            metrics = f"""
π **Results:**
- Original: {word_count:,} words
- Summary: {summary_words:,} words
- Compression: {compression_ratio:.1f}%
- Time: {processing_time:.1f}s
- Model: {model_name}
"""
            return summary, metrics, f"{word_count:,}", f"{summary_words:,}"
        except Exception as e:
            # UI boundary: surface the error text to the user rather than raise.
            return f"β Error: {str(e)}", "", "", ""
# Initialize summarizer
# Single module-level instance used by the `process_request` callback; because
# it is shared, pipelines cached in its `models` dict are reused across requests.
summarizer = DocumentSummarizer()
# Sample texts
# Maps the label shown in the sample dropdown to its demo passage. Insertion
# order matters: the dropdown choices are built from the keys in this order.
SAMPLES = {
    "Technology News": (
        "Artificial intelligence and machine learning are transforming industries worldwide. "
        "From healthcare diagnostics to autonomous vehicles, AI systems are becoming more sophisticated and capable. "
        "However, this rapid advancement also raises questions about ethics, job displacement, and the need for proper regulation. "
        "Companies are investing billions in AI research while governments work to establish frameworks for responsible AI development."
    ),
    "Business Update": (
        "The global economy shows mixed signals as markets navigate inflation concerns and supply chain disruptions. "
        "Technology stocks have seen volatility while energy sectors gain momentum. "
        "Central banks worldwide are adjusting monetary policies to balance growth and inflation targets. "
        "Investors remain cautious but optimistic about long-term recovery prospects."
    ),
    "Health Research": (
        "Recent studies highlight the importance of preventive healthcare and lifestyle modifications. "
        "Research shows that regular exercise, balanced nutrition, and adequate sleep significantly impact long-term health outcomes. "
        "Mental health awareness is also growing, with new therapeutic approaches showing promising results in clinical trials."
    ),
}
def get_sample(choice):
    """Look up the sample passage for ``choice``.

    Returns the text stored in ``SAMPLES`` under that label, or an empty
    string when the label is not a known sample (for example "None").
    """
    try:
        return SAMPLES[choice]
    except KeyError:
        return ""
def process_request(text, model, max_len, min_len, sample):
    """Gradio click handler: summarize the text box content.

    The sample dropdown already copies the chosen passage into the text box,
    so the text box is the source of truth. The sample is only used as a
    fallback when the text box is empty; otherwise edits the user made after
    picking a sample would be silently discarded.

    Args:
        text: Current content of the input text box (may be empty or None).
        model: Model name selected in the UI.
        max_len: Maximum summary length from the slider.
        min_len: Minimum summary length from the slider.
        sample: Selected sample label; "None" or a cleared dropdown (None)
            means no sample is selected.

    Returns:
        The 4-tuple produced by ``summarizer.summarize``.
    """
    has_text = bool(text and text.strip())
    if not has_text and sample and sample != "None":
        text = get_sample(sample)
    return summarizer.summarize(text, model, max_len, min_len)
# Create Gradio interface
def create_app():
    """Build and return the Gradio Blocks UI (without launching it).

    Layout: a header and dependency status line, an input column (sample
    dropdown + text box), a settings column (model and length sliders), the
    output widgets, and a static troubleshooting section.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    with gr.Blocks(
        title="AI Document Summarizer",
        theme=gr.themes.Default(primary_hue="blue")
    ) as app:
        gr.Markdown("""
# π AI Document Summarizer
### Intelligent text summarization using transformer models
""")

        # Show status
        if dependencies_available:
            gr.Markdown("β **Status:** All dependencies loaded successfully!")
        else:
            gr.Markdown(f"""
β **Status:** Missing dependencies - {', '.join(error_messages)}
**Fix:** Update your `requirements.txt` with the correct dependencies (see below).
""")

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("## Input")
                sample_choice = gr.Dropdown(
                    ["None"] + list(SAMPLES.keys()),
                    label="π Try a sample:",
                    value="None"
                )
                text_input = gr.Textbox(
                    label="π Your text:",
                    placeholder="Enter text to summarize...",
                    lines=8
                )
                # Picking a sample copies its text into the text box.
                sample_choice.change(get_sample, sample_choice, text_input)

            with gr.Column(scale=1):
                gr.Markdown("## Settings")
                model_select = gr.Dropdown(
                    ["BART", "T5-Small", "DistilBART"],
                    label="π€ Model:",
                    value="T5-Small"
                )
                # step=1 keeps both sliders on whole numbers: the lengths are
                # token counts, and without an explicit step the slider may
                # use a fractional increment.
                max_length = gr.Slider(50, 200, 100, step=1, label="Max length")
                min_length = gr.Slider(20, 80, 30, step=1, label="Min length")
                submit_btn = gr.Button("π Summarize", variant="primary")

        with gr.Row():
            summary_out = gr.Textbox(label="π Summary", lines=5)
            metrics_out = gr.Markdown("Metrics will appear here")

        with gr.Row():
            orig_count = gr.Textbox(label="Original", interactive=False)
            summ_count = gr.Textbox(label="Summary", interactive=False)

        # Output order must match the 4-tuple returned by process_request.
        submit_btn.click(
            process_request,
            [text_input, model_select, max_length, min_length, sample_choice],
            [summary_out, metrics_out, orig_count, summ_count]
        )

        # Troubleshooting section
        gr.Markdown("""
---
## π§ Troubleshooting
**If you see dependency errors, create these files:**
### requirements.txt
```
torch
transformers
gradio
accelerate
sentencepiece
numpy
```
### README.md
```
---
title: AI Document Summarizer
emoji: π
colorFrom: blue
colorTo: green
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
---
```
""")
    return app
# Launch
# Build and serve the UI only when this file is executed as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    app = create_app()
    app.launch()
# =============================================================================
# DEPLOYMENT FILES - COPY THESE EXACTLY
# =============================================================================
# Contents of the companion requirements.txt, one package per line.
REQUIREMENTS_TXT = "\n".join((
    "torch",
    "transformers",
    "gradio",
    "accelerate",
    "sentencepiece",
    "numpy",
))

# Contents of the companion README.md: Spaces front matter followed by a
# short project description.
README_MD = "\n".join((
    "---",
    "title: AI Document Summarizer",
    "emoji: π",
    "colorFrom: blue",
    "colorTo: green",
    "sdk: gradio",
    "sdk_version: 4.0.0",
    "app_file: app.py",
    "pinned: false",
    "---",
    "# AI Document Summarizer",
    "Transform long documents into concise summaries using AI.",
    "## Features",
    "- Multiple transformer models",
    "- Real-time processing",
    "- Sample documents included",
    "- Error handling and troubleshooting",
    "Built for Hugging Face Spaces deployment.",
))

# Print the deployment guide. The leading and trailing empty entries reproduce
# the blank line at the start and end of the printed text.
print("\n".join((
    "",
    "π BULLETPROOF DEPLOYMENT GUIDE",
    "π CREATE EXACTLY THESE 3 FILES:",
    "1οΈβ£ requirements.txt:",
    f"{REQUIREMENTS_TXT}",
    "2οΈβ£ README.md:",
    f"{README_MD}",
    "3οΈβ£ app.py:",
    "[Copy the entire code above]",
    "π― DEPLOYMENT STEPS:",
    "1. Go to hf.co/spaces",
    "2. Create new Space",
    "3. Choose Gradio SDK",
    "4. Upload these 3 files EXACTLY as shown",
    "5. Wait for build to complete",
    "β This version will:",
    "- Show clear error messages if dependencies fail",
    "- Provide troubleshooting instructions",
    "- Work with minimal requirements",
    "- Handle all edge cases gracefully",
    "β Common issues fixed:",
    "- Torch import errors",
    "- Transformers import errors",
    "- Version conflicts",
    "- HF Spaces compatibility",
    "",
)))