# Seal / app.py
from flask import Flask, request, jsonify
from flask_cors import CORS
import torch
import os
import json
import logging
import gc
from contextlib import contextmanager
from datetime import datetime, timezone
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
app = Flask(__name__)
CORS(app)
# Global variables for model and tokenizer
model = None
tokenizer = None
device = None
# Configuration
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
DATA_FILE = "data/train_data.json"
MODEL_SAVE_DIR = "./results/model"
# Set environment variables
os.environ["HF_HOME"] = "/data/.huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/data/.huggingface"
def initialize_model():
    """Initialize model and tokenizer with error handling"""
    global model, tokenizer, device
    try:
        logger.info("Initializing model and tokenizer...")

        # Set device
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {device}")

        # Import here to avoid import errors during startup
        from transformers import AutoModelForCausalLM, AutoTokenizer

        # Load tokenizer first (lighter)
        logger.info("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            cache_dir="/data/.huggingface"
        )

        # Add padding token if it doesn't exist
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        logger.info("Loading model...")
        # Load model with specific configuration for stability
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
            device_map="auto" if device.type == "cuda" else None,
            trust_remote_code=True,
            cache_dir="/data/.huggingface",
            low_cpu_mem_usage=True
        )

        # Move to device if not using device_map
        if device.type == "cpu":
            model = model.to(device)

        logger.info("Model initialization completed successfully")
        return True
    except Exception as e:
        logger.error(f"Failed to initialize model: {str(e)}")
        return False

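# Note: device_map="auto" above requires the `accelerate` package to be
# installed; transformers raises an error if a device_map is passed to
# from_pretrained without it.
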
def load_training_data():
    """Load or initialize training data"""
    try:
        if os.path.exists(DATA_FILE):
            with open(DATA_FILE, 'r') as f:
                train_texts = json.load(f)
        else:
            train_texts = []
            os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
            with open(DATA_FILE, 'w') as f:
                json.dump(train_texts, f)
        logger.info(f"Loaded {len(train_texts)} training examples")
        return train_texts
    except Exception as e:
        logger.error(f"Error loading training data: {str(e)}")
        return []

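# The data file is a JSON list of records appended by /adapt, e.g.:
# [
#   {
#     "prompt": "original user text",
#     "completion": "model-generated self-edit",
#     "timestamp": "2024-01-01T00:00:00+00:00"
#   }
# ]
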
@contextmanager
def torch_no_grad():
    """Context manager for torch.no_grad with error handling"""
    try:
        with torch.no_grad():
            yield
    except Exception as e:
        logger.error(f"Error in torch context: {str(e)}")
        raise

# Initialize data
train_texts = load_training_data()
@app.route('/')
def home():
    """Root endpoint with system information"""
    return jsonify({
        'status': 'SEAL Framework API is running',
        'version': '1.0.0',
        'model': MODEL_NAME,
        'model_loaded': model is not None,
        'device': str(device) if device else 'Not initialized',
        'training_examples': len(train_texts),
        'endpoints': {
            '/': 'GET - API status and information',
            '/adapt': 'POST - Adaptive model training and response',
            '/health': 'GET - Health check',
            '/init': 'POST - Initialize model (if not already loaded)'
        },
        'usage': {
            'adapt_endpoint': {
                'method': 'POST',
                'content_type': 'application/json',
                'body': {'text': 'Your input text here'},
                'example': 'curl -X POST -H "Content-Type: application/json" -d \'{"text":"Hello world"}\' /adapt'
            }
        }
    })

@app.route('/init', methods=['POST'])
def init_model():
    """Manual model initialization endpoint"""
    global model, tokenizer
    if model is not None:
        return jsonify({'status': 'Model already initialized', 'success': True})

    success = initialize_model()
    if success:
        return jsonify({'status': 'Model initialized successfully', 'success': True})
    else:
        return jsonify({'status': 'Model initialization failed', 'success': False}), 500

@app.route('/health')
def health():
    """Comprehensive health check"""
    try:
        # Check if model is loaded
        if model is None or tokenizer is None:
            return jsonify({
                'status': 'unhealthy',
                'error': 'Model not initialized',
                'model_loaded': False,
                'suggestion': 'Call /init endpoint to initialize model'
            }), 500

        # Simple model test
        test_input = "Health check"
        try:
            with torch_no_grad():
                inputs = tokenizer(
                    test_input,
                    return_tensors="pt",
                    truncation=True,
                    max_length=32,
                    padding=True
                ).to(device)
                outputs = model.generate(
                    **inputs,
                    max_length=40,
                    num_return_sequences=1,
                    do_sample=False,
                    pad_token_id=tokenizer.pad_token_id
                )
        except Exception as e:
            raise Exception(f"Model inference failed: {str(e)}")

        return jsonify({
            'status': 'healthy',
            'model_loaded': True,
            'device': str(device),
            'training_examples': len(train_texts),
            'torch_version': torch.__version__
        })
    except Exception as e:
        logger.error(f"Health check failed: {str(e)}")
        return jsonify({
            'status': 'unhealthy',
            'error': str(e),
            'model_loaded': model is not None
        }), 500

@app.route('/adapt', methods=['POST'])
def adapt_model():
    """Simplified adaptive model endpoint"""
    global train_texts
    try:
        # Check if model is initialized
        if model is None or tokenizer is None:
            return jsonify({
                'error': 'Model not initialized. Call /init endpoint first.',
                'suggestion': 'POST to /init to initialize the model'
            }), 500

        # Get input (silent=True returns None instead of raising on a missing
        # or malformed JSON body, so the 400 below is reached as intended)
        data = request.get_json(silent=True)
        if not data or 'text' not in data:
            return jsonify({'error': 'No text provided in request body'}), 400

        user_input = data['text'].strip()
        if not user_input:
            return jsonify({'error': 'Empty text provided'}), 400

        logger.info(f"Processing input: {user_input[:50]}...")
        # Generate self-edit (simplified approach)
        try:
            with torch_no_grad():
                prompt = f"Rephrase this text: {user_input}"
                inputs = tokenizer(
                    prompt,
                    return_tensors="pt",
                    truncation=True,
                    max_length=128,
                    padding=True
                ).to(device)
                self_edit_output = model.generate(
                    **inputs,
                    max_length=200,
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.7,
                    pad_token_id=tokenizer.pad_token_id
                )
                self_edit = tokenizer.decode(
                    self_edit_output[0],
                    skip_special_tokens=True
                ).replace(prompt, "").strip()
        except Exception as e:
            logger.error(f"Self-edit generation failed: {str(e)}")
            self_edit = f"Self-edit failed: {str(e)}"

        # Generate response (simplified)
        try:
            with torch_no_grad():
                response_inputs = tokenizer(
                    user_input,
                    return_tensors="pt",
                    truncation=True,
                    max_length=128,
                    padding=True
                ).to(device)
                response_output = model.generate(
                    **response_inputs,
                    max_length=256,
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.8,
                    pad_token_id=tokenizer.pad_token_id
                )
                response = tokenizer.decode(
                    response_output[0],
                    skip_special_tokens=True
                ).replace(user_input, "").strip()
        except Exception as e:
            logger.error(f"Response generation failed: {str(e)}")
            response = f"Response generation failed: {str(e)}"
        # Save training data (simplified - no actual fine-tuning for stability)
        try:
            train_texts.append({
                "prompt": user_input,
                "completion": self_edit,
                "timestamp": datetime.now(timezone.utc).isoformat()
            })
            # Save to file
            with open(DATA_FILE, 'w') as f:
                json.dump(train_texts, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save training data: {str(e)}")

        # Clean up GPU memory
        if device.type == "cuda":
            torch.cuda.empty_cache()
        gc.collect()

        return jsonify({
            'input': user_input,
            'self_edit': self_edit,
            'response': response,
            'training_examples': len(train_texts),
            'status': 'Processing completed successfully',
            'note': 'Fine-tuning disabled for stability - using generation only'
        })
    except Exception as e:
        logger.error(f"Adapt endpoint error: {str(e)}")
        return jsonify({
            'error': str(e),
            'type': type(e).__name__,
            'suggestion': 'Check logs for detailed error information'
        }), 500

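# The /adapt route above only records (prompt, completion) pairs; actual
# fine-tuning is disabled for stability. The helper below is a minimal,
# hypothetical sketch of what one adaptation pass over those pairs could look
# like. It is not wired into any route; the learning rate and max_length are
# illustrative assumptions, not tuned values, and a float16 model would
# normally also need a grad scaler or fp32 master weights for stable updates.
def fine_tune_step(examples, lr=5e-5, max_length=256):
    """Run one causal-LM gradient step per (prompt, completion) pair."""
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    model.train()
    for ex in examples:
        text = f"{ex['prompt']}\n{ex['completion']}"
        batch = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=max_length
        ).to(device)
        # For causal-LM fine-tuning the labels are the input ids themselves;
        # the model shifts them internally to compute the next-token loss.
        loss = model(**batch, labels=batch["input_ids"]).loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    model.eval()
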
@app.errorhandler(404)
def not_found(error):
    return jsonify({
        'error': 'Endpoint not found',
        'available_endpoints': ['/health', '/adapt', '/init', '/']
    }), 404


@app.errorhandler(500)
def internal_error(error):
    return jsonify({
        'error': 'Internal server error',
        'message': 'Check server logs for details'
    }), 500

# Initialize model on startup (with fallback)
if __name__ == '__main__':
    logger.info("Starting SEAL Framework API...")
    initialize_model()
    app.run(host='0.0.0.0', port=7860, debug=False)
else:
    # For production deployment
    logger.info("SEAL Framework API starting in production mode...")
    # Don't initialize model immediately in production to avoid startup timeouts
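
# Example usage (assuming the server is reachable at http://localhost:7860):
#   curl http://localhost:7860/                # API status and endpoint listing
#   curl -X POST http://localhost:7860/init    # load the model
#   curl http://localhost:7860/health          # smoke-test the loaded model
#   curl -X POST -H "Content-Type: application/json" \
#        -d '{"text": "Hello world"}' http://localhost:7860/adapt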