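"""SEAL Framework demo API.

Serves Qwen/Qwen2.5-1.5B-Instruct behind a Flask app: each /adapt request
generates a "self-edit" rephrasing of the input, appends it to a JSON
training file, and fine-tunes the model on the accumulated examples.
"""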
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
import torch
import os
import json
app = Flask(__name__)
CORS(app) # Enable CORS for all routes
# Set Hugging Face cache to ephemeral storage
os.environ["HF_HOME"] = "/data/.huggingface"
# Load Qwen2.5-1.5B model and tokenizer
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load weights in float32: the Trainer's fp16 mixed-precision pass below
# expects fp32 master weights, and half precision is poorly supported on CPU
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
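# Guard for tokenizers without a pad token (Qwen2.5 ships one, but the
# padded fine-tuning batches below require it on any checkpoint)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token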
# Data file for preloaded and dynamic data
data_file = "data/train_data.json"
# Load or initialize dataset
if os.path.exists(data_file):
    with open(data_file, 'r') as f:
        train_texts = json.load(f)
else:
    train_texts = []
    os.makedirs(os.path.dirname(data_file), exist_ok=True)
    with open(data_file, 'w') as f:
        json.dump(train_texts, f)
print(f"Loaded {len(train_texts)} examples from {data_file}")
# Model save directory
model_save_dir = "./results/model"
@app.route('/')
def home():
    """Root endpoint to show API status and usage"""
    return jsonify({
        'status': 'SEAL Framework API is running',
        'version': '1.0.0',
        'model': model_name,
        'device': str(device),
        'training_examples': len(train_texts),
        'endpoints': {
            '/': 'GET - API status and information',
            '/adapt': 'POST - Adaptive model training and response',
            '/health': 'GET - Health check'
        },
        'usage': {
            'adapt_endpoint': {
                'method': 'POST',
                'content_type': 'application/json',
                'body': {'text': 'Your input text here'},
                'example': 'curl -X POST -H "Content-Type: application/json" -d \'{"text":"Hello world"}\' /adapt'
            }
        }
    })

@app.route('/health')
def health():
    """Health check endpoint"""
    try:
        # Smoke test: run a tiny generation to confirm the model responds
        test_input = "Health check"
        inputs = tokenizer(test_input, return_tensors="pt", truncation=True, max_length=32).to(device)
        with torch.no_grad():
            model.generate(**inputs, max_new_tokens=8, num_return_sequences=1, do_sample=False)
        return jsonify({
            'status': 'healthy',
            'model_loaded': True,
            'device': str(device),
            'training_examples': len(train_texts)
        })
    except Exception as e:
        return jsonify({
            'status': 'unhealthy',
            'error': str(e)
        }), 500

@app.route('/adapt', methods=['POST'])
def adapt_model():
    try:
        data = request.get_json(silent=True) or {}
        user_input = data.get('text', '')
        if not user_input:
            return jsonify({'error': 'No input provided'}), 400
        # Generate a self-edit: a model-written rephrasing of the input
        prompt = f"Rephrase this: {user_input}"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
        with torch.no_grad():
            self_edit_output = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)
        # Decode only the newly generated tokens so the stored completion
        # does not repeat the prompt
        self_edit = tokenizer.decode(self_edit_output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        # Add to training data and save to disk
        train_texts.append({"prompt": user_input, "completion": self_edit})
        with open(data_file, 'w') as f:
            json.dump(train_texts, f, indent=2)
        # Prepare dataset for fine-tuning: each example is prompt + completion,
        # with labels equal to input_ids (the Trainer shifts them internally
        # for the causal-LM loss)
        encodings = tokenizer(
            [t["prompt"] + " " + t["completion"] for t in train_texts],
            truncation=True,
            padding=True,
            max_length=256,
            return_tensors="pt"
        )
        dataset = [
            {
                "input_ids": encodings["input_ids"][i],
                "attention_mask": encodings["attention_mask"][i],
                "labels": encodings["input_ids"][i]
            } for i in range(len(train_texts))
        ]
        # Fine-tune on the full accumulated dataset; note this retrains on
        # every request, which gets slower as the dataset grows
        training_args = TrainingArguments(
            output_dir=model_save_dir,
            num_train_epochs=1,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            logging_steps=10,
            save_steps=10,
            save_total_limit=1,  # Keep only the latest checkpoint
            disable_tqdm=True,
            fp16=torch.cuda.is_available()  # Mixed precision on GPU only
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset
        )
        trainer.train()
        # Save model weights and tokenizer
        trainer.save_model(model_save_dir)
        tokenizer.save_pretrained(model_save_dir)
        # Generate a response from the freshly adapted model
        model.eval()  # Trainer leaves the model in train mode
        response_inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=128).to(device)
        with torch.no_grad():
            response_output = model.generate(**response_inputs, max_new_tokens=200, num_return_sequences=1)
        response = tokenizer.decode(response_output[0][response_inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        return jsonify({
            'input': user_input,
            'self_edit': self_edit,
            'response': response,
            'training_examples': len(train_texts),
            'status': 'Model adapted successfully'
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.errorhandler(404)
def not_found(error):
    """Custom 404 handler"""
    return jsonify({
        'error': 'Endpoint not found',
        'available_endpoints': {
            '/': 'GET - API information',
            '/health': 'GET - Health check',
            '/adapt': 'POST - Adaptive model training'
        }
    }), 404

@app.errorhandler(500)
def internal_error(error):
    """Custom 500 handler"""
    return jsonify({
        'error': 'Internal server error',
        'message': 'Please check the server logs for more details'
    }), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860, debug=False)
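
# Example usage, assuming the app is reachable at http://localhost:7860
# (on a Hugging Face Space, substitute the Space URL):
#
#   curl http://localhost:7860/health
#   curl -X POST -H "Content-Type: application/json" \
#        -d '{"text": "Hello world"}' http://localhost:7860/adapt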