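"""SEAL Framework API: a Flask service that adapts Qwen2.5-1.5B-Instruct online.

Each POST to /adapt asks the model to rephrase the input, stores the resulting
(prompt, completion) pair, fine-tunes the model for one epoch on the full
dataset, and returns a response generated with the freshly adapted weights.
"""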
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import json

# Point the Hugging Face cache at ephemeral storage. This must be set before
# importing transformers, since huggingface_hub reads HF_HOME at import time.
os.environ["HF_HOME"] = "/data/.huggingface"

from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
import torch

app = Flask(__name__)
CORS(app)  # Enable CORS for all routes
# Load the Qwen2.5-1.5B model and tokenizer in float16 to halve memory use.
# Note that fp16 inference is slow on CPU, so a GPU is assumed in practice.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Data file for preloaded and dynamic data
data_file = "data/train_data.json"
# Load or initialize dataset
if os.path.exists(data_file):
    with open(data_file, 'r') as f:
        train_texts = json.load(f)
else:
    train_texts = []
    os.makedirs(os.path.dirname(data_file), exist_ok=True)
    with open(data_file, 'w') as f:
        json.dump(train_texts, f)
print(f"Loaded {len(train_texts)} examples from {data_file}")
# Model save directory
model_save_dir = "./results/model"
@app.route('/')
def home():
    """Root endpoint to show API status and usage"""
    return jsonify({
        'status': 'SEAL Framework API is running',
        'version': '1.0.0',
        'model': model_name,
        'device': str(device),
        'training_examples': len(train_texts),
        'endpoints': {
            '/': 'GET - API status and information',
            '/adapt': 'POST - Adaptive model training and response',
            '/health': 'GET - Health check'
        },
        'usage': {
            'adapt_endpoint': {
                'method': 'POST',
                'content_type': 'application/json',
                'body': {'text': 'Your input text here'},
                'example': 'curl -X POST -H "Content-Type: application/json" -d \'{"text":"Hello world"}\' /adapt'
            }
        }
    })
@app.route('/health')
def health():
    """Health check endpoint"""
    try:
        # Smoke-test generation; the output itself is discarded
        test_input = "Health check"
        inputs = tokenizer(test_input, return_tensors="pt", truncation=True, max_length=32).to(device)
        with torch.no_grad():
            model.generate(**inputs, max_length=40, num_return_sequences=1, do_sample=False)
        return jsonify({
            'status': 'healthy',
            'model_loaded': True,
            'device': str(device),
            'training_examples': len(train_texts)
        })
    except Exception as e:
        return jsonify({
            'status': 'unhealthy',
            'error': str(e)
        }), 500
@app.route('/adapt', methods=['POST'])
def adapt_model():
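    """Self-adaptation loop: generate a self-edit (a rephrasing of the input),
    append it to the training set, fine-tune for one epoch on the full set,
    then answer the request with the freshly adapted weights."""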
    try:
        # Parse the JSON body defensively; a missing or malformed body
        # yields an empty dict instead of raising inside the handler
        data = request.get_json(silent=True) or {}
        user_input = data.get('text', '')
        if not user_input:
            return jsonify({'error': 'No input provided'}), 400

        # Generate self-edit
        prompt = f"Rephrase this: {user_input}"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
        self_edit_output = model.generate(**inputs, max_length=150, num_return_sequences=1)
        # Decode only the newly generated tokens so the prompt is not echoed back
        self_edit = tokenizer.decode(self_edit_output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

        # Add to training data and save to disk
        train_texts.append({"prompt": user_input, "completion": self_edit})
        with open(data_file, 'w') as f:
            json.dump(train_texts, f, indent=2)
        # Prepare dataset for fine-tuning
        encodings = tokenizer(
            [t["prompt"] + " " + t["completion"] for t in train_texts],
            truncation=True,
            padding=True,
            max_length=256,
            return_tensors="pt"
        )
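        # Qwen2.5's tokenizer ships with a pad token (<|endoftext|>), so
        # padding=True works here without assigning one manually.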
        # Labels mirror input_ids for causal LM training, but padding positions
        # are set to -100 so they are ignored by the loss
        labels = encodings["input_ids"].clone()
        labels[encodings["attention_mask"] == 0] = -100
        dataset = [
            {
                "input_ids": encodings["input_ids"][i],
                "attention_mask": encodings["attention_mask"][i],
                "labels": labels[i]
            } for i in range(len(train_texts))
        ]
        # Fine-tune model
        training_args = TrainingArguments(
            output_dir=model_save_dir,
            num_train_epochs=1,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            logging_steps=10,
            save_steps=10,
            save_total_limit=1,  # Keep only the latest checkpoint
            disable_tqdm=True,
            fp16=torch.cuda.is_available()
        )
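        # Effective batch size per optimizer step is
        # per_device_train_batch_size * gradient_accumulation_steps = 8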
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset
        )
        trainer.train()

        # Save model weights
        trainer.save_model(model_save_dir)
        tokenizer.save_pretrained(model_save_dir)

        # Generate a response with the adapted weights
        response_inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=128).to(device)
        response_output = model.generate(**response_inputs, max_length=200, num_return_sequences=1)
        # Again decode only the continuation, not the echoed input
        response = tokenizer.decode(response_output[0][response_inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        return jsonify({
            'input': user_input,
            'self_edit': self_edit,
            'response': response,
            'training_examples': len(train_texts),
            'status': 'Model adapted successfully'
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.errorhandler(404)
def not_found(error):
    """Custom 404 handler"""
    return jsonify({
        'error': 'Endpoint not found',
        'available_endpoints': {
            '/': 'GET - API information',
            '/health': 'GET - Health check',
            '/adapt': 'POST - Adaptive model training'
        }
    }), 404
@app.errorhandler(500)
def internal_error(error):
    """Custom 500 handler"""
    return jsonify({
        'error': 'Internal server error',
        'message': 'Please check the server logs for more details'
    }), 500
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860, debug=False)
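# Example usage (assuming a local run on port 7860):
#   curl http://localhost:7860/health
#   curl -X POST -H "Content-Type: application/json" \
#        -d '{"text":"Hello world"}' http://localhost:7860/adapt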