# Seal / app.py
# mike23415's picture
# Create app.py
# 1f2df23 verified
# raw
# history blame
# 3.83 kB
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
import torch
import os
import json
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Hugging Face cache location.
# NOTE: the environment variable is assigned after `transformers` has been
# imported, and the hub cache path is resolved at import time, so the
# assignment alone does not redirect downloads.  It is kept for anything that
# reads it later, and the cache directory is also passed explicitly to
# `from_pretrained` below so the model really lands under this path.
os.environ["HF_HOME"] = "/data/.huggingface"
hf_cache_dir = os.path.join(os.environ["HF_HOME"], "hub")

# Load Qwen2.5-1.5B model and tokenizer
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=hf_cache_dir)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    cache_dir=hf_cache_dir,
)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Data file for preloaded and dynamic data
data_file = "data/train_data.json"

# Load the stored examples, or create an empty dataset file on first start.
if os.path.exists(data_file):
    with open(data_file, 'r', encoding="utf-8") as f:
        train_texts = json.load(f)
else:
    train_texts = []
    os.makedirs(os.path.dirname(data_file), exist_ok=True)
    with open(data_file, 'w', encoding="utf-8") as f:
        json.dump(train_texts, f)
print(f"Loaded {len(train_texts)} examples from {data_file}")

# Model save directory
model_save_dir = "./results/model"
def _generate_text(prompt, max_length):
    """Generate a continuation of *prompt* and return only the new text.

    The prompt is truncated to 128 tokens.  ``max_length`` is forwarded to
    ``model.generate`` and therefore bounds prompt + generated tokens.
    """
    # Trainer.train() leaves the model in training mode; switch back before
    # sampling so generation is not affected by training-only layers.
    model.eval()
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=128
    ).to(device)
    output_ids = model.generate(
        **inputs, max_length=max_length, num_return_sequences=1
    )
    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so the caller gets the continuation rather than an echo.
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = output_ids[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


def _build_training_dataset(examples):
    """Tokenize prompt/completion pairs into a list of Trainer-ready dicts."""
    encodings = tokenizer(
        [ex["prompt"] + " " + ex["completion"] for ex in examples],
        truncation=True,
        padding=True,
        max_length=256,
        return_tensors="pt",
    )
    input_ids = encodings["input_ids"]
    attention_mask = encodings["attention_mask"]
    # Labels are a copy of the inputs with padding positions set to -100 so
    # the loss ignores them instead of training the model to emit padding.
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100
    return [
        {"input_ids": ids, "attention_mask": mask, "labels": lab}
        for ids, mask, lab in zip(input_ids, attention_mask, labels)
    ]


@app.route('/adapt', methods=['POST'])
def adapt_model():
    """Handle POST /adapt: self-edit, fine-tune on all stored data, respond.

    Expects a JSON object with a non-empty string field ``text``.  The handler
    generates a rephrasing of the text, appends the (text, rephrasing) pair to
    the on-disk dataset, fine-tunes the global model for one epoch on the
    whole dataset, saves the weights and tokenizer to ``model_save_dir``, and
    finally generates a response to the original text.

    Returns:
        200 with ``{'input', 'self_edit', 'response'}`` on success,
        400 with ``{'error'}`` when the body is not a JSON object or ``text``
        is missing, empty, or not a string,
        500 with ``{'error'}`` when generation, training, or saving fails.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so malformed requests become a 400 rather than a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'No input provided'}), 400
    user_input = payload.get('text', '')
    if not isinstance(user_input, str) or not user_input:
        return jsonify({'error': 'No input provided'}), 400

    try:
        # Generate self-edit
        self_edit = _generate_text(f"Rephrase this: {user_input}", 150)

        # Add to training data and save to disk
        train_texts.append({"prompt": user_input, "completion": self_edit})
        with open(data_file, 'w', encoding="utf-8") as f:
            json.dump(train_texts, f, indent=2)

        # Prepare dataset for fine-tuning
        dataset = _build_training_dataset(train_texts)

        # Mixed-precision training scales and then unscales gradients; the
        # grad scaler rejects float16 gradients, so only turn it on when the
        # model weights are float32.
        use_amp = torch.cuda.is_available() and model.dtype == torch.float32

        # Fine-tune model
        training_args = TrainingArguments(
            output_dir=model_save_dir,
            num_train_epochs=1,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            logging_steps=10,
            save_steps=10,
            save_total_limit=1,  # Keep only latest checkpoint
            disable_tqdm=True,
            fp16=use_amp,
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset,
        )
        trainer.train()

        # Save model weights
        trainer.save_model(model_save_dir)
        tokenizer.save_pretrained(model_save_dir)

        # Generate response with the freshly adapted model
        response = _generate_text(user_input, 200)

        return jsonify({
            'input': user_input,
            'self_edit': self_edit,
            'response': response,
        })
    except Exception as e:
        # Top-level request boundary: record the traceback, then report.
        app.logger.exception("Adaptation request failed")
        return jsonify({'error': str(e)}), 500
# Script entry point: when executed directly, serve the app with Flask's
# built-in server, listening on every interface at port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")