import os
import json

# Set the Hugging Face cache to ephemeral storage. This must happen before
# transformers is imported, because the cache location is resolved at import time.
os.environ["HF_HOME"] = "/data/.huggingface"

import torch
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Load Qwen2.5-1.5B model and tokenizer. The model is loaded in full precision
# so the Trainer's fp16 flag can apply mixed precision on GPU: AMP cannot
# unscale the gradients of a model whose weights are already cast to float16.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Data file for preloaded and dynamic data
data_file = "data/train_data.json"

# Load or initialize dataset
if os.path.exists(data_file):
    with open(data_file, 'r') as f:
        train_texts = json.load(f)
else:
    train_texts = []
    os.makedirs(os.path.dirname(data_file), exist_ok=True)
    with open(data_file, 'w') as f:
        json.dump(train_texts, f)
print(f"Loaded {len(train_texts)} examples from {data_file}")

# Model save directory
model_save_dir = "./results/model"
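
# Note: /adapt saves fine-tuned weights to model_save_dir below, but this
# script does not reload them on restart. A minimal sketch for resuming from
# the last saved weights, if desired (assumes a prior run saved at least once):
#
#   if os.path.isdir(model_save_dir):
#       model = AutoModelForCausalLM.from_pretrained(model_save_dir).to(device)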

@app.route('/adapt', methods=['POST'])
def adapt_model():
    try:
        # Tolerate malformed or missing JSON so the handler returns a clean 400
        # instead of an unhandled 500.
        data = request.get_json(silent=True) or {}
        user_input = data.get('text', '')

        if not user_input:
            return jsonify({'error': 'No input provided'}), 400

        # Generate a self-edit. max_new_tokens bounds the newly generated text
        # (max_length would also count the prompt), and the prompt tokens are
        # sliced off so only the generated continuation is decoded.
        prompt = f"Rephrase this: {user_input}"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
        self_edit_output = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)
        self_edit = tokenizer.decode(
            self_edit_output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
        )

        # Add to training data and save to disk
        train_texts.append({"prompt": user_input, "completion": self_edit})
        with open(data_file, 'w') as f:
            json.dump(train_texts, f, indent=2)
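        # Note: fine-tuning below runs over the full accumulated dataset on
        # every request, so per-request training cost grows with len(train_texts).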

        # Prepare dataset for fine-tuning. Padding positions are masked out of
        # the labels with -100 so they do not contribute to the loss.
        encodings = tokenizer(
            [t["prompt"] + " " + t["completion"] for t in train_texts],
            truncation=True,
            padding=True,
            max_length=256,
            return_tensors="pt"
        )
        labels = encodings["input_ids"].clone()
        labels[encodings["attention_mask"] == 0] = -100
        dataset = [
            {
                "input_ids": encodings["input_ids"][i],
                "attention_mask": encodings["attention_mask"][i],
                "labels": labels[i]
            } for i in range(len(train_texts))
        ]

        # Fine-tune model
        training_args = TrainingArguments(
            output_dir=model_save_dir,
            num_train_epochs=1,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            logging_steps=10,
            save_steps=10,
            save_total_limit=1,  # Keep only latest checkpoint
            disable_tqdm=True,
            fp16=torch.cuda.is_available()  # mixed precision only when a GPU is present
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset
        )
        trainer.train()

        # Save model weights
        trainer.save_model(model_save_dir)
        tokenizer.save_pretrained(model_save_dir)

        # Generate a response with the freshly fine-tuned weights; switch back
        # to inference mode first, since Trainer leaves the model in train mode.
        model.eval()
        response_inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=128).to(device)
        response_output = model.generate(**response_inputs, max_new_tokens=200, num_return_sequences=1)
        response = tokenizer.decode(
            response_output[0][response_inputs["input_ids"].shape[1]:], skip_special_tokens=True
        )

        return jsonify({
            'input': user_input,
            'self_edit': self_edit,
            'response': response
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
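
# Example request against the /adapt endpoint (assumes the server is running
# locally on the port above):
#
#   curl -X POST http://localhost:7860/adapt \
#        -H "Content-Type: application/json" \
#        -d '{"text": "The quick brown fox jumps over the lazy dog."}'
#
# The JSON response contains the original input, the generated self-edit, and
# the model's reply after the incremental fine-tuning step.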