import os

# Set the Hugging Face cache location before transformers is imported,
# since huggingface_hub reads HF_HOME at import time; setting it later
# has no effect on where models are downloaded.
os.environ["HF_HOME"] = "/data/.huggingface"

import json

import torch
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Load Qwen2.5-1.5B model and tokenizer
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load weights in float32; the Trainer's fp16 flag below handles mixed-precision
# compute. A model loaded purely in float16 cannot be fine-tuned with fp16=True
# (gradient unscaling fails).
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Data file for preloaded and dynamic data
data_file = "data/train_data.json"
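# The file holds a JSON list of {"prompt": ..., "completion": ...} records,
# appended to by the /adapt endpoint below.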

# Load or initialize dataset
if os.path.exists(data_file):
    with open(data_file, 'r') as f:
        train_texts = json.load(f)
else:
    train_texts = []
    os.makedirs(os.path.dirname(data_file), exist_ok=True)
    with open(data_file, 'w') as f:
        json.dump(train_texts, f)
print(f"Loaded {len(train_texts)} examples from {data_file}")

# Model save directory
model_save_dir = "./results/model"

@app.route('/')
def home():
    """Root endpoint to show API status and usage"""
    return jsonify({
        'status': 'SEAL Framework API is running',
        'version': '1.0.0',
        'model': model_name,
        'device': str(device),
        'training_examples': len(train_texts),
        'endpoints': {
            '/': 'GET - API status and information',
            '/adapt': 'POST - Adaptive model training and response',
            '/health': 'GET - Health check'
        },
        'usage': {
            'adapt_endpoint': {
                'method': 'POST',
                'content_type': 'application/json',
                'body': {'text': 'Your input text here'},
                'example': 'curl -X POST -H "Content-Type: application/json" -d \'{"text":"Hello world"}\' /adapt'
            }
        }
    })

@app.route('/health')
def health():
    """Health check endpoint"""
    try:
        # Smoke test: run a tiny generation to confirm the model responds
        test_input = "Health check"
        inputs = tokenizer(test_input, return_tensors="pt", truncation=True, max_length=32).to(device)
        with torch.no_grad():
            model.generate(**inputs, max_new_tokens=8, do_sample=False)
        
        return jsonify({
            'status': 'healthy',
            'model_loaded': True,
            'device': str(device),
            'training_examples': len(train_texts)
        })
    except Exception as e:
        return jsonify({
            'status': 'unhealthy',
            'error': str(e)
        }), 500

@app.route('/adapt', methods=['POST'])
def adapt_model():
    """Generate a self-edit, fine-tune on the growing dataset, and respond"""
    try:
        data = request.get_json(silent=True) or {}
        user_input = data.get('text', '')

        if not user_input:
            return jsonify({'error': 'No input provided'}), 400

        # Generate a self-edit (a model-written rephrasing of the input)
        prompt = f"Rephrase this: {user_input}"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
        self_edit_output = model.generate(**inputs, max_new_tokens=100)
        # Decode only the newly generated tokens, not the echoed prompt,
        # so the stored completion is not polluted with the instruction
        self_edit = tokenizer.decode(
            self_edit_output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
        )

        # Add to training data and save to disk
        train_texts.append({"prompt": user_input, "completion": self_edit})
        with open(data_file, 'w') as f:
            json.dump(train_texts, f, indent=2)

        # Prepare dataset for fine-tuning
        encodings = tokenizer(
            [t["prompt"] + " " + t["completion"] for t in train_texts],
            truncation=True,
            padding=True,
            max_length=256,
            return_tensors="pt"
        )
        # Mask padding positions with -100 so the loss ignores them
        labels = encodings["input_ids"].clone()
        labels[encodings["attention_mask"] == 0] = -100
        # A plain list of dicts is a valid Trainer dataset: it only needs
        # __len__/__getitem__, and the default collator stacks the tensors
        dataset = [
            {
                "input_ids": encodings["input_ids"][i],
                "attention_mask": encodings["attention_mask"][i],
                "labels": labels[i]
            } for i in range(len(train_texts))
        ]

        # Fine-tune model
        training_args = TrainingArguments(
            output_dir=model_save_dir,
            num_train_epochs=1,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            logging_steps=10,
            save_steps=10,
            save_total_limit=1,  # Keep only latest checkpoint
            disable_tqdm=True,
            fp16=torch.cuda.is_available()
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset
        )
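        # Note: each /adapt call re-trains on the entire accumulated dataset,
        # so request latency grows with the number of stored examples.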
        trainer.train()

        # Save model weights
        trainer.save_model(model_save_dir)
        tokenizer.save_pretrained(model_save_dir)

        # Switch back to eval mode after training, then generate a response
        # with the freshly adapted weights
        model.eval()
        response_inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=128).to(device)
        response_output = model.generate(**response_inputs, max_new_tokens=128)
        response = tokenizer.decode(
            response_output[0][response_inputs["input_ids"].shape[1]:], skip_special_tokens=True
        )

        return jsonify({
            'input': user_input,
            'self_edit': self_edit,
            'response': response,
            'training_examples': len(train_texts),
            'status': 'Model adapted successfully'
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.errorhandler(404)
def not_found(error):
    """Custom 404 handler"""
    return jsonify({
        'error': 'Endpoint not found',
        'available_endpoints': {
            '/': 'GET - API information',
            '/health': 'GET - Health check',
            '/adapt': 'POST - Adaptive model training'
        }
    }), 404

@app.errorhandler(500)
def internal_error(error):
    """Custom 500 handler"""
    return jsonify({
        'error': 'Internal server error',
        'message': 'Please check the server logs for more details'
    }), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860, debug=False)
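
# Example usage (illustrative; assumes the server is reachable on localhost:7860):
#   curl http://localhost:7860/health
#   curl -X POST -H "Content-Type: application/json" \
#        -d '{"text": "Hello world"}' http://localhost:7860/adapt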