mike23415 committed
Commit 75cbe07 · verified · 1 Parent(s): 3bd81ff

Update app.py

Files changed (1):
  1. app.py +267 -106

app.py CHANGED
@@ -1,55 +1,127 @@
 from flask import Flask, request, jsonify
 from flask_cors import CORS
-from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
 import torch
 import os
 import json

 app = Flask(__name__)
-CORS(app)  # Enable CORS for all routes

-# Set Hugging Face cache to ephemeral storage
-os.environ["HF_HOME"] = "/data/.huggingface"

-# Load Qwen2.5-1.5B model and tokenizer
-model_name = "Qwen/Qwen2.5-1.5B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

-# Move to GPU if available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)

-# Data file for preloaded and dynamic data
-data_file = "data/train_data.json"

-# Load or initialize dataset
-if os.path.exists(data_file):
-    with open(data_file, 'r') as f:
-        train_texts = json.load(f)
-else:
-    train_texts = []
-    os.makedirs(os.path.dirname(data_file), exist_ok=True)
-    with open(data_file, 'w') as f:
-        json.dump(train_texts, f)
-print(f"Loaded {len(train_texts)} examples from {data_file}")

-# Model save directory
-model_save_dir = "./results/model"

 @app.route('/')
 def home():
-    """Root endpoint to show API status and usage"""
     return jsonify({
         'status': 'SEAL Framework API is running',
         'version': '1.0.0',
-        'model': model_name,
-        'device': str(device),
         'training_examples': len(train_texts),
         'endpoints': {
             '/': 'GET - API status and information',
             '/adapt': 'POST - Adaptive model training and response',
-            '/health': 'GET - Health check'
         },
         'usage': {
             'adapt_endpoint': {
@@ -61,122 +133,211 @@ def home():
         }
     })

 @app.route('/health')
 def health():
-    """Health check endpoint"""
     try:
         # Simple model test
         test_input = "Health check"
-        inputs = tokenizer(test_input, return_tensors="pt", truncation=True, max_length=32).to(device)
-        with torch.no_grad():
-            outputs = model.generate(**inputs, max_length=40, num_return_sequences=1, do_sample=False)

         return jsonify({
             'status': 'healthy',
             'model_loaded': True,
             'device': str(device),
-            'training_examples': len(train_texts)
         })
     except Exception as e:
         return jsonify({
             'status': 'unhealthy',
-            'error': str(e)
         }), 500

 @app.route('/adapt', methods=['POST'])
 def adapt_model():
     try:
         data = request.json
-        user_input = data.get('text', '')
-
         if not user_input:
-            return jsonify({'error': 'No input provided'}), 400
-
-        # Generate self-edit
-        prompt = f"Rephrase this: {user_input}"
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
-        self_edit_output = model.generate(**inputs, max_length=150, num_return_sequences=1)
-        self_edit = tokenizer.decode(self_edit_output[0], skip_special_tokens=True)
-
-        # Add to training data and save to disk
-        train_texts.append({"prompt": user_input, "completion": self_edit})
-        with open(data_file, 'w') as f:
-            json.dump(train_texts, f, indent=2)
-
-        # Prepare dataset for fine-tuning
-        encodings = tokenizer(
-            [t["prompt"] + " " + t["completion"] for t in train_texts],
-            truncation=True,
-            padding=True,
-            max_length=256,
-            return_tensors="pt"
-        )
-        dataset = [
-            {
-                "input_ids": encodings["input_ids"][i],
-                "attention_mask": encodings["attention_mask"][i],
-                "labels": encodings["input_ids"][i]
-            } for i in range(len(train_texts))
-        ]
-
-        # Fine-tune model
-        training_args = TrainingArguments(
-            output_dir=model_save_dir,
-            num_train_epochs=1,
-            per_device_train_batch_size=2,
-            gradient_accumulation_steps=4,
-            logging_steps=10,
-            save_steps=10,
-            save_total_limit=1,  # Keep only latest checkpoint
-            disable_tqdm=True,
-            fp16=True if torch.cuda.is_available() else False
-        )
-        trainer = Trainer(
-            model=model,
-            args=training_args,
-            train_dataset=dataset
-        )
-        trainer.train()
-
-        # Save model weights
-        trainer.save_model(model_save_dir)
-        tokenizer.save_pretrained(model_save_dir)
-
-        # Generate response
-        response_inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=128).to(device)
-        response_output = model.generate(**response_inputs, max_length=200, num_return_sequences=1)
-        response = tokenizer.decode(response_output[0], skip_special_tokens=True)
-
         return jsonify({
             'input': user_input,
             'self_edit': self_edit,
             'response': response,
             'training_examples': len(train_texts),
-            'status': 'Model adapted successfully'
         })
-
     except Exception as e:
-        return jsonify({'error': str(e)}), 500

 @app.errorhandler(404)
 def not_found(error):
-    """Custom 404 handler"""
     return jsonify({
         'error': 'Endpoint not found',
-        'available_endpoints': {
-            '/': 'GET - API information',
-            '/health': 'GET - Health check',
-            '/adapt': 'POST - Adaptive model training'
-        }
     }), 404

 @app.errorhandler(500)
 def internal_error(error):
-    """Custom 500 handler"""
     return jsonify({
         'error': 'Internal server error',
-        'message': 'Please check the server logs for more details'
     }), 500

 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860, debug=False)
@@ -1,55 +1,127 @@
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 import torch
 import os
 import json
+import logging
+import gc
+from contextlib import contextmanager
+from datetime import datetime  # added: used for the /adapt timestamp (torch has no now())
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 app = Flask(__name__)
+CORS(app)

+# Global variables for model and tokenizer
+model = None
+tokenizer = None
+device = None

+# Configuration
+MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+DATA_FILE = "data/train_data.json"
+MODEL_SAVE_DIR = "./results/model"
+
+# Set environment variables
+os.environ["HF_HOME"] = "/data/.huggingface"
+os.environ["TRANSFORMERS_CACHE"] = "/data/.huggingface"

+def initialize_model():
+    """Initialize model and tokenizer with error handling"""
+    global model, tokenizer, device
+
+    try:
+        logger.info("Initializing model and tokenizer...")
+
+        # Set device
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        logger.info(f"Using device: {device}")
+
+        # Import here to avoid import errors during startup
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        # Load tokenizer first (lighter)
+        logger.info("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(
+            MODEL_NAME,
+            trust_remote_code=True,
+            cache_dir="/data/.huggingface"
+        )
+
+        # Add padding token if it doesn't exist
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        logger.info("Loading model...")
+        # Load model with specific configuration for stability
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
+            device_map="auto" if device.type == "cuda" else None,
+            trust_remote_code=True,
+            cache_dir="/data/.huggingface",
+            low_cpu_mem_usage=True
+        )
+
+        # Move to device if not using device_map
+        if device.type == "cpu":
+            model = model.to(device)
+
+        logger.info("Model initialization completed successfully")
+        return True
+
+    except Exception as e:
+        logger.error(f"Failed to initialize model: {str(e)}")
+        return False

+def load_training_data():
+    """Load or initialize training data"""
+    try:
+        if os.path.exists(DATA_FILE):
+            with open(DATA_FILE, 'r') as f:
+                train_texts = json.load(f)
+        else:
+            train_texts = []
+            os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
+            with open(DATA_FILE, 'w') as f:
+                json.dump(train_texts, f)
+
+        logger.info(f"Loaded {len(train_texts)} training examples")
+        return train_texts
+    except Exception as e:
+        logger.error(f"Error loading training data: {str(e)}")
+        return []

+@contextmanager
+def torch_no_grad():
+    """Context manager for torch.no_grad with error handling"""
+    try:
+        with torch.no_grad():
+            yield
+    except Exception as e:
+        logger.error(f"Error in torch context: {str(e)}")
+        raise

+# Initialize data
+train_texts = load_training_data()

 @app.route('/')
 def home():
+    """Root endpoint with system information"""
     return jsonify({
         'status': 'SEAL Framework API is running',
         'version': '1.0.0',
+        'model': MODEL_NAME,
+        'model_loaded': model is not None,
+        'device': str(device) if device else 'Not initialized',
         'training_examples': len(train_texts),
         'endpoints': {
             '/': 'GET - API status and information',
             '/adapt': 'POST - Adaptive model training and response',
+            '/health': 'GET - Health check',
+            '/init': 'POST - Initialize model (if not already loaded)'
         },
         'usage': {
             'adapt_endpoint': {
@@ -61,122 +133,211 @@ def home():
         }
     })

+@app.route('/init', methods=['POST'])
+def init_model():
+    """Manual model initialization endpoint"""
+    global model, tokenizer
+
+    if model is not None:
+        return jsonify({'status': 'Model already initialized', 'success': True})
+
+    success = initialize_model()
+    if success:
+        return jsonify({'status': 'Model initialized successfully', 'success': True})
+    else:
+        return jsonify({'status': 'Model initialization failed', 'success': False}), 500
+
 @app.route('/health')
 def health():
+    """Comprehensive health check"""
     try:
+        # Check if model is loaded
+        if model is None or tokenizer is None:
+            return jsonify({
+                'status': 'unhealthy',
+                'error': 'Model not initialized',
+                'model_loaded': False,
+                'suggestion': 'Call /init endpoint to initialize model'
+            }), 500
+
         # Simple model test
         test_input = "Health check"
+        try:
+            with torch_no_grad():
+                inputs = tokenizer(
+                    test_input,
+                    return_tensors="pt",
+                    truncation=True,
+                    max_length=32,
+                    padding=True
+                ).to(device)
+
+                outputs = model.generate(
+                    **inputs,
+                    max_length=40,
+                    num_return_sequences=1,
+                    do_sample=False,
+                    pad_token_id=tokenizer.pad_token_id
+                )
+        except Exception as e:
+            raise Exception(f"Model inference failed: {str(e)}")

         return jsonify({
             'status': 'healthy',
             'model_loaded': True,
             'device': str(device),
+            'training_examples': len(train_texts),
+            'torch_version': torch.__version__
         })
+
     except Exception as e:
+        logger.error(f"Health check failed: {str(e)}")
         return jsonify({
             'status': 'unhealthy',
+            'error': str(e),
+            'model_loaded': model is not None
         }), 500

 @app.route('/adapt', methods=['POST'])
 def adapt_model():
+    """Simplified adaptive model endpoint"""
+    global train_texts
+
     try:
+        # Check if model is initialized
+        if model is None or tokenizer is None:
+            return jsonify({
+                'error': 'Model not initialized. Call /init endpoint first.',
+                'suggestion': 'POST to /init to initialize the model'
+            }), 500
+
+        # Get input
         data = request.json
+        if not data or 'text' not in data:
+            return jsonify({'error': 'No text provided in request body'}), 400
+
+        user_input = data['text'].strip()
         if not user_input:
+            return jsonify({'error': 'Empty text provided'}), 400
+
+        logger.info(f"Processing input: {user_input[:50]}...")
+
+        # Generate self-edit (simplified approach)
+        try:
+            with torch_no_grad():
+                prompt = f"Rephrase this text: {user_input}"
+                inputs = tokenizer(
+                    prompt,
+                    return_tensors="pt",
+                    truncation=True,
+                    max_length=128,
+                    padding=True
+                ).to(device)
+
+                self_edit_output = model.generate(
+                    **inputs,
+                    max_length=200,
+                    num_return_sequences=1,
+                    do_sample=True,
+                    temperature=0.7,
+                    pad_token_id=tokenizer.pad_token_id
+                )
+
+                self_edit = tokenizer.decode(
+                    self_edit_output[0],
+                    skip_special_tokens=True
+                ).replace(prompt, "").strip()
+
+        except Exception as e:
+            logger.error(f"Self-edit generation failed: {str(e)}")
+            self_edit = f"Self-edit failed: {str(e)}"
+
+        # Generate response (simplified)
+        try:
+            with torch_no_grad():
+                response_inputs = tokenizer(
+                    user_input,
+                    return_tensors="pt",
+                    truncation=True,
+                    max_length=128,
+                    padding=True
+                ).to(device)
+
+                response_output = model.generate(
+                    **response_inputs,
+                    max_length=256,
+                    num_return_sequences=1,
+                    do_sample=True,
+                    temperature=0.8,
+                    pad_token_id=tokenizer.pad_token_id
+                )
+
+                response = tokenizer.decode(
+                    response_output[0],
+                    skip_special_tokens=True
+                ).replace(user_input, "").strip()
+
+        except Exception as e:
+            logger.error(f"Response generation failed: {str(e)}")
+            response = f"Response generation failed: {str(e)}"
+
+        # Save training data (simplified - no actual fine-tuning for stability)
+        try:
+            train_texts.append({
+                "prompt": user_input,
+                "completion": self_edit,
+                "timestamp": datetime.utcnow().isoformat()  # fixed: torch.now() does not exist
+            })
+
+            # Save to file
+            with open(DATA_FILE, 'w') as f:
+                json.dump(train_texts, f, indent=2)
+
+        except Exception as e:
+            logger.error(f"Failed to save training data: {str(e)}")
+
+        # Clean up GPU memory
+        if device.type == "cuda":
+            torch.cuda.empty_cache()
+            gc.collect()
+
         return jsonify({
             'input': user_input,
             'self_edit': self_edit,
             'response': response,
             'training_examples': len(train_texts),
+            'status': 'Processing completed successfully',
+            'note': 'Fine-tuning disabled for stability - using generation only'
         })
+
     except Exception as e:
+        logger.error(f"Adapt endpoint error: {str(e)}")
+        return jsonify({
+            'error': str(e),
+            'type': type(e).__name__,
+            'suggestion': 'Check logs for detailed error information'
+        }), 500

 @app.errorhandler(404)
 def not_found(error):
     return jsonify({
         'error': 'Endpoint not found',
+        'available_endpoints': ['/health', '/adapt', '/init', '/']
     }), 404

 @app.errorhandler(500)
 def internal_error(error):
     return jsonify({
         'error': 'Internal server error',
+        'message': 'Check server logs for details'
     }), 500

+# Initialize model on startup (with fallback)
 if __name__ == '__main__':
+    logger.info("Starting SEAL Framework API...")
+    initialize_model()
+    app.run(host='0.0.0.0', port=7860, debug=False)
+else:
+    # For production deployment
+    logger.info("SEAL Framework API starting in production mode...")
+    # Don't initialize model immediately in production to avoid startup timeouts
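
After this change, the intended flow is: POST /init to load the model (in production mode nothing is loaded at startup), GET /health to confirm a generation pass works, then POST /adapt with a text payload. A minimal client sketch of that flow, assuming the server is reachable at http://localhost:7860 and the third-party requests package is installed (the URL and sample text here are illustrative, not part of the commit):

import requests

BASE = "http://localhost:7860"  # assumed local deployment; adjust host/port as needed

# Load the model first; in production mode it is not initialized at startup.
print(requests.post(f"{BASE}/init").json())

# Confirm the model can run a short generation pass.
print(requests.get(f"{BASE}/health").json())

# Submit text: the server generates a self-edit and a response, and appends
# the pair to data/train_data.json (no fine-tuning is performed in this version).
r = requests.post(f"{BASE}/adapt", json={"text": "The quick brown fox jumps over the lazy dog."})
print(r.json())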