import os
import logging
import time
from functools import lru_cache

# Set the cache directory before transformers is imported so it is picked up
os.environ['TRANSFORMERS_CACHE'] = '/app/cache'
os.makedirs('/app/cache', exist_ok=True)

from flask import Flask, request, jsonify, render_template
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

app = Flask(__name__)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration
MODEL_NAME = "redrussianarmy/gpt2-turkish-cased"


@lru_cache(maxsize=1)
def load_model():
    try:
        logger.info("Loading model...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # Make sure a pad token is defined (GPT-2 has none by default)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Use float32 on CPU
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        model = model.to('cpu').float()
        model.eval()
        torch.set_num_threads(1)

        logger.info("Model loaded successfully")
        return model, tokenizer
    except Exception as e:
        logger.error(f"Model loading error: {str(e)}")
        raise RuntimeError(f"Model could not be loaded: {str(e)}")


@app.route('/')
def home():
    return render_template('index.html')


@app.route('/health')
def health_check():
    try:
        load_model()
        return jsonify({"status": "healthy"}), 200
    except Exception as e:
        return jsonify({"status": "unhealthy", "error": str(e)}), 500


@app.route('/generate', methods=['POST'])
def generate():
    try:
        start_time = time.time()
        data = request.get_json()
        prompt = data.get('prompt', '')[:300]  # limit to 300 characters

        if not prompt:
            return jsonify({"error": "Prompt is required", "success": False}), 400

        model, tokenizer = load_model()

        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to('cpu')

        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_new_tokens=80,  # cap the generated tokens regardless of prompt length
                do_sample=True,
                top_k=40,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
                num_return_sequences=1,
                use_cache=True
            )

        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        processing_time = round(time.time() - start_time, 2)

        return jsonify({
            "result": result,
            "success": True,
            "processing_time": processing_time
        })
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        return jsonify({
            "error": str(e),
            "success": False
        }), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860, threaded=False)
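
# --- Usage sketch (not part of the app itself) ---
# A minimal example of exercising the /generate endpoint once the server is running.
# Assumptions: the server is reachable at localhost:7860 and the client machine has
# the `requests` package installed; adjust host/port to match your deployment.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/generate",
#       json={"prompt": "Merhaba dünya"},
#       timeout=60,
#   )
#   print(resp.json())  # e.g. {"result": "...", "success": True, "processing_time": 1.23}
#
# Or from the shell with curl:
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Merhaba dünya"}'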