# --- Web-page scrape residue (not program code), preserved as comments ---
# Spaces: Sleeping / Sleeping
# File size: 2,998 Bytes
# ff49eed 2d16a47 ff49eed 2e6eb83 2d16a47 ff49eed 2d16a47 ff49eed 2d16a47 ff49eed db847c6
# (line-number gutter 1-96 omitted)
from functools import lru_cache
import os
from flask import Flask, request, jsonify, render_template
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import logging
import time
# Flask application object; the route decorators below register against it.
app = Flask(__name__)

# Root logging at INFO, plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Cache directory setup: make sure the directory exists and advertise it
# through the TRANSFORMERS_CACHE environment variable.
# NOTE(review): this assignment runs after the `transformers` import above;
# confirm the library still picks the directory up, or pass it explicitly
# when loading the model.
_CACHE_DIR = '/app/cache'
os.makedirs(_CACHE_DIR, exist_ok=True)
os.environ['TRANSFORMERS_CACHE'] = _CACHE_DIR

# Model configuration: Hub identifier of the checkpoint to serve.
MODEL_NAME = "redrussianarmy/gpt2-turkish-cased"
@lru_cache(maxsize=1)
def load_model():
    """Load the causal language model and its tokenizer, once per process.

    ``lru_cache`` memoizes the successful result, so later calls return the
    same ``(model, tokenizer)`` pair without reloading. A failed load is not
    cached (the exception propagates), so the next call tries again.

    Returns:
        tuple: ``(model, tokenizer)``; the model is on CPU in float32.

    Raises:
        RuntimeError: If the tokenizer or model cannot be loaded. The
            original exception is chained as ``__cause__``.
    """
    # TRANSFORMERS_CACHE is assigned in this file only after `transformers`
    # has been imported, so the library's import-time default may not reflect
    # it. Passing the directory explicitly makes the setting take effect;
    # None (variable unset) falls back to the library default.
    cache_dir = os.environ.get('TRANSFORMERS_CACHE')
    try:
        logger.info("Model yükleniyor...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=cache_dir)

        # Padding needs a pad token; reuse the end-of-sequence token when the
        # tokenizer does not define one.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Run on CPU in float32.
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, cache_dir=cache_dir)
        model = model.to('cpu').float()

        # Restrict PyTorch to a single CPU thread.
        torch.set_num_threads(1)

        logger.info("Model başarıyla yüklendi")
        return model, tokenizer
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Model yükleme hatası: {str(e)}")
        raise RuntimeError(f"Model yüklenemedi: {str(e)}") from e
@app.route('/')
def home():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/health')
def health_check():
    """Report whether the model can be loaded.

    Calls ``load_model()``; responds 200 with ``{"status": "healthy"}`` when
    it succeeds, otherwise 500 with ``"status": "unhealthy"`` and the error
    text.
    """
    try:
        load_model()
        http_status = 200
        payload = {"status": "healthy"}
    except Exception as exc:
        http_status = 500
        payload = {"status": "unhealthy", "error": str(exc)}
    return jsonify(payload), http_status
@app.route('/generate', methods=['POST'])
def generate():
    """Generate a text continuation for the prompt in the JSON request body.

    The body must be a JSON object with a non-empty string ``prompt``; only
    its first 300 characters are used.

    Returns:
        * 200 with ``result`` (decoded text), ``success: True`` and
          ``processing_time`` (seconds, rounded to 2 decimals).
        * 400 with ``error`` / ``success: False`` when the body is not a JSON
          object or ``prompt`` is missing, empty, or not a string.
        * 500 with ``error`` / ``success: False`` when model loading or
          generation fails.
    """
    start_time = time.time()

    # silent=True returns None for a missing or malformed JSON body instead
    # of raising, so client mistakes are answered with 400 rather than
    # falling into the 500 handler below.
    data = request.get_json(silent=True)
    prompt = data.get('prompt') if isinstance(data, dict) else None
    if not isinstance(prompt, str) or not prompt:
        return jsonify({"error": "Prompt gereklidir", "success": False}), 400
    prompt = prompt[:300]  # 300-character limit

    try:
        model, tokenizer = load_model()
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to('cpu')

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                # NOTE(review): max_length bounds prompt + generated tokens;
                # a prompt that already tokenizes to 80 or more tokens leaves
                # no room to generate. Consider max_new_tokens instead.
                max_length=80,
                do_sample=True,
                top_k=40,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
                num_return_sequences=1,
                # early_stopping is not passed: it is a beam-search option
                # and this call samples with the default single beam.
                use_cache=True,
            )

        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # logger.exception keeps the traceback for diagnosing server faults.
        logger.exception(f"Hata: {str(e)}")
        return jsonify({
            "error": str(e),
            "success": False
        }), 500

    processing_time = round(time.time() - start_time, 2)
    return jsonify({
        "result": result,
        "success": True,
        "processing_time": processing_time
    })
if __name__ == '__main__':
    # Script entry point: start Flask's built-in server on all interfaces,
    # port 7860, with threading disabled.
    app.run(host='0.0.0.0', port=7860, threaded=False)