Update app.py
Browse files
app.py
CHANGED
@@ -58,66 +58,3 @@ class UserInputRequest(BaseModel):
|
|
58 |
def generate(req: UserInputRequest):
|
59 |
try:
|
60 |
start_time = time.time()
|
61 |
-
log(f"💬 Kullanıcı isteği alındı: {req.user_input}")
|
62 |
-
|
63 |
-
messages = [
|
64 |
-
{"role": "system", "content": req.system_prompt},
|
65 |
-
{"role": "user", "content": req.user_input}
|
66 |
-
]
|
67 |
-
|
68 |
-
input_data = tokenizer.apply_chat_template(
|
69 |
-
messages,
|
70 |
-
add_generation_prompt=True,
|
71 |
-
return_tensors="pt",
|
72 |
-
padding=True
|
73 |
-
).to(model.device)
|
74 |
-
|
75 |
-
input_ids = input_data['input_ids']
|
76 |
-
attention_mask = input_data['attention_mask']
|
77 |
-
|
78 |
-
# === Dinamik token hesaplama
|
79 |
-
total_ctx = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 4096
|
80 |
-
input_len = input_ids.shape[-1]
|
81 |
-
max_new_tokens = max(1, total_ctx - input_len)
|
82 |
-
|
83 |
-
log(f"ℹ️ Input uzunluğu: {input_len}, max_new_tokens ayarlandı: {max_new_tokens}")
|
84 |
-
|
85 |
-
terminators = [
|
86 |
-
tokenizer.eos_token_id,
|
87 |
-
tokenizer.convert_tokens_to_ids("<|eot_id|>") if "<|eot_id|>" in tokenizer.get_vocab() else tokenizer.eos_token_id
|
88 |
-
]
|
89 |
-
|
90 |
-
outputs = model.generate(
|
91 |
-
input_ids=input_ids,
|
92 |
-
attention_mask=attention_mask,
|
93 |
-
max_new_tokens=max_new_tokens,
|
94 |
-
eos_token_id=terminators
|
95 |
-
)
|
96 |
-
|
97 |
-
response = outputs[0][input_len:]
|
98 |
-
answer = tokenizer.decode(response, skip_special_tokens=True)
|
99 |
-
|
100 |
-
end_time = time.time()
|
101 |
-
elapsed = end_time - start_time
|
102 |
-
log(f"✅ Yanıt süresi: {elapsed:.2f} saniye")
|
103 |
-
|
104 |
-
return {"response": answer}
|
105 |
-
|
106 |
-
except Exception as e:
|
107 |
-
log(f"❌ /generate hatası: {e}")
|
108 |
-
traceback.print_exc()
|
109 |
-
raise HTTPException(status_code=500, detail=str(e))
|
110 |
-
|
111 |
-
@app.get("/")
|
112 |
-
def health():
|
113 |
-
return {"status": "ok"}
|
114 |
-
|
115 |
-
def run_health_server():
|
116 |
-
import uvicorn
|
117 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
118 |
-
|
119 |
-
threading.Thread(target=run_health_server, daemon=True).start()
|
120 |
-
|
121 |
-
log("⏸️ Uygulama bekleme modunda...")
|
122 |
-
while True:
|
123 |
-
time.sleep(60)
|
|
|
58 |
def generate(req: UserInputRequest):
|
59 |
try:
|
60 |
start_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|