Update app.py
Browse files
app.py
CHANGED
@@ -58,3 +58,60 @@ class UserInputRequest(BaseModel):
|
|
58 |
def generate(req: UserInputRequest):
|
59 |
try:
|
60 |
start_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def generate(req: UserInputRequest):
    """Run one chat completion for ``req`` and return ``{"response": <text>}``.

    The system prompt and the user input are rendered through the tokenizer's
    chat template, the model generates at most as many tokens as remain in
    the context window, and only the newly generated tokens are decoded.

    Args:
        req: Request carrying ``system_prompt`` and ``user_input``.

    Returns:
        A dict with a single ``"response"`` key holding the decoded answer.

    Raises:
        HTTPException: Status 500 with the error text as detail when any
            step of the pipeline fails.
    """
    try:
        start_time = time.time()
        log(f"💬 Kullanıcı isteği alındı: {req.user_input}")

        messages = [
            {"role": "system", "content": req.system_prompt},
            {"role": "user", "content": req.user_input},
        ]

        chat_input = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)

        input_len = chat_input.shape[-1]
        # Fall back to 4096 both when the attribute is missing and when it is
        # present but None (the subtraction below would raise TypeError).
        total_ctx = getattr(model.config, "max_position_embeddings", None) or 4096
        # Always allow at least one token, even if the prompt fills the window.
        max_new_tokens = max(1, total_ctx - input_len)

        log(f"ℹ️ Input uzunluğu: {input_len}, max_new_tokens ayarlandı: {max_new_tokens}")

        # Collect stop-token ids: the tokenizer's EOS plus "<|eot_id|>" when
        # the vocabulary has it. Drop None and duplicates so generate() gets
        # a clean list.
        candidate_ids = [tokenizer.eos_token_id]
        if "<|eot_id|>" in tokenizer.get_vocab():
            candidate_ids.append(tokenizer.convert_tokens_to_ids("<|eot_id|>"))
        terminators = []
        for token_id in candidate_ids:
            if token_id is not None and token_id not in terminators:
                terminators.append(token_id)

        outputs = model.generate(
            input_ids=chat_input,
            # Single un-padded prompt: every position is a real token, so the
            # mask is all ones. Passing it explicitly avoids transformers
            # having to guess the mask from pad/eos ids.
            attention_mask=chat_input.new_ones(chat_input.shape),
            max_new_tokens=max_new_tokens,
            # An empty list means "no explicit terminators": defer to the
            # model's generation config.
            eos_token_id=terminators or None,
        )

        # Strip the prompt tokens; keep only what the model produced.
        response = outputs[0][input_len:]
        answer = tokenizer.decode(response, skip_special_tokens=True)

        elapsed = time.time() - start_time
        log(f"✅ Yanıt süresi: {elapsed:.2f} saniye")

        return {"response": answer}

    except Exception as e:
        log(f"❌ /generate hatası: {e}")
        traceback.print_exc()
        # Keep the original exception chained for server-side debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
104 |
+
|
105 |
+
@app.get("/")
def health():
    """Liveness endpoint: report that the service is up."""
    payload = {"status": "ok"}
    return payload
|
108 |
+
|
109 |
+
def run_health_server():
    """Serve ``app`` with uvicorn on all interfaces, port 7860 (blocking)."""
    # Imported lazily so uvicorn is only loaded when the server is started.
    import uvicorn

    bind = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind)
|
112 |
+
|
113 |
+
# Run the HTTP server in a daemon thread so it ends together with the process.
server_thread = threading.Thread(target=run_health_server, daemon=True)
server_thread.start()

log("⏸️ Uygulama bekleme modunda...")

# Park the main thread: if it returned, the daemon server thread would be
# torn down with it.
while True:
    time.sleep(60)
|