ciyidogan committed on
Commit
4113d2a
·
verified ·
1 Parent(s): 883403d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py CHANGED
@@ -58,3 +58,60 @@ class UserInputRequest(BaseModel):
58
  def generate(req: UserInputRequest):
59
  try:
60
  start_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def generate(req: UserInputRequest):
59
  try:
60
  start_time = time.time()
61
+ log(f"💬 Kullanıcı isteği alındı: {req.user_input}")
62
+
63
+ messages = [
64
+ {"role": "system", "content": req.system_prompt},
65
+ {"role": "user", "content": req.user_input}
66
+ ]
67
+
68
+ chat_input = tokenizer.apply_chat_template(
69
+ messages,
70
+ add_generation_prompt=True,
71
+ return_tensors="pt"
72
+ ).to(model.device)
73
+
74
+ input_len = chat_input.shape[-1]
75
+ total_ctx = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 4096
76
+ max_new_tokens = max(1, total_ctx - input_len)
77
+
78
+ log(f"ℹ️ Input uzunluğu: {input_len}, max_new_tokens ayarlandı: {max_new_tokens}")
79
+
80
+ terminators = [
81
+ tokenizer.eos_token_id,
82
+ tokenizer.convert_tokens_to_ids("<|eot_id|>") if "<|eot_id|>" in tokenizer.get_vocab() else tokenizer.eos_token_id
83
+ ]
84
+
85
+ outputs = model.generate(
86
+ input_ids=chat_input,
87
+ max_new_tokens=max_new_tokens,
88
+ eos_token_id=terminators
89
+ )
90
+
91
+ response = outputs[0][input_len:]
92
+ answer = tokenizer.decode(response, skip_special_tokens=True)
93
+
94
+ end_time = time.time()
95
+ elapsed = end_time - start_time
96
+ log(f"✅ Yanıt süresi: {elapsed:.2f} saniye")
97
+
98
+ return {"response": answer}
99
+
100
+ except Exception as e:
101
+ log(f"❌ /generate hatası: {e}")
102
+ traceback.print_exc()
103
+ raise HTTPException(status_code=500, detail=str(e))
104
+
105
@app.get("/")
def health():
    """Answer ``GET /`` with a fixed ``{"status": "ok"}`` payload."""
    return dict(status="ok")
108
+
109
def run_health_server():
    """Serve ``app`` with uvicorn on 0.0.0.0:7860.

    Intended as a thread target; the call does not return while the server
    is running.
    """
    # Imported inside the function, as in the original, so uvicorn is only
    # loaded when the server is actually started.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)
112
+
113
# Launch the HTTP server on a background daemon thread so it does not keep
# the process alive on its own.
server_thread = threading.Thread(target=run_health_server, daemon=True)
server_thread.start()

log("⏸️ Uygulama bekleme modunda...")

# Park the main thread indefinitely; the daemon server thread would be torn
# down if the main thread exited.
while True:
    time.sleep(60)