ciyidogan committed on
Commit
089f657
·
verified ·
1 Parent(s): 933f767

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -32
app.py CHANGED
@@ -2,8 +2,9 @@ import os
2
  import sys
3
  import time
4
  import threading
 
5
  from datetime import datetime
6
- from fastapi import FastAPI
7
  from pydantic import BaseModel
8
  from transformers import pipeline
9
  from unsloth import FastLanguageModel
@@ -27,42 +28,62 @@ tokenizer = None
27
  @app.on_event("startup")
28
  def load_model():
29
  global pipe, model, tokenizer
30
- model_name = "atasoglu/Turkish-Llama-3-8B-function-calling"
31
- log(f"⬇️ Model yükleniyor: {model_name}")
32
- model, tokenizer = FastLanguageModel.from_pretrained(
33
- model_name=model_name,
34
- load_in_4bit=True,
35
- device_map="auto"
36
- )
37
- FastLanguageModel.for_inference(model)
38
- pipe = pipeline(
39
- "text-generation",
40
- model=model,
41
- tokenizer=tokenizer,
42
- device_map="auto"
43
- )
44
- log("✅ Model yüklendi, test etmeye hazır.")
 
 
 
 
 
 
 
 
 
 
45
 
46
  class TestRequest(BaseModel):
47
  user_input: str
48
 
49
  @app.post("/test")
50
  def test(req: TestRequest):
51
- prompt = f"Kullanıcı: {req.user_input}\nAsistan:"
52
- log(f"💬 Prompt alındı: {req.user_input}")
53
- inputs = tokenizer([prompt], return_tensors="pt") # .to("cuda") KALDIRILDI
54
- outputs = model.generate(
55
- **inputs,
56
- max_new_tokens=256,
57
- temperature=0.2,
58
- top_p=0.95,
59
- repetition_penalty=1.1,
60
- do_sample=True
61
- )
62
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
63
- answer_clean = answer.split("Asistan:")[-1].strip()
64
- log("✅ Cevap üretildi.")
65
- return {"response": answer_clean}
 
 
 
 
 
 
 
 
 
 
66
 
67
  @app.get("/")
68
  def health():
@@ -74,6 +95,6 @@ def run_health_server():
74
 
75
  threading.Thread(target=run_health_server, daemon=True).start()
76
 
77
- log("⏸️ Uygulama bekleme modunda...")
78
  while True:
79
  time.sleep(60)
 
2
  import sys
3
  import time
4
  import threading
5
+ import traceback
6
  from datetime import datetime
7
+ from fastapi import FastAPI, HTTPException
8
  from pydantic import BaseModel
9
  from transformers import pipeline
10
  from unsloth import FastLanguageModel
 
28
  @app.on_event("startup")
29
  def load_model():
30
  global pipe, model, tokenizer
31
+ try:
32
+ model_name = "atasoglu/Turkish-Llama-3-8B-function-calling"
33
+ log(f"⬇️ [1] Model yükleme başlatılıyor: {model_name}")
34
+ model, tokenizer = FastLanguageModel.from_pretrained(
35
+ model_name=model_name,
36
+ load_in_4bit=True,
37
+ device_map="auto"
38
+ )
39
+ log("✅ [2] Model ve tokenizer çekildi.")
40
+
41
+ FastLanguageModel.for_inference(model)
42
+ log("✅ [3] Model inference moduna alındı.")
43
+
44
+ pipe = pipeline(
45
+ "text-generation",
46
+ model=model,
47
+ tokenizer=tokenizer,
48
+ device_map="auto"
49
+ )
50
+ log("✅ [4] Pipeline başarıyla kuruldu, test etmeye hazır.")
51
+
52
+ except Exception as e:
53
+ log(f"❌ [ERROR] Model yükleme sırasında hata: {e}")
54
+ traceback.print_exc()
55
+ raise
56
 
57
class TestRequest(BaseModel):
    """Request body for POST /test."""
    # Raw user message; it is interpolated into the chat prompt verbatim.
    user_input: str
59
 
60
  @app.post("/test")
61
  def test(req: TestRequest):
62
+ try:
63
+ prompt = f"Kullanıcı: {req.user_input}\nAsistan:"
64
+ log(f"💬 [5] Prompt alındı: {req.user_input}")
65
+
66
+ inputs = tokenizer([prompt], return_tensors="pt")
67
+ log("🧠 [6] Tokenizer çıktılarını hazırladı, generate başlıyor...")
68
+
69
+ outputs = model.generate(
70
+ **inputs,
71
+ max_new_tokens=256,
72
+ temperature=0.2,
73
+ top_p=0.95,
74
+ repetition_penalty=1.1,
75
+ do_sample=True
76
+ )
77
+ log("✅ [7] Generate tamamlandı, cevap dönülüyor.")
78
+
79
+ answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
80
+ answer_clean = answer.split("Asistan:")[-1].strip()
81
+ return {"response": answer_clean}
82
+
83
+ except Exception as e:
84
+ log(f"❌ [ERROR] /test sırasında hata: {e}")
85
+ traceback.print_exc()
86
+ raise HTTPException(status_code=500, detail=str(e))
87
 
88
  @app.get("/")
89
  def health():
 
95
 
96
# Serve the health endpoint from a daemon thread so the main thread is
# free for the keep-alive loop below; daemon=True lets the process exit
# without joining it.
threading.Thread(target=run_health_server, daemon=True).start()

log("⏸️ [0] Uygulama bekleme modunda, startup bekleniyor...")
# Keep the main thread alive forever — if it returned, the daemon health
# server thread would be killed with it.
while True:
    time.sleep(60)