ciyidogan commited on
Commit
fcff67e
·
verified ·
1 Parent(s): d6845a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -45
app.py CHANGED
@@ -4,17 +4,13 @@ import time
4
  import threading
5
  import traceback
6
  from datetime import datetime
7
-
8
- import unsloth
9
  from fastapi import FastAPI, HTTPException
10
  from pydantic import BaseModel
11
- from transformers import pipeline
12
- from unsloth import FastLanguageModel
13
 
14
  # === Ortam değişkenleri
15
  os.environ.setdefault("HF_HOME", "/app/.cache")
16
  os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")
17
- os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")
18
 
19
  # === Zamanlı log fonksiyonu
20
  def log(message):
@@ -24,67 +20,51 @@ def log(message):
24
  # === FastAPI başlat
25
  app = FastAPI()
26
  pipe = None
27
- model = None
28
- tokenizer = None
29
 
30
  @app.on_event("startup")
31
  def load_model():
32
- global pipe, model, tokenizer
33
  try:
34
- model_name = "atasoglu/Turkish-Llama-3-8B-function-calling"
35
- log(f"⬇️ [1] Model yükleme başlatılıyor: {model_name}")
36
-
37
- model, tokenizer = FastLanguageModel.from_pretrained(
38
- model_name=model_name,
39
- load_in_4bit=True,
40
- device_map="auto"
 
 
41
  )
42
- log("✅ [2] Model ve tokenizer çekildi.")
43
-
44
- FastLanguageModel.for_inference(model)
45
- log("✅ [3] Model inference moduna alındı.")
46
 
47
- pipe = pipeline(
48
- "text-generation",
49
- model=model,
50
- tokenizer=tokenizer,
51
- device_map="auto"
52
- )
53
- log("✅ [4] Pipeline başarıyla kuruldu, test etmeye hazır.")
54
 
55
  except Exception as e:
56
- log(f"❌ [ERROR] Model yükleme sırasında hata: {e}")
57
  traceback.print_exc()
58
  raise
59
 
60
- class TestRequest(BaseModel):
61
  user_input: str
62
 
63
- @app.post("/test")
64
- def test(req: TestRequest):
65
  try:
66
- prompt = f"Kullanıcı: {req.user_input}\nAsistan:"
67
- log(f"💬 [5] Prompt alındı: {req.user_input}")
68
-
69
- inputs = tokenizer([prompt], return_tensors="pt")
70
- log("🧠 [6] Tokenizer çıktılarını hazırladı, generate başlıyor...")
71
 
72
- outputs = model.generate(
73
- **inputs,
74
- max_new_tokens=256,
75
  temperature=0.2,
76
  top_p=0.95,
77
  repetition_penalty=1.1,
78
  do_sample=True
79
  )
80
- log("✅ [7] Generate tamamlandı, cevap dönülüyor.")
81
-
82
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
83
- answer_clean = answer.split("Asistan:")[-1].strip()
84
- return {"response": answer_clean}
85
 
86
  except Exception as e:
87
- log(f"❌ [ERROR] /test sırasında hata: {e}")
88
  traceback.print_exc()
89
  raise HTTPException(status_code=500, detail=str(e))
90
 
@@ -98,6 +78,6 @@ def run_health_server():
98
 
99
  threading.Thread(target=run_health_server, daemon=True).start()
100
 
101
- log("⏸️ [0] Uygulama bekleme modunda, startup bekleniyor...")
102
  while True:
103
  time.sleep(60)
 
4
  import threading
5
  import traceback
6
  from datetime import datetime
 
 
7
  from fastapi import FastAPI, HTTPException
8
  from pydantic import BaseModel
9
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
10
 
11
  # === Ortam değişkenleri
12
  os.environ.setdefault("HF_HOME", "/app/.cache")
13
  os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")
 
14
 
15
  # === Zamanlı log fonksiyonu
16
  def log(message):
 
20
  # === FastAPI başlat
21
  app = FastAPI()
22
  pipe = None
 
 
23
 
24
@app.on_event("startup")
def load_model():
    """Load the Turkish Llama model and build the text-generation pipeline.

    Runs once at FastAPI startup and stores the ready pipeline in the
    module-level ``pipe`` global used by the /generate endpoint.
    Re-raises on failure so the app does not come up half-initialized.
    """
    global pipe
    try:
        model_name = "ytu-ce-cosmos/Turkish-Llama-8b-Instruct-v0.1"
        log(f"⬇️ Model yükleme başlatılıyor: {model_name}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype="auto",  # auto-selects bf16 on A100-class hardware
            # NOTE(review): load_in_8bit=True could be added here to cut memory
        )

        # The model was already dispatched across devices via device_map
        # above; passing device_map to pipeline() again makes transformers
        # warn (and, in recent versions, error) about re-dispatching an
        # already device-mapped model, so it is deliberately omitted here.
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        log("✅ Model ve pipeline başarıyla hazır.")

    except Exception as e:
        # Startup boundary: log, print the traceback, and re-raise so
        # FastAPI aborts startup instead of serving without a model.
        log(f"❌ Model yükleme hatası: {e}")
        traceback.print_exc()
        raise
46
 
47
class UserInputRequest(BaseModel):
    """Request body for the /generate endpoint."""

    # Raw user message to be answered by the model.
    user_input: str
49
 
50
@app.post("/generate")
def generate(req: UserInputRequest):
    """Generate a model completion for the user's input text.

    Returns ``{"response": <generated text>}``; unexpected failures are
    mapped to HTTP 500 with the error message in ``detail``.
    """
    # Guard: the model loads in the startup hook — a request arriving
    # before that finishes would crash with TypeError on `pipe(...)` and
    # surface as a misleading 500.  Raised OUTSIDE the try block so the
    # broad handler below cannot re-wrap the 503 as a 500.
    if pipe is None:
        raise HTTPException(status_code=503, detail="Model is still loading")

    try:
        log(f"💬 Kullanıcı isteği alındı: {req.user_input}")

        result = pipe(
            req.user_input,
            max_new_tokens=200,
            temperature=0.2,
            top_p=0.95,
            repetition_penalty=1.1,
            do_sample=True,
        )
        # NOTE(review): pipeline's generated_text includes the prompt
        # itself; confirm whether clients expect the echo or only the
        # continuation (the previous revision stripped it).
        answer = result[0]["generated_text"]
        return {"response": answer}

    except Exception as e:
        # Endpoint boundary: log, dump the traceback, and convert any
        # failure into an explicit HTTP 500 for the caller.
        log(f"❌ /generate hatası: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
70
 
 
78
 
79
# Run the lightweight health-check server in the background; daemon=True
# lets the process exit without waiting for this thread.
threading.Thread(target=run_health_server, daemon=True).start()

log("⏸️ Uygulama bekleme modunda...")
# Keep the main thread alive indefinitely; all real work happens in the
# FastAPI app and the daemon health-server thread.
while True:
    time.sleep(60)