ciyidogan committed on
Commit
eb8847f
·
verified ·
1 Parent(s): c7a5eec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -29
app.py CHANGED
@@ -6,7 +6,7 @@ import traceback
6
  from datetime import datetime
7
  from fastapi import FastAPI, HTTPException
8
  from pydantic import BaseModel
9
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
 
11
  # === Ortam değişkenleri
12
  os.environ.setdefault("HF_HOME", "/app/.cache")
@@ -19,11 +19,12 @@ def log(message):
19
 
20
  # === FastAPI başlat
21
  app = FastAPI()
22
- pipe = None
 
23
 
24
  @app.on_event("startup")
25
  def load_model():
26
- global pipe
27
  try:
28
  model_name = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
29
  log(f"⬇️ Model yükleme başlatılıyor: {model_name}")
@@ -31,12 +32,11 @@ def load_model():
31
  tokenizer = AutoTokenizer.from_pretrained(model_name)
32
  model = AutoModelForCausalLM.from_pretrained(
33
  model_name,
34
- device_map="auto",
35
- torch_dtype="auto"
36
  )
37
 
38
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
39
- log("✅ Model ve pipeline başarıyla hazır.")
40
 
41
  except Exception as e:
42
  log(f"❌ Model yükleme hatası: {e}")
@@ -53,34 +53,34 @@ def generate(req: UserInputRequest):
53
  start_time = time.time()
54
  log(f"💬 Kullanıcı isteği alındı: {req.user_input}")
55
 
56
- # Kısa ve net system prompt, okunabilir bölünmüş
57
- concise_system_prompt = (
58
- f"{req.system_prompt}\n"
59
- "❗ Cevaplarını sadece aşağıdaki formatta döndür, fazladan açıklama yazma, örnek ekleme:\n"
60
- "#ANSWER: <cevap>\n"
61
- "#INTENT: <intent>\n"
62
- "#PARAMS: {...}\n"
63
- "#MISSING: [...]\n"
64
- "#ACTION_JSON: {...}\n"
65
- "Şimdi sadece kullanıcının sorusunu bekliyorsun ve formatlı cevap veriyorsun."
66
- )
67
 
68
- # Role separation: System, User, Assistant blokları
69
- full_prompt = (
70
- f"### System:\n{concise_system_prompt}\n\n"
71
- f"### User:\n{req.user_input}\n\n"
72
- f"### Assistant:"
73
- )
74
 
75
- result = pipe(
76
- full_prompt,
 
 
 
 
 
77
  max_new_tokens=200,
 
 
78
  temperature=0.0,
79
  top_p=1.0,
80
- repetition_penalty=1.0,
81
- do_sample=False
82
  )
83
- answer = result[0]["generated_text"]
 
 
84
 
85
  end_time = time.time()
86
  elapsed = end_time - start_time
 
6
  from datetime import datetime
7
  from fastapi import FastAPI, HTTPException
8
  from pydantic import BaseModel
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM
10
 
11
  # === Ortam değişkenleri
12
  os.environ.setdefault("HF_HOME", "/app/.cache")
 
19
 
20
  # === FastAPI başlat
21
  app = FastAPI()
22
+ tokenizer = None
23
+ model = None
24
 
25
  @app.on_event("startup")
26
  def load_model():
27
+ global tokenizer, model
28
  try:
29
  model_name = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
30
  log(f"⬇️ Model yükleme başlatılıyor: {model_name}")
 
32
  tokenizer = AutoTokenizer.from_pretrained(model_name)
33
  model = AutoModelForCausalLM.from_pretrained(
34
  model_name,
35
+ torch_dtype="auto", # A100 için bf16
36
+ device_map="auto"
37
  )
38
 
39
+ log(" Model ve tokenizer başarıyla hazır.")
 
40
 
41
  except Exception as e:
42
  log(f"❌ Model yükleme hatası: {e}")
 
53
  start_time = time.time()
54
  log(f"💬 Kullanıcı isteği alındı: {req.user_input}")
55
 
56
+ messages = [
57
+ {"role": "system", "content": req.system_prompt},
58
+ {"role": "user", "content": req.user_input}
59
+ ]
 
 
 
 
 
 
 
60
 
61
+ input_ids = tokenizer.apply_chat_template(
62
+ messages,
63
+ add_generation_prompt=True,
64
+ return_tensors="pt"
65
+ ).to(model.device)
 
66
 
67
+ terminators = [
68
+ tokenizer.eos_token_id,
69
+ tokenizer.convert_tokens_to_ids("<|eot_id|>")
70
+ ]
71
+
72
+ outputs = model.generate(
73
+ input_ids,
74
  max_new_tokens=200,
75
+ eos_token_id=terminators,
76
+ do_sample=False,
77
  temperature=0.0,
78
  top_p=1.0,
79
+ repetition_penalty=1.0
 
80
  )
81
+
82
+ response = outputs[0][input_ids.shape[-1]:]
83
+ answer = tokenizer.decode(response, skip_special_tokens=True)
84
 
85
  end_time = time.time()
86
  elapsed = end_time - start_time