ciyidogan committed · Commit 3ed827f · verified · 1 Parent(s): 8a39f61

Update app.py

Files changed (1)
  1. app.py +6 -15
app.py CHANGED
@@ -20,17 +20,9 @@ def log(message):
 
 # === Helper functions
 def trim_history(messages, max_blocks=20):
-    """
-    Limit the message history to at most max_blocks (e.g. 20: 10 user + 10 assistant).
-    The oldest messages are dropped.
-    """
     return messages[-max_blocks:]
 
-def enforce_token_budget(tokenizer, system_prompt, history_messages, user_input, total_ctx=4096, max_new_tokens=300):
-    """
-    Trims the history if necessary so the total token limit is not exceeded.
-    Messages are dropped whole, never truncated.
-    """
+def enforce_token_budget(tokenizer, system_prompt, history_messages, user_input, total_ctx=4096, max_new_tokens=200):
     system_tokens = len(tokenizer(system_prompt)['input_ids'])
     user_tokens = len(tokenizer(user_input)['input_ids'])
     history_tokens = sum(len(tokenizer(m['content'])['input_ids']) for m in history_messages)
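The diff shows only the token-counting lines of enforce_token_budget; the rest of its body is outside the hunk. Going by the removed docstring (messages are dropped whole, never truncated), a minimal sketch of the complete function could look like this; the budget arithmetic and the drop loop below are assumptions, not code from the repo:

def enforce_token_budget(tokenizer, system_prompt, history_messages,
                         user_input, total_ctx=4096, max_new_tokens=200):
    system_tokens = len(tokenizer(system_prompt)['input_ids'])
    user_tokens = len(tokenizer(user_input)['input_ids'])
    history = list(history_messages)  # work on a copy, leave the caller's list intact
    history_tokens = sum(len(tokenizer(m['content'])['input_ids']) for m in history)
    # Tokens left for history after reserving room for the reply and the fixed prompt parts.
    budget = total_ctx - max_new_tokens - system_tokens - user_tokens
    # Drop whole messages, oldest first, until the history fits the budget.
    while history and history_tokens > budget:
        dropped = history.pop(0)
        history_tokens -= len(tokenizer(dropped['content'])['input_ids'])
    return history

Dropping from the front of the list discards the oldest turns first, so the most recent conversational context survives.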
@@ -76,8 +68,8 @@ async def lifespan(app: FastAPI):
         tokenizer.pad_token = tokenizer.eos_token
 
         quant_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            llm_int8_threshold=6.0
+            load_in_4bit=True,  # ✅ 4-bit quantization
+            llm_int4_threshold=6.0
         )
 
         model = AutoModelForCausalLM.from_pretrained(
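Note: llm_int4_threshold is not a BitsAndBytesConfig parameter: llm_int8_threshold applies only to 8-bit loading, and recent transformers versions ignore unknown kwargs with an "Unused kwargs" warning, so this line likely has no effect. If the goal is a tuned 4-bit setup, the supported knobs are bnb_4bit_compute_dtype, bnb_4bit_quant_type, and bnb_4bit_use_double_quant. A sketch (the dtype and quant-type choices depend on the model and GPU):

import torch
from transformers import BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for the de-quantized matmuls
    bnb_4bit_quant_type="nf4",             # NormalFloat4; "fp4" is the alternative
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
)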
@@ -86,7 +78,7 @@ async def lifespan(app: FastAPI):
             quantization_config=quant_config
         )
 
-        log("✅ Model and tokenizer ready (8-bit quantized, BitsAndBytesConfig).")
+        log("✅ Model and tokenizer ready (4-bit quantized, BitsAndBytesConfig).")
         yield
 
     except Exception as e:
@@ -108,9 +100,8 @@ def generate(req: UserInputRequest):
     overall_start = time.time()
     log(f"💬 User request received: {req.user_input}")
 
-    # === History trimming
     trimmed_history = trim_history(req.history, max_blocks=20)
-    trimmed_history = enforce_token_budget(tokenizer, req.system_prompt, trimmed_history, req.user_input, total_ctx=4096, max_new_tokens=300)
+    trimmed_history = enforce_token_budget(tokenizer, req.system_prompt, trimmed_history, req.user_input, total_ctx=4096, max_new_tokens=200)
 
     # === Apply chat template
     t0 = time.time()
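UserInputRequest is defined outside this diff; judging from the attribute accesses above (req.system_prompt, req.user_input, req.history), it is presumably a Pydantic model along these lines. This is a hypothetical reconstruction, not the repo's definition:

from pydantic import BaseModel

class UserInputRequest(BaseModel):
    # Hypothetical reconstruction based on the fields used in generate().
    system_prompt: str
    user_input: str
    history: list[dict]  # e.g. [{"role": "user", "content": "..."}, ...]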
@@ -143,7 +134,7 @@ def generate(req: UserInputRequest):
 
     input_len = input_ids.shape[-1]
     total_ctx = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 4096
-    max_new_tokens = min(300, max(1, total_ctx - input_len))
+    max_new_tokens = min(200, max(1, total_ctx - input_len))
 
     log(f"ℹ️ Input length: {input_len}, max_new_tokens set to: {max_new_tokens}")