ciyidogan committed (verified)
Commit ae70d2d · 1 Parent(s): f1a2f26

Update llm_model.py

Files changed (1):
  1. llm_model.py (+83, -84)
llm_model.py CHANGED
@@ -1,84 +1,83 @@
- import torch
- import traceback
- from transformers import AutoTokenizer, AutoModelForCausalLM
- from log import log
- from pydantic import BaseModel
-
- class Message(BaseModel):
-     user_input: str
-
- class LLMModel:
-     def __init__(self):
-         self.model = None
-         self.tokenizer = None
-         self.eos_token_id = None
-
-     def setup(self, s_config, project_config):
-         try:
-             log("🧠 LLMModel setup() started")
-             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-             log(f"📡 Device in use: {device}")
-
-             model_base = project_config["model_base"]
-
-             if s_config.work_mode == "hfcloud":
-                 token = s_config.get_auth_token()
-                 log(f"📦 Loading Hugging Face cloud model: {model_base}")
-                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_auth_token=token, use_fast=False)
-                 self.model = AutoModelForCausalLM.from_pretrained(model_base, use_auth_token=token, torch_dtype=torch.float32).to(device)
-
-             elif s_config.work_mode == "cloud":
-                 log(f"📦 Downloading model from another cloud environment: {model_base}")
-                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
-                 self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
-
-             elif s_config.work_mode == "on-prem":
-                 log(f"📦 On-prem model path: {model_base}")
-                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
-                 self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
-
-             else:
-                 raise Exception(f"Unknown work_mode: {s_config.work_mode}")
-
-             self.tokenizer.pad_token = self.tokenizer.pad_token or self.tokenizer.eos_token
-             self.model.config.pad_token_id = self.tokenizer.pad_token_id
-             self.eos_token_id = self.tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
-             self.model.eval()
-
-             log("✅ LLMModel setup() completed successfully.")
-         except Exception as e:
-             log(f"❌ LLMModel setup() error: {e}")
-             traceback.print_exc()
-
-     async def generate_response(self, text, project_config):
-         messages = [{"role": "user", "content": text}]
-         encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
-         input_ids = encodeds.to(self.model.device)
-         attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
-
-         with torch.no_grad():
-             output = self.model.generate(
-                 input_ids=input_ids,
-                 attention_mask=attention_mask,
-                 max_new_tokens=128,
-                 do_sample=project_config["use_sampling"],
-                 eos_token_id=self.eos_token_id,
-                 pad_token_id=self.tokenizer.pad_token_id,
-                 return_dict_in_generate=True,
-                 output_scores=True
-             )
-
-         if not project_config["use_sampling"]:
-             scores = torch.stack(output.scores, dim=1)
-             probs = torch.nn.functional.softmax(scores[0], dim=-1)
-             top_conf = probs.max().item()
-         else:
-             top_conf = None
-
-         decoded = self.tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
-         for tag in ["assistant", "<|im_start|>assistant"]:
-             start = decoded.find(tag)
-             if start != -1:
-                 decoded = decoded[start + len(tag):].strip()
-                 break
-         return decoded, top_conf
 
+ import torch
+ import traceback
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from log import log
+ from pydantic import BaseModel
+
+ class Message(BaseModel):
+     user_input: str
+
+ class LLMModel:
+     def __init__(self):
+         self.model = None
+         self.tokenizer = None
+         self.eos_token_id = None
+
+     def setup(self, s_config, project_config):
+         try:
+             log("🧠 LLMModel setup() started")
+             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+             log(f"📡 Device in use: {device}")
+
+             model_base = project_config["model_base"]
+
+             if s_config.work_mode == "hfcloud":
+                 token = s_config.get_auth_token()
+                 log(f"📦 Loading Hugging Face cloud model: {model_base}")
+                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, token=token, use_fast=False)
+                 self.model = AutoModelForCausalLM.from_pretrained(model_base, token=token, torch_dtype=torch.float32).to(device)
+             elif s_config.work_mode == "cloud":
+                 log(f"📦 Downloading model from another cloud environment: {model_base}")
+                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
+                 self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
+
+             elif s_config.work_mode == "on-prem":
+                 log(f"📦 On-prem model path: {model_base}")
+                 self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
+                 self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
+
+             else:
+                 raise Exception(f"Unknown work_mode: {s_config.work_mode}")
+
+             self.tokenizer.pad_token = self.tokenizer.pad_token or self.tokenizer.eos_token
+             self.model.config.pad_token_id = self.tokenizer.pad_token_id
+             self.eos_token_id = self.tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
+             self.model.eval()
+
+             log("✅ LLMModel setup() completed successfully.")
+         except Exception as e:
+             log(f"❌ LLMModel setup() error: {e}")
+             traceback.print_exc()
+
+     async def generate_response(self, text, project_config):
+         messages = [{"role": "user", "content": text}]
+         encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
+         input_ids = encodeds.to(self.model.device)
+         attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
+
+         with torch.no_grad():
+             output = self.model.generate(
+                 input_ids=input_ids,
+                 attention_mask=attention_mask,
+                 max_new_tokens=128,
+                 do_sample=project_config["use_sampling"],
+                 eos_token_id=self.eos_token_id,
+                 pad_token_id=self.tokenizer.pad_token_id,
+                 return_dict_in_generate=True,
+                 output_scores=True
+             )
+
+         if not project_config["use_sampling"]:
+             scores = torch.stack(output.scores, dim=1)
+             probs = torch.nn.functional.softmax(scores[0], dim=-1)
+             top_conf = probs.max().item()
+         else:
+             top_conf = None
+
+         decoded = self.tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
+         for tag in ["assistant", "<|im_start|>assistant"]:
+             start = decoded.find(tag)
+             if start != -1:
+                 decoded = decoded[start + len(tag):].strip()
+                 break
+         return decoded, top_conf
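
The substantive change in this commit is the authentication keyword passed to from_pretrained in the hfcloud branch: recent transformers releases deprecate use_auth_token in favor of token. A minimal sketch of the rename, using a placeholder model id and access token rather than real values from this project:

    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_base = "org/model-name"  # placeholder model id, not from this repo
    token = "hf_..."               # placeholder Hugging Face access token

    # Before this commit (deprecated keyword):
    # tokenizer = AutoTokenizer.from_pretrained(model_base, use_auth_token=token, use_fast=False)

    # After this commit:
    tokenizer = AutoTokenizer.from_pretrained(model_base, token=token, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(model_base, token=token)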
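
For context, a hypothetical driver showing how the two methods fit together. The s_config and project_config stand-ins below are illustrative only; the real config objects live elsewhere in the repository and are not part of this commit:

    import asyncio
    from types import SimpleNamespace

    from llm_model import LLMModel

    # Illustrative stand-ins for the real config objects; "on-prem" mode
    # loads the model from a local path and needs no auth token.
    s_config = SimpleNamespace(work_mode="on-prem")
    project_config = {"model_base": "/models/my-model", "use_sampling": False}

    llm = LLMModel()
    llm.setup(s_config, project_config)

    # generate_response is async; with use_sampling=False it also returns the
    # highest single-token probability over the generation as a rough confidence.
    decoded, top_conf = asyncio.run(llm.generate_response("Hello!", project_config))
    print(decoded, top_conf)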