Spaces:
Running
Running
Update intent.py
Browse files
intent.py
CHANGED
@@ -10,7 +10,7 @@ global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
|
|
10 |
class TrainInput(BaseModel):
|
11 |
intents: list
|
12 |
|
13 |
-
def background_training(intents, service_config):
|
14 |
try:
|
15 |
log("🔧 Intent eğitimi başlatıldı...")
|
16 |
texts, labels, label2id = [], [], {}
|
@@ -21,11 +21,11 @@ def background_training(intents, service_config):
|
|
21 |
labels.append(idx)
|
22 |
|
23 |
dataset = Dataset.from_dict({"text": texts, "label": labels})
|
24 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
25 |
-
config = AutoConfig.from_pretrained(
|
26 |
config.problem_type = "single_label_classification"
|
27 |
config.num_labels = len(label2id)
|
28 |
-
model = AutoModelForSequenceClassification.from_pretrained(
|
29 |
|
30 |
tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
|
31 |
for row in dataset:
|
@@ -69,15 +69,15 @@ def background_training(intents, service_config):
|
|
69 |
for intent, total in counts.items():
|
70 |
accuracy = correct.get(intent, 0) / total
|
71 |
log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
|
72 |
-
if accuracy <
|
73 |
log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
|
74 |
|
75 |
log("📦 Intent modeli eğitimi kaydediliyor...")
|
76 |
-
if os.path.exists(
|
77 |
-
shutil.rmtree(
|
78 |
-
model.save_pretrained(
|
79 |
-
tokenizer.save_pretrained(
|
80 |
-
with open(os.path.join(
|
81 |
json.dump(label2id, f)
|
82 |
|
83 |
log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
|
|
|
10 |
class TrainInput(BaseModel):
|
11 |
intents: list
|
12 |
|
13 |
+
def background_training(intents, s_config):
|
14 |
try:
|
15 |
log("🔧 Intent eğitimi başlatıldı...")
|
16 |
texts, labels, label2id = [], [], {}
|
|
|
21 |
labels.append(idx)
|
22 |
|
23 |
dataset = Dataset.from_dict({"text": texts, "label": labels})
|
24 |
+
tokenizer = AutoTokenizer.from_pretrained(s_config.INTENT_MODEL_ID)
|
25 |
+
config = AutoConfig.from_pretrained(s_config.INTENT_MODEL_ID)
|
26 |
config.problem_type = "single_label_classification"
|
27 |
config.num_labels = len(label2id)
|
28 |
+
model = AutoModelForSequenceClassification.from_pretrained(s_config.INTENT_MODEL_ID, config=config)
|
29 |
|
30 |
tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
|
31 |
for row in dataset:
|
|
|
69 |
for intent, total in counts.items():
|
70 |
accuracy = correct.get(intent, 0) / total
|
71 |
log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
|
72 |
+
if accuracy < s_config.TRAIN_CONFIDENCE_THRESHOLD or total < 5:
|
73 |
log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
|
74 |
|
75 |
log("📦 Intent modeli eğitimi kaydediliyor...")
|
76 |
+
if os.path.exists(s_config.INTENT_MODEL_PATH):
|
77 |
+
shutil.rmtree(s_config.INTENT_MODEL_PATH)
|
78 |
+
model.save_pretrained(s_config.INTENT_MODEL_PATH)
|
79 |
+
tokenizer.save_pretrained(s_config.INTENT_MODEL_PATH)
|
80 |
+
with open(os.path.join(s_config.INTENT_MODEL_PATH, "label2id.json"), "w") as f:
|
81 |
json.dump(label2id, f)
|
82 |
|
83 |
log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
|