Spaces:
Paused
Paused
Update intent.py
Browse files
intent.py
CHANGED
|
@@ -10,7 +10,7 @@ global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
|
|
| 10 |
class TrainInput(BaseModel):
|
| 11 |
intents: list
|
| 12 |
|
| 13 |
-
def background_training(intents, app_config):
|
| 14 |
try:
|
| 15 |
log("🔧 Intent eğitimi başlatıldı...")
|
| 16 |
texts, labels, label2id = [], [], {}
|
|
@@ -21,11 +21,11 @@ def background_training(intents, app_config):
|
|
| 21 |
labels.append(idx)
|
| 22 |
|
| 23 |
dataset = Dataset.from_dict({"text": texts, "label": labels})
|
| 24 |
-
tokenizer = AutoTokenizer.from_pretrained(app_config.INTENT_MODEL_ID)
|
| 25 |
-
config = AutoConfig.from_pretrained(app_config.INTENT_MODEL_ID)
|
| 26 |
config.problem_type = "single_label_classification"
|
| 27 |
config.num_labels = len(label2id)
|
| 28 |
-
model = AutoModelForSequenceClassification.from_pretrained(app_config.INTENT_MODEL_ID, config=config)
|
| 29 |
|
| 30 |
tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
|
| 31 |
for row in dataset:
|
|
@@ -69,15 +69,15 @@ def background_training(intents, app_config):
|
|
| 69 |
for intent, total in counts.items():
|
| 70 |
accuracy = correct.get(intent, 0) / total
|
| 71 |
log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
|
| 72 |
-
if accuracy < app_config.TRAIN_CONFIDENCE_THRESHOLD or total < 5:
|
| 73 |
log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
|
| 74 |
|
| 75 |
log("📦 Intent modeli eğitimi kaydediliyor...")
|
| 76 |
-
if os.path.exists(app_config.INTENT_MODEL_PATH):
|
| 77 |
-
shutil.rmtree(app_config.INTENT_MODEL_PATH)
|
| 78 |
-
model.save_pretrained(app_config.INTENT_MODEL_PATH)
|
| 79 |
-
tokenizer.save_pretrained(app_config.INTENT_MODEL_PATH)
|
| 80 |
-
with open(os.path.join(app_config.INTENT_MODEL_PATH, "label2id.json"), "w") as f:
|
| 81 |
json.dump(label2id, f)
|
| 82 |
|
| 83 |
log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
|
|
|
|
| 10 |
class TrainInput(BaseModel):
|
| 11 |
intents: list
|
| 12 |
|
| 13 |
+
def background_training(intents, service_config):
|
| 14 |
try:
|
| 15 |
log("🔧 Intent eğitimi başlatıldı...")
|
| 16 |
texts, labels, label2id = [], [], {}
|
|
|
|
| 21 |
labels.append(idx)
|
| 22 |
|
| 23 |
dataset = Dataset.from_dict({"text": texts, "label": labels})
|
| 24 |
+
tokenizer = AutoTokenizer.from_pretrained(service_config.INTENT_MODEL_ID)
|
| 25 |
+
config = AutoConfig.from_pretrained(service_config.INTENT_MODEL_ID)
|
| 26 |
config.problem_type = "single_label_classification"
|
| 27 |
config.num_labels = len(label2id)
|
| 28 |
+
model = AutoModelForSequenceClassification.from_pretrained(service_config.INTENT_MODEL_ID, config=config)
|
| 29 |
|
| 30 |
tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
|
| 31 |
for row in dataset:
|
|
|
|
| 69 |
for intent, total in counts.items():
|
| 70 |
accuracy = correct.get(intent, 0) / total
|
| 71 |
log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
|
| 72 |
+
if accuracy < service_config.TRAIN_CONFIDENCE_THRESHOLD or total < 5:
|
| 73 |
log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
|
| 74 |
|
| 75 |
log("📦 Intent modeli eğitimi kaydediliyor...")
|
| 76 |
+
if os.path.exists(service_config.INTENT_MODEL_PATH):
|
| 77 |
+
shutil.rmtree(service_config.INTENT_MODEL_PATH)
|
| 78 |
+
model.save_pretrained(service_config.INTENT_MODEL_PATH)
|
| 79 |
+
tokenizer.save_pretrained(service_config.INTENT_MODEL_PATH)
|
| 80 |
+
with open(os.path.join(service_config.INTENT_MODEL_PATH, "label2id.json"), "w") as f:
|
| 81 |
json.dump(label2id, f)
|
| 82 |
|
| 83 |
log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
|