Spaces:

UcsTurkey
/

mistral7b

Paused

App Files Files Community

ciyidogan committed on May 22

Commit

39728bb

verified ·

1 Parent(s): c0112d6

Update inference_test_turkcell_with_intents.py

Browse files

Files changed (1) hide show

inference_test_turkcell_with_intents.py +190 -39

inference_test_turkcell_with_intents.py CHANGED Viewed

@@ -7,25 +7,17 @@ from peft import PeftModel
 from datasets import Dataset
 from datetime import datetime
-# === Ortam
 HF_TOKEN = os.getenv("HF_TOKEN")
 os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
 os.environ["TORCH_HOME"] = "/app/.torch_cache"
 os.makedirs("/app/.torch_cache", exist_ok=True)
-# === Ayarlar
 MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
 USE_FINE_TUNE = False
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
 FINE_TUNE_ZIP = "trained_model_000_009.zip"
 USE_SAMPLING = False
-GENERATION_CONFIDENCE_THRESHOLD = -1.5
-INTENT_CONFIDENCE_THRESHOLD = 0.5
-FALLBACK_ANSWERS = [
-    "Bu konuda maalesef bilgim yok.",
-    "Ne demek istediğinizi tam anlayamadım.",
-    "Bu soruya şu an yanıt veremiyorum."
-]
 INTENT_MODEL_PATH = "intent_model"
 INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
@@ -34,7 +26,16 @@ INTENT_TOKENIZER = None
 LABEL2ID = {}
 INTENT_DEFINITIONS = {}
-# === FastAPI
 app = FastAPI()
 chat_history = []
 model = None
@@ -75,6 +76,153 @@ def root():
     </body></html>
     """
 @app.post("/chat")
 async def chat(msg: Message):
     user_input = msg.user_input.strip()
@@ -85,44 +233,47 @@ async def chat(msg: Message):
         if INTENT_MODEL:
             intent_task = asyncio.create_task(detect_intent(user_input))
             response_task = asyncio.create_task(generate_response(user_input))
-            intent = await intent_task
-            if intent is None:
-                log("🟡 Intent confidence düşük. Ana modele yönlendiriliyor.")
-                response = await response_task
-                if isinstance(response, dict) and response.get("score", 0) < GENERATION_CONFIDENCE_THRESHOLD:
-                    return {"response": random.choice(FALLBACK_ANSWERS)}
-                return {"response": response if isinstance(response, str) else response.get("text", "")}
-            if intent in INTENT_DEFINITIONS:
                 result = execute_intent(intent, user_input)
                 return result
             else:
-                response = await response_task
-                return {"response": response if isinstance(response, str) else response.get("text", "")}
         else:
-            response = await generate_response(user_input)
-            if isinstance(response, dict) and response.get("score", 0) < GENERATION_CONFIDENCE_THRESHOLD:
                 return {"response": random.choice(FALLBACK_ANSWERS)}
-            return {"response": response if isinstance(response, str) else response.get("text", "")}
     except Exception as e:
         traceback.print_exc()
         return JSONResponse(content={"error": str(e)}, status_code=500)
-async def detect_intent(text):
-    inputs = INTENT_TOKENIZER(text, return_tensors="pt")
-    outputs = INTENT_MODEL(**inputs)
-    logits = outputs.logits
-    probs = torch.nn.functional.softmax(logits, dim=1)
-    pred_id = logits.argmax().item()
-    confidence = probs[0][pred_id].item()
-    id2label = {v: k for k, v in LABEL2ID.items()}
-    intent_name = id2label[pred_id]
-    log(f"🔍 Intent tahmini: {intent_name} (confidence: {confidence:.2f})")
-    if confidence < INTENT_CONFIDENCE_THRESHOLD:
-        log(f"⚠️ Düşük confidence ({confidence:.2f}) nedeniyle intent boş döndü.")
-        return None
-    return intent_name

 from datasets import Dataset
 from datetime import datetime
+# === Environment and settings ===
 HF_TOKEN = os.getenv("HF_TOKEN")
 os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
 os.environ["TORCH_HOME"] = "/app/.torch_cache"
 os.makedirs("/app/.torch_cache", exist_ok=True)
 MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
 USE_FINE_TUNE = False
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
 FINE_TUNE_ZIP = "trained_model_000_009.zip"
 USE_SAMPLING = False
 INTENT_MODEL_PATH = "intent_model"
 INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
 LABEL2ID = {}
 INTENT_DEFINITIONS = {}
+# A detected intent is executed only when its classifier confidence exceeds this value.
+INTENT_CONFIDENCE_THRESHOLD = 0.5
+# LLM replies whose confidence falls below this value are replaced by a fallback answer.
+LLM_CONFIDENCE_THRESHOLD = 0.2
+# Intents whose accuracy after training is below this value are logged as under-performing.
+TRAIN_CONFIDENCE_THRESHOLD = 0.7
+# Canned replies returned instead of a low-confidence LLM answer.
+FALLBACK_ANSWERS = [
+    "Bu konuda maalesef bilgim yok.",
+    "Ne demek istediğinizi tam anlayamadım.",
+    "Bu soruya şu an yanıt veremiyorum."
+]
+# === FastAPI ===
 app = FastAPI()
 chat_history = []
 model = None
     </body></html>
     """
+@app.post("/train_intents", status_code=202)
+def train_intents(train_input: TrainInput):
+    """Register the posted intent definitions and start training in the background.
+
+    Replaces the module-level ``INTENT_DEFINITIONS`` with a name -> definition
+    mapping built from ``train_input.intents``, starts ``background_training``
+    on a daemon thread and returns immediately (the route is declared with
+    HTTP status 202).
+    """
+    global INTENT_DEFINITIONS
+    log("📥 POST /train_intents çağrıldı.")
+    posted_intents = train_input.intents
+    definitions_by_name = {}
+    for definition in posted_intents:
+        definitions_by_name[definition["name"]] = definition
+    INTENT_DEFINITIONS = definitions_by_name
+    worker = threading.Thread(target=lambda: background_training(posted_intents), daemon=True)
+    worker.start()
+    return {"status": "accepted", "message": "Intent eğitimi arka planda başlatıldı."}
+def background_training(intents):
+    """Fine-tune the intent classifier on *intents* and save it to disk.
+
+    Args:
+        intents: Sequence of intent dicts; each must provide ``"name"`` and a
+            list of example sentences under ``"examples"``.
+
+    The examples are flattened into a labelled dataset, ``INTENT_MODEL_ID`` is
+    fine-tuned for single-label classification, per-intent accuracy on the
+    training examples is logged, and the model, tokenizer and
+    ``label2id.json`` are written to ``INTENT_MODEL_PATH``. Any exception is
+    logged and swallowed, so nothing propagates out of the worker thread.
+    """
+    try:
+        log("🔧 Intent eğitimi başlatıldı...")
+        texts, labels, label2id = [], [], {}
+        for idx, intent in enumerate(intents):
+            label2id[intent["name"]] = idx
+            for ex in intent["examples"]:
+                texts.append(ex)
+                labels.append(idx)
+        dataset = Dataset.from_dict({"text": texts, "label": labels})
+        # `tokenizer` and `model` below are locals; they do not touch the
+        # module-level chat model objects of the same name.
+        tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
+        config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
+        config.problem_type = "single_label_classification"
+        config.num_labels = len(label2id)
+        model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, config=config)
+        tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
+        for row in dataset:
+            out = tokenizer(row["text"], truncation=True, padding="max_length", max_length=128)
+            tokenized_data["input_ids"].append(out["input_ids"])
+            tokenized_data["attention_mask"].append(out["attention_mask"])
+            tokenized_data["label"].append(row["label"])
+        tokenized = Dataset.from_dict(tokenized_data)
+        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
+        output_dir = "/app/intent_train_output"
+        os.makedirs(output_dir, exist_ok=True)
+        trainer = Trainer(
+            model=model,
+            args=TrainingArguments(output_dir, per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[]),
+            train_dataset=tokenized,
+            data_collator=default_data_collator
+        )
+        trainer.train()
+        # Reporting: per-intent accuracy on the training examples.
+        # Switch to eval mode (dropout off) and disable autograd, pass the
+        # attention mask so padding is ignored, and move the batch to whatever
+        # device the model is on after training.
+        model.eval()
+        device = next(model.parameters()).device
+        batch = tokenized[:]
+        with torch.no_grad():
+            logits = model(
+                input_ids=batch["input_ids"].to(device),
+                attention_mask=batch["attention_mask"].to(device),
+            ).logits
+        predictions = logits.argmax(dim=-1).tolist()
+        actuals = batch["label"].tolist()
+        # Build the reverse mapping once instead of searching it per example.
+        id2label = {label_id: name for name, label_id in label2id.items()}
+        counts = {}
+        correct = {}
+        for pred, actual in zip(predictions, actuals):
+            intent = id2label[actual]
+            counts[intent] = counts.get(intent, 0) + 1
+            if pred == actual:
+                correct[intent] = correct.get(intent, 0) + 1
+        for intent, total in counts.items():
+            accuracy = correct.get(intent, 0) / total
+            log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
+            if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
+                log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
+        # Replace any previously saved model directory with the new artefacts.
+        if os.path.exists(INTENT_MODEL_PATH):
+            shutil.rmtree(INTENT_MODEL_PATH)
+        model.save_pretrained(INTENT_MODEL_PATH)
+        tokenizer.save_pretrained(INTENT_MODEL_PATH)
+        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w", encoding="utf-8") as f:
+            json.dump(label2id, f)
+        log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
+    except Exception as e:
+        log(f"❌ Intent eğitimi hatası: {e}")
+        traceback.print_exc()
+@app.post("/load_intent_model")
+def load_intent_model():
+    """Load the saved intent model, tokenizer and label map from ``INTENT_MODEL_PATH``.
+
+    Returns:
+        ``{"status": "ok", ...}`` on success, or a ``JSONResponse`` with status
+        500 and the error text when anything fails to load.
+    """
+    global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
+    try:
+        # Load into locals first so a failure part-way through cannot leave a
+        # new model paired with a stale tokenizer or label map.
+        loaded_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
+        loaded_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
+        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), encoding="utf-8") as f:
+            loaded_labels = json.load(f)
+        # Publish the model last: /chat checks INTENT_MODEL before using the others.
+        INTENT_TOKENIZER, LABEL2ID, INTENT_MODEL = loaded_tokenizer, loaded_labels, loaded_model
+        return {"status": "ok", "message": "Intent modeli yüklendi."}
+    except Exception as e:
+        # Print the traceback like the other handlers in this file do.
+        traceback.print_exc()
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+async def detect_intent(text):
+    """Classify *text* with the loaded intent model.
+
+    Returns:
+        A ``(intent_name, confidence)`` tuple, where ``confidence`` is the
+        softmax probability of the predicted class.
+    """
+    # Truncate to the 128-token limit used when the classifier is trained, so
+    # an over-long message cannot exceed the encoder's maximum input length.
+    inputs = INTENT_TOKENIZER(text, return_tensors="pt", truncation=True, max_length=128)
+    # Inference only: do not build an autograd graph.
+    with torch.no_grad():
+        outputs = INTENT_MODEL(**inputs)
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    confidence, pred_id = torch.max(probs, dim=-1)
+    id2label = {v: k for k, v in LABEL2ID.items()}
+    return id2label[pred_id.item()], confidence.item()
+async def generate_response(text):
+    """Generate a reply to *text* with the main chat model.
+
+    Returns:
+        A ``(reply, confidence)`` tuple. ``confidence`` is the largest softmax
+        probability found in the generation scores when ``USE_SAMPLING`` is
+        false, and ``None`` when sampling is enabled.
+    """
+    messages = [{"role": "user", "content": text}]
+    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
+    eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
+    input_ids = encodeds.to(model.device)
+    # The prompt is a single, unpadded sequence, so every position is real
+    # input. Deriving the mask from pad_token_id would hide genuine prompt
+    # tokens whenever the pad token is the same as the eos token.
+    attention_mask = torch.ones_like(input_ids)
+    with torch.no_grad():
+        output = model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            max_new_tokens=128,
+            do_sample=USE_SAMPLING,
+            eos_token_id=eos_token,
+            pad_token_id=tokenizer.pad_token_id,
+            return_dict_in_generate=True,
+            output_scores=True
+        )
+    if not USE_SAMPLING:
+        # NOTE(review): this is the maximum over every generation step and the
+        # whole vocabulary, not a per-token average — confirm that this is the
+        # confidence the caller's threshold is meant to be compared against.
+        scores = torch.stack(output.scores, dim=1)
+        probs = torch.nn.functional.softmax(scores[0], dim=-1)
+        top_conf = probs.max().item()
+    else:
+        top_conf = None
+    # The returned sequence starts with the prompt tokens; decode only what was
+    # generated after them. Searching the decoded text for "assistant" breaks
+    # as soon as the user's own message contains that word.
+    prompt_length = input_ids.shape[-1]
+    decoded = tokenizer.decode(output.sequences[0][prompt_length:], skip_special_tokens=True).strip()
+    return decoded, top_conf
+def extract_parameters(variables_list, user_input):
+    """Extract named parameter values from *user_input* using template patterns.
+
+    Each entry of *variables_list* is a template in which ``name:{example}``
+    marks a parameter; the rest of the template is used as regex text. The
+    templates are tried in order against the start of *user_input* and the
+    first one that matches wins.
+
+    Returns:
+        A list of ``{"key": name, "value": text}`` dicts for the matching
+        template, or ``[]`` when no template matches.
+    """
+    for pattern in variables_list:
+        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
+        # A lazy group at the very end of the regex would capture only one
+        # character, so let a trailing placeholder run to the end of the line.
+        if regex.endswith(".+?)"):
+            regex = regex[:-len(".+?)")] + ".+)"
+        try:
+            match = re.match(regex, user_input)
+        except re.error:
+            # Malformed template (e.g. the same parameter name used twice):
+            # skip it instead of failing the whole request.
+            continue
+        if match:
+            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
+    return []
+def execute_intent(intent_name, user_input):
+    """Build the result payload for a detected intent.
+
+    For an intent present in ``INTENT_DEFINITIONS`` the parameters are
+    extracted from *user_input* with the definition's ``"variables"``
+    patterns and the call is logged; for an unknown intent the parameter
+    list is empty.
+
+    Returns:
+        ``{"intent": intent_name, "parameters": [...]}``.
+    """
+    if intent_name not in INTENT_DEFINITIONS:
+        return {"intent": intent_name, "parameters": []}
+    pattern_list = INTENT_DEFINITIONS[intent_name].get("variables", [])
+    parameters = extract_parameters(pattern_list, user_input)
+    log(f"🚀 execute_intent('{intent_name}', {parameters})")
+    return {"intent": intent_name, "parameters": parameters}
 @app.post("/chat")
 async def chat(msg: Message):
     user_input = msg.user_input.strip()
         if INTENT_MODEL:
             intent_task = asyncio.create_task(detect_intent(user_input))
             response_task = asyncio.create_task(generate_response(user_input))
+            intent, intent_conf = await intent_task
+            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
+            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
                 result = execute_intent(intent, user_input)
                 return result
             else:
+                response, response_conf = await response_task
+                if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
+                    return {"response": random.choice(FALLBACK_ANSWERS)}
+                return {"response": response}
         else:
+            response, response_conf = await generate_response(user_input)
+            if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
                 return {"response": random.choice(FALLBACK_ANSWERS)}
+            return {"response": response}
     except Exception as e:
         traceback.print_exc()
         return JSONResponse(content={"error": str(e)}, status_code=500)
+def log(message):
+    """Print *message* to stdout prefixed with the current HH:MM:SS time, flushing immediately."""
+    now_text = format(datetime.now(), "%H:%M:%S")
+    print("[{}] {}".format(now_text, message), flush=True)
+def setup_model():
+    """Load the main chat model and tokenizer into the module-level globals.
+
+    Picks CUDA when available (otherwise CPU), loads ``MODEL_BASE`` in
+    float32, makes sure a pad token is set, records the id of ``<|im_end|>``
+    in ``eos_token_id`` and puts the model in eval mode. Any exception is
+    logged with its traceback and not re-raised.
+    """
+    global model, tokenizer, eos_token_id
+    try:
+        log("🧠 setup_model() başladı")
+        device_name = "cuda" if torch.cuda.is_available() else "cpu"
+        target_device = torch.device(device_name)
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
+        base_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32)
+        model = base_model.to(target_device)
+        # Fall back to the eos token when the tokenizer defines no pad token.
+        tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
+        model.config.pad_token_id = tokenizer.pad_token_id
+        im_end_ids = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"]
+        eos_token_id = im_end_ids[0]
+        model.eval()
+        log("✅ Ana model yüklendi")
+    except Exception as e:
+        log(f"❌ setup_model() hatası: {e}")
+        traceback.print_exc()
+# Load the chat model in the background so start-up is not blocked by it.
+model_loader = threading.Thread(target=setup_model, daemon=True)
+model_loader.start()
+# Serve the FastAPI app on port 7860 from a second daemon thread.
+api_server = threading.Thread(
+    target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860),
+    daemon=True,
+)
+api_server.start()
+# Both workers are daemon threads, so keep the main thread alive indefinitely.
+while True:
+    time.sleep(60)