Spaces:

chen666-666
/

wechat-ner-re

Sleeping

App Files Files Community

chen666-666 committed on Apr 16

Commit

4cd7a63

1 Parent(s): 1d3964d

add app.py and requirements.txt

Browse files

Files changed (1) hide show

app.py +29 -30

app.py CHANGED Viewed

@@ -240,15 +240,15 @@ def convert_telegram_json_to_eval_format(path):
 def evaluate_ner_model(data, model_type):
-    y_true, y_pred = [], []
-    POS_TOLERANCE = 1  # 允许的位置误差
     for item in data:
         text = item["text"]
         gold_entities = []
         for e in item.get("entities", []):
             if "text" in e and "type" in e:
-                # 标准化标签
                 norm_type = LABEL_MAPPING.get(e["type"], e["type"])
                 gold_entities.append({
                     "text": e["text"],
@@ -257,39 +257,38 @@ def evaluate_ner_model(data, model_type):
                     "end": e.get("end", -1)
                 })
         pred_entities, _ = ner(text, model_type)
-        # 构建对比集合
-        all_entities = set()
-        # 处理标注数据
-        for g in gold_entities:
-            key = f"{g['text']}|{g['type']}|{g['start']}|{g['end']}"
-            all_entities.add(key)
-        # 处理预测结果
-        pred_set = set()
-        for p in pred_entities:
-            # 允许位置误差
-            matched = False
-            for g in gold_entities:
-                if (p["text"] == g["text"] and
-                        p["type"] == g["type"] and
-                        abs(p["start"] - g["start"]) <= POS_TOLERANCE and
-                        abs(p["end"] - g["end"]) <= POS_TOLERANCE):
-                    matched = True
                     break
-            pred_set.add(matched)
-        # 构建指标
-        y_true.extend([1] * len(gold_entities))
-        y_pred.extend([1 if m else 0 for m in pred_set])
-    if not y_true:
-        return "⚠️ 无有效标注数据"
-    return (f"Precision: {precision_score(y_true, y_pred, zero_division=0):.2f}\n"
-            f"Recall: {recall_score(y_true, y_pred, zero_division=0):.2f}\n"
-            f"F1: {f1_score(y_true, y_pred, zero_division=0):.2f}")
 def auto_annotate(file, model_type):
     data = convert_telegram_json_to_eval_format(file.name)

 def evaluate_ner_model(data, model_type):
+    tp, fp, fn = 0, 0, 0
+    POS_TOLERANCE = 1
     for item in data:
         text = item["text"]
+        # 处理标注数据
         gold_entities = []
         for e in item.get("entities", []):
             if "text" in e and "type" in e:
                 norm_type = LABEL_MAPPING.get(e["type"], e["type"])
                 gold_entities.append({
                     "text": e["text"],
                     "end": e.get("end", -1)
                 })
+        # 获取预测结果
         pred_entities, _ = ner(text, model_type)
+        # 初始化匹配状态
+        matched_gold = [False] * len(gold_entities)
+        matched_pred = [False] * len(pred_entities)
+        # 遍历预测实体寻找匹配
+        for p_idx, p in enumerate(pred_entities):
+            for g_idx, g in enumerate(gold_entities):
+                if not matched_gold[g_idx] and \
+                        p["text"] == g["text"] and \
+                        p["type"] == g["type"] and \
+                        abs(p["start"] - g["start"]) <= POS_TOLERANCE and \
+                        abs(p["end"] - g["end"]) <= POS_TOLERANCE:
+                    matched_gold[g_idx] = True
+                    matched_pred[p_idx] = True
                     break
+        # 统计指标
+        tp += sum(matched_pred)
+        fp += len(pred_entities) - sum(matched_pred)
+        fn += len(gold_entities) - sum(matched_gold)
+    # 处理除零情况
+    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
+    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
+    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+    return (f"Precision: {precision:.2f}\n"
+            f"Recall: {recall:.2f}\n"
+            f"F1: {f1:.2f}")
 def auto_annotate(file, model_type):
     data = convert_telegram_json_to_eval_format(file.name)