Commit · 1d3964d
1 Parent(s): 6129c00
add app.py and requirements.txt
app.py CHANGED
@@ -63,13 +63,6 @@ def visualize_kg_text():
     edges = [f"{h} --[{r}]-> {t}" for h, t, r in knowledge_graph["relations"]]
     return "\n".join(["📌 实体:"] + nodes + ["", "📎 关系:"] + edges)
 
-
-def visualize_kg_text():
-    nodes = [f"{ent[0]} ({ent[1]})" for ent in knowledge_graph["entities"]]
-    edges = [f"{h} --[{r}]-> {t}" for h, t, r in knowledge_graph["relations"]]
-    return "\n".join(["📌 实体:"] + nodes + ["", "📎 关系:"] + edges)
-
-
 # ======================== 实体识别(NER) ========================
 def merge_adjacent_entities(entities):
     merged = []
@@ -97,9 +90,32 @@ def merge_adjacent_entities(entities):
 def ner(text, model_type="bert"):
     start_time = time.time()
     if model_type == "chatglm" and use_chatglm:
-
+        try:
+            prompt = f"""请从以下文本中识别所有实体,严格按照JSON列表格式返回,每个实体包含text、type、start、end字段:
+示例:[{{"text": "北京", "type": "LOC", "start": 0, "end": 2}}]
+文本:{text}"""
+            response = chatglm_model.chat(chatglm_tokenizer, prompt, temperature=0.1)
+            if isinstance(response, tuple):
+                response = response[0]
+
+            # 增强 JSON 解析
+            try:
+                json_str = re.search(r'\[.*\]', response, re.DOTALL).group()
+                entities = json.loads(json_str)
+                # 验证字段
+                valid_entities = []
+                for ent in entities:
+                    if all(k in ent for k in ("text", "type", "start", "end")):
+                        valid_entities.append(ent)
+                return valid_entities, time.time() - start_time
+            except Exception as e:
+                print(f"JSON 解析失败: {e}")
+                return [], time.time() - start_time
+        except Exception as e:
+            print(f"ChatGLM 调用失败:{e}")
+            return [], time.time() - start_time
 
-    #
+    # 使用微调的 BERT 中文 NER 模型
     raw_results = bert_ner_pipeline(text)
     entities = []
     for r in raw_results:
@@ -115,6 +131,7 @@ def ner(text, model_type="bert"):
     entities = merge_adjacent_entities(entities)
     return entities, time.time() - start_time
 
+
 # ======================== 关系抽取(RE) ========================
 def re_extract(entities, text):
     # 修改7:添加实体类型过滤
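
For reference, the JSON-extraction-and-validation step introduced in this commit can be exercised on its own. The sketch below is a minimal, standalone version of that branch, with a hard-coded string standing in for the real chatglm_model.chat() reply; the sample entity values are hypothetical.

import json
import re

# Stand-in for a ChatGLM reply; the surrounding prose and entity values are made up for illustration.
response = '识别结果:[{"text": "北京", "type": "LOC", "start": 0, "end": 2}, {"text": "某公司", "type": "ORG", "start": 5}]'

# Grab the first bracketed JSON list inside the reply, as the committed ner() branch does.
match = re.search(r'\[.*\]', response, re.DOTALL)
entities = json.loads(match.group()) if match else []

# Keep only entities that carry all four required fields (text, type, start, end).
valid_entities = [
    ent for ent in entities
    if all(k in ent for k in ("text", "type", "start", "end"))
]

print(valid_entities)
# [{'text': '北京', 'type': 'LOC', 'start': 0, 'end': 2}]  (the second entity is dropped: no "end" field)

Searching for the bracketed span before calling json.loads tolerates any explanatory text the model wraps around the list, and entities missing a required field are dropped rather than raising, matching the error handling added in this commit.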