chen666-666 committed on
Commit
1d3964d
·
1 Parent(s): 6129c00

add app.py and requirements.txt

Browse files
Files changed (1) hide show
  1. app.py +26 -9
app.py CHANGED
@@ -63,13 +63,6 @@ def visualize_kg_text():
63
  edges = [f"{h} --[{r}]-> {t}" for h, t, r in knowledge_graph["relations"]]
64
  return "\n".join(["📌 实体:"] + nodes + ["", "📎 关系:"] + edges)
65
 
66
-
67
- def visualize_kg_text():
68
- nodes = [f"{ent[0]} ({ent[1]})" for ent in knowledge_graph["entities"]]
69
- edges = [f"{h} --[{r}]-> {t}" for h, t, r in knowledge_graph["relations"]]
70
- return "\n".join(["📌 实体:"] + nodes + ["", "📎 关系:"] + edges)
71
-
72
-
73
  # ======================== 实体识别(NER) ========================
74
  def merge_adjacent_entities(entities):
75
  merged = []
@@ -97,9 +90,32 @@ def merge_adjacent_entities(entities):
97
  def ner(text, model_type="bert"):
98
  start_time = time.time()
99
  if model_type == "chatglm" and use_chatglm:
100
- # ... [原有ChatGLM代码保持不变] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
- # 修改6:优化BERT模型处理流程
103
  raw_results = bert_ner_pipeline(text)
104
  entities = []
105
  for r in raw_results:
@@ -115,6 +131,7 @@ def ner(text, model_type="bert"):
115
  entities = merge_adjacent_entities(entities)
116
  return entities, time.time() - start_time
117
 
 
118
  # ======================== 关系抽取(RE) ========================
119
  def re_extract(entities, text):
120
  # 修改7:添加实体类型过滤
 
63
  edges = [f"{h} --[{r}]-> {t}" for h, t, r in knowledge_graph["relations"]]
64
  return "\n".join(["📌 实体:"] + nodes + ["", "📎 关系:"] + edges)
65
 
 
 
 
 
 
 
 
66
  # ======================== 实体识别(NER) ========================
67
  def merge_adjacent_entities(entities):
68
  merged = []
 
90
  def ner(text, model_type="bert"):
91
  start_time = time.time()
92
  if model_type == "chatglm" and use_chatglm:
93
+ try:
94
+ prompt = f"""请从以下文本中识别所有实体,严格按照JSON列表格式返回,每个实体包含text、type、start、end字段:
95
+ 示例:[{{"text": "北京", "type": "LOC", "start": 0, "end": 2}}]
96
+ 文本:{text}"""
97
+ response = chatglm_model.chat(chatglm_tokenizer, prompt, temperature=0.1)
98
+ if isinstance(response, tuple):
99
+ response = response[0]
100
+
101
+ # 增强 JSON 解析
102
+ try:
103
+ json_str = re.search(r'\[.*\]', response, re.DOTALL).group()
104
+ entities = json.loads(json_str)
105
+ # 验证字段
106
+ valid_entities = []
107
+ for ent in entities:
108
+ if all(k in ent for k in ("text", "type", "start", "end")):
109
+ valid_entities.append(ent)
110
+ return valid_entities, time.time() - start_time
111
+ except Exception as e:
112
+ print(f"JSON 解析失败: {e}")
113
+ return [], time.time() - start_time
114
+ except Exception as e:
115
+ print(f"ChatGLM 调用失败:{e}")
116
+ return [], time.time() - start_time
117
 
118
+ # 使用微调的 BERT 中文 NER 模型
119
  raw_results = bert_ner_pipeline(text)
120
  entities = []
121
  for r in raw_results:
 
131
  entities = merge_adjacent_entities(entities)
132
  return entities, time.time() - start_time
133
 
134
+
135
  # ======================== 关系抽取(RE) ========================
136
  def re_extract(entities, text):
137
  # 修改7:添加实体类型过滤