Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -422,55 +422,135 @@ def re_extract(entities, text, use_bert_model=True):
|
|
422 |
# ======================== 文本分析主流程 ========================
|
423 |
def create_knowledge_graph(entities, relations):
|
424 |
"""
|
425 |
-
|
426 |
"""
|
427 |
-
|
428 |
-
|
429 |
-
# 去重实体
|
430 |
-
unique_entities = []
|
431 |
-
seen = set()
|
432 |
-
for e in entities:
|
433 |
-
if (e['text'], e['type']) not in seen:
|
434 |
-
seen.add((e['text'], e['type']))
|
435 |
-
unique_entities.append(e)
|
436 |
|
437 |
-
#
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
|
|
444 |
|
445 |
-
#
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
|
448 |
-
#
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
454 |
|
455 |
-
#
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
}
|
464 |
-
|
465 |
-
icon = icons.get(e['type'], '•')
|
466 |
-
result.append(f"- {icon} **{e['text']}** `{e['type']}`")
|
467 |
-
|
468 |
-
# 添加关系
|
469 |
-
result.append("\n## 关系列表")
|
470 |
-
for r in unique_relations:
|
471 |
-
result.append(f"- **{r['head']}** ➡️ *{r['relation']}* ➡️ **{r['tail']}**")
|
472 |
|
473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
|
475 |
def process_text(text, model_type="bert"):
|
476 |
"""
|
|
|
422 |
# ======================== 文本分析主流程 ========================
|
423 |
def create_knowledge_graph(entities, relations):
    """
    Build an interactive network-graph rendering of the knowledge graph.

    Args:
        entities: iterable of dicts; each is read via ``entity['text']`` and
            ``entity['type']``. Entities are de-duplicated by ``text`` only,
            so the first type seen for a given text decides the node color.
        relations: iterable of dicts; each is read via ``relation['head']``,
            ``relation['tail']`` and ``relation['relation']``. A relation is
            drawn only when both endpoints were added as nodes.

    Returns:
        str: an HTML fragment consisting of a color legend followed by the
        HTML document produced by the ``Network`` object. If generating the
        HTML fails, an HTML ``<div class='error'>`` with the (escaped) error
        message is returned instead of raising.
    """
    # Function-scope imports keep this block self-contained.
    import html
    import tempfile

    # Create a new directed network graph.
    net = Network(height="600px", width="100%", bgcolor="#ffffff", font_color="black", directed=True)

    # Color mapping per entity type.
    entity_colors = {
        'PER': '#FF6B6B',    # person - red
        'ORG': '#4ECDC4',    # organization - cyan
        'LOC': '#45B7D1',    # location - blue
        'TIME': '#96CEB4',   # time - green
        'TITLE': '#D4A5A5',  # job title - pink
    }

    # Legend entries, in display order. TIME is included so the legend
    # covers every type that has a dedicated node color.
    legend_labels = (
        ('PER', '人物'),
        ('ORG', '组织'),
        ('LOC', '地点'),
        ('TIME', '时间'),
        ('TITLE', '职位'),
    )

    # Add one node per distinct entity text.
    added_nodes = set()
    for entity in entities:
        if entity['text'] in added_nodes:
            continue
        # Unknown entity types fall back to light grey.
        node_color = entity_colors.get(entity['type'], '#D3D3D3')
        net.add_node(
            entity['text'],
            label=entity['text'],
            title=f"类型: {entity['type']}",
            color=node_color,
            size=20,
            font={'size': 16},
        )
        added_nodes.add(entity['text'])

    # Add relation edges; skip relations whose endpoints are not nodes,
    # because adding an edge to a missing node would fail.
    for relation in relations:
        if relation['head'] in added_nodes and relation['tail'] in added_nodes:
            net.add_edge(
                relation['head'],
                relation['tail'],
                label=relation['relation'],
                title=relation['relation'],
                arrows={'to': {'enabled': True, 'type': 'arrow'}},
                color={'color': '#666666'},
                font={'size': 12},
            )

    # Physics / layout / interaction options, passed as a JSON string.
    net.set_options('''
    {
        "nodes": {
            "shape": "dot",
            "shadow": true
        },
        "edges": {
            "smooth": {
                "type": "continuous",
                "forceDirection": "none"
            },
            "shadow": true
        },
        "physics": {
            "barnesHut": {
                "gravitationalConstant": -2000,
                "centralGravity": 0.3,
                "springLength": 200,
                "springConstant": 0.04,
                "damping": 0.09
            },
            "minVelocity": 0.75
        },
        "interaction": {
            "hover": true,
            "navigationButtons": true,
            "keyboard": true
        }
    }
    ''')

    # Generate the HTML by saving to a temporary file and reading it back.
    try:
        # Create the temp directory next to this file (if it does not exist).
        temp_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "temp")
        os.makedirs(temp_dir, exist_ok=True)

        # mkstemp guarantees a unique name even when several requests arrive
        # within the same second (a timestamp-based name would collide).
        fd, output_path = tempfile.mkstemp(prefix="kg_", suffix=".html", dir=temp_dir)
        os.close(fd)

        try:
            # Save the graph, then read the generated HTML back.
            net.save_graph(output_path)
            with open(output_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Always delete the temp file, including when saving or reading
            # failed; a cleanup failure must not mask the real result.
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                logging.warning("Could not remove temp file %s: %s", output_path, cleanup_error)

        # Adjust the generated HTML to better fit the Gradio interface.
        html_content = html_content.replace('height: 600px', 'height: 700px')

        # Build the legend, one colored dot + caption per entity type.
        legend_items = "".join(
            f"""
            <div style="display: flex; align-items: center; gap: 5px;">
                <div style="width: 15px; height: 15px; background: {entity_colors[entity_type]}; border-radius: 50%;"></div>
                <span>{caption} ({entity_type})</span>
            </div>"""
            for entity_type, caption in legend_labels
        )
        legend_html = f"""
        <div style="margin-bottom: 10px; padding: 10px; background-color: #f8f9fa; border-radius: 5px;">
            <div style="font-weight: bold; margin-bottom: 5px;">图例说明:</div>
            <div style="display: flex; gap: 15px; flex-wrap: wrap;">{legend_items}
            </div>
        </div>
        """

        # Prepend the legend to the graph HTML.
        return legend_html + html_content

    except Exception as e:
        # Boundary handler: the caller expects an HTML string, so report the
        # failure in-band. logging.exception keeps the traceback in the log.
        logging.exception("生成知识图谱失败: %s", e)
        # Escape the message so exception text cannot inject markup.
        return f"<div class='error'>生成知识图谱失败: {html.escape(str(e))}</div>"
|
554 |
|
555 |
def process_text(text, model_type="bert"):
|
556 |
"""
|