MuhanGao's picture
Upload 7 files
1e4cfbb verified
raw
history blame
50 kB
import streamlit as st
import pandas as pd
import numpy as np
import os
import json
import gzip
import re
from urllib.parse import quote, unquote
# Stylesheet plus a small helper script, injected into the page by main() via
# st.markdown(CUSTOM_CSS, unsafe_allow_html=True). The rules force a white page
# background and style the custom HTML classes emitted by the display_* helpers
# (paper/cluster cards, citation badges and stats, path navigation, ...).
# The CSS comments inside the literal are part of the runtime string and are
# therefore left exactly as they are.
# NOTE(review): `.path-nav` is declared twice below; the two declarations differ
# only in `margin` (0.5rem 0 vs 0.8rem 0).
# NOTE(review): the tooltip rule is declared twice on the same `:hover::after`
# selectors, first with `opacity: 0` and then with `opacity: 1`.
# NOTE(review): whether the embedded <script> is actually executed when rendered
# through st.markdown cannot be told from this file - confirm.
CUSTOM_CSS = """
<style>
/* Set default background color */
body {
background-color: white !important;
}
.stApp {
background-color: white !important;
}
h1 {
color: #2E4053;
font-family: 'Helvetica Neue', sans-serif;
font-size: 2.8rem !important;
border-bottom: 3px solid #3498DB;
padding-bottom: 0.3em;
}
h2, h3, h4 {
color: #2C3E50 !important;
font-family: 'Arial Rounded MT Bold', sans-serif;
}
.metric-card {
background: linear-gradient(145deg, #F8F9FA 0%, #FFFFFF 100%);
border-radius: 12px;
padding: 1.2rem;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
border: 1px solid #E0E7FF;
transition: transform 0.2s;
}
.metric-card:hover {
transform: translateY(-2px);
}
.citation-badge:hover::after,
.influential-badge:hover::after {
content: attr(title);
position: absolute;
bottom: calc(100% + 5px);
left: 50%;
transform: translateX(-50%);
background-color: rgba(0, 0, 0, 0.8);
color: #fff;
padding: 5px 10px;
border-radius: 4px;
white-space: nowrap;
z-index: 100;
opacity: 0;
pointer-events: none;
transition: opacity 0.3s ease;
}
.citation-badge:hover::after,
.influential-badge:hover::after {
opacity: 1;
}
.path-nav {
color: #6C757D;
font-size: 0.95rem;
padding: 0.8rem 1rem;
background: #F8F9FA;
border-radius: 8px;
margin: 0.5rem 0; /* 减少上下margin */
}
.stButton>button {
background: #3498DB !important;
color: white !important;
border-radius: 8px !important;
padding: 8px 20px !important;
border: none !important;
transition: all 0.3s !important;
}
.stButton>button:hover {
background: #2980B9 !important;
transform: scale(1.05);
box-shadow: 0 4px 8px rgba(52, 152, 219, 0.3);
}
.paper-card, .cluster-card {
background: white;
border-radius: 10px;
padding: 1.5rem;
margin: 1rem 0;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
border: 1px solid #EAEDF3;
overflow: hidden;
}
/* 调整标题的字号 - 增大cluster title */
.paper-title, .cluster-title {
color: #2C3E50;
font-size: 1.3rem !important; /* 增大原来的字号 */
font-weight: 700; /* 加粗 */
margin-bottom: 0.5rem;
cursor: pointer;
}
.paper-abstract, .cluster-abstract {
color: #6C757D;
line-height: 1.6;
font-size: 0.95rem;
margin: 1rem 0;
padding: 0.8rem;
background: #F9FAFB;
border-radius: 8px;
border-left: 4px solid #3498DB;
}
/* 减少expander之间的间距 */
.streamlit-expanderHeader {
font-weight: 600 !important;
color: #2C3E50 !important;
margin-top: 0.5rem !important;
margin-bottom: 0.5rem !important;
}
/* 调整expander的内部和外部间距 */
.streamlit-expander {
margin-top: 0.5rem !important;
margin-bottom: 0.5rem !important;
}
/* 更紧凑的expander内容区 */
.streamlit-expanderContent {
background: #FAFAFA;
border-radius: 0 0 8px 8px;
border: 1px solid #EAEDF3;
border-top: none;
padding: 8px 12px !important; /* 减少内部padding */
}
/* Additional styles */
.paper-section, .cluster-section {
margin-top: 20px;
padding: 15px;
border-radius: 8px;
background: #FAFAFA;
border-left: 4px solid #3498DB;
}
.paper-section-title, .cluster-section-title {
color: #2C3E50;
font-weight: 600;
margin-bottom: 10px;
border-bottom: 2px solid #EEE;
padding-bottom: 5px;
}
.section-problem {
border-left-color: #3498DB;
}
.section-solution {
border-left-color: #2ECC71;
}
.section-results {
border-left-color: #9B59B6;
}
.label {
font-weight: 600;
color: #34495E;
margin-bottom: 5px;
}
.value-box {
background: #F8F9FA;
padding: 10px;
border-radius: 5px;
margin-bottom: 10px;
font-size: 0.95rem;
color: #333;
line-height: 1.5;
}
/* Citation badge styles */
.citation-badge, .influential-badge {
display: inline-flex;
align-items: center;
padding: 4px 8px;
border-radius: 6px;
font-size: 0.85rem;
font-weight: 600;
gap: 4px;
white-space: nowrap;
}
.citation-badge {
background: #EBF5FB;
color: #2980B9;
}
.influential-badge {
background: #FCF3CF;
color: #F39C12;
}
.citation-icon, .influential-icon {
font-size: 1rem;
}
/* 修改后的引用统计格式 */
.citation-stats, .influential-stats {
display: flex;
align-items: center;
padding: 4px 12px;
border-radius: 6px;
font-size: 0.85rem;
margin-bottom: 6px;
white-space: nowrap;
}
.citation-stats {
background: #EBF5FB;
color: #2980B9;
}
.influential-stats {
background: #FCF3CF;
color: #F39C12;
}
.stats-divider {
margin: 0 6px;
color: rgba(0,0,0,0.2);
}
/* Field of study badge */
.field-badge {
display: inline-block;
background: #F1F8E9;
color: #558B2F;
padding: 3px 10px;
border-radius: 16px;
font-size: 0.75rem;
font-weight: 500;
border: 1px solid #C5E1A5;
}
/* JSON value display */
.json-value {
background: #F8F9FA;
padding: 10px;
border-radius: 6px;
margin-bottom: 10px;
white-space: pre-wrap;
font-family: monospace;
font-size: 0.9rem;
line-height: 1.5;
color: #2C3E50;
overflow-x: auto;
}
/* Collapsible content */
.cluster-content {
display: none;
}
.cluster-content.show {
display: block;
}
/* 重新设计集群标题区布局 */
.cluster-header {
display: flex;
flex-wrap: wrap;
justify-content: space-between;
align-items: center;
padding-bottom: 10px;
border-bottom: 1px solid #eee;
margin-bottom: 0px;
}
/* 左侧标题和集群信息 */
.cluster-header-left {
display: flex;
align-items: center;
flex: 1;
min-width: 200px;
}
/* 中间区域用于摘要展开器 */
.cluster-header-middle {
display: flex;
flex: 0 0 auto;
margin: 0 15px;
}
/* 右侧统计数据 */
.cluster-badge-container {
display: flex;
flex-wrap: wrap;
gap: 6px;
justify-content: flex-end;
}
/* 子集群查看按钮 */
.view-button {
margin-left: 15px;
}
/* 调整h3标题的上下margin */
h3 {
margin-top: 1rem !important;
margin-bottom: 0.5rem !important;
}
/* 调整内容区块的上下margin */
.stBlock {
margin-top: 0.5rem !important;
margin-bottom: 0.5rem !important;
}
/* 内联expander按钮样式 */
.inline-expander-button {
background: #E3F2FD;
border: 1px solid #BBDEFB;
border-radius: 4px;
padding: 4px 8px;
font-size: 0.85rem;
color: #1976D2;
cursor: pointer;
display: inline-flex;
align-items: center;
transition: all 0.2s;
}
.inline-expander-button:hover {
background: #BBDEFB;
}
/* 导航路径中的按钮样式 */
.path-nav-button {
display: inline-block;
margin: 0 5px;
padding: 5px 10px;
background: #E3F2FD;
border-radius: 5px;
color: #1976D2;
cursor: pointer;
font-weight: 500;
font-size: 0.9rem;
border: none;
transition: all 0.2s;
}
.path-nav-button:hover {
background: #BBDEFB;
}
/* 路径导航容器样式 */
.path-nav {
color: #6C757D;
font-size: 0.95rem;
padding: 0.8rem 1rem;
background: #F8F9FA;
border-radius: 8px;
margin: 0.8rem 0;
}
/* Paper count badge style */
.paper-count-badge {
display: inline-flex;
align-items: center;
margin-left: 12px;
background: #E8F4FD;
color: #2980B9;
padding: 3px 8px;
border-radius: 12px;
font-size: 0.85rem;
font-weight: 500;
}
</style>
<script>
function toggleClusterContent(id) {
const content = document.getElementById('cluster-content-' + id);
if (content) {
content.classList.toggle('show');
}
}
</script>
"""
def get_hierarchy_files():
    """Return the names of the hierarchy files found in ``hierarchies/``.

    Both plain ``*.json`` files and gzip-compressed ``*.json.gz`` files are
    discovered. A compressed file is reported under its ``.json`` name (the
    ``.gz`` suffix is dropped) because ``load_hierarchy_data`` takes the
    ``.json`` name and falls back to ``<name>.gz`` on its own. A hierarchy that
    exists in both forms is listed once.

    Returns:
        A sorted list of ``*.json`` file names, or an empty list when the
        ``hierarchies`` directory does not exist.
    """
    hierarchy_dir = 'hierarchies'
    if not os.path.isdir(hierarchy_dir):
        return []

    gz_suffix = '.gz'
    names = set()
    for entry in os.listdir(hierarchy_dir):
        if entry.endswith('.json'):
            names.add(entry)
        elif entry.endswith('.json' + gz_suffix):
            # Report the logical (uncompressed) name the loader expects.
            names.add(entry[:-len(gz_suffix)])

    # os.listdir() order is arbitrary; sort so the select box is stable.
    files = sorted(names)
    print(f"Found files: {files}")
    return files
def parse_filename(filename):
    """Extract hierarchy metadata from an underscore-separated file name.

    After removing ``.json`` the name is split on ``_`` and read positionally:
    ``parts[1]`` date, ``parts[2]`` embedder, ``parts[3]`` summarizer,
    ``parts[4]`` clustering method, ``parts[6]`` contribution type, followed by
    the building method, the cluster-level string and the random seed.
    ``parts[0]`` and ``parts[5]`` are not used. The building methods
    ``top_down`` and ``bottom_up`` span two parts and are re-joined.

    Args:
        filename: Hierarchy file name, with or without ``.json``.

    Returns:
        A dict with the keys ``date``, ``embedder``, ``summarizer``,
        ``clustermethod``, ``contribution_type``, ``building_method``,
        ``clusterlevel``, ``clusterlevel_array``, ``level_count`` and
        ``random_seed``. Values that cannot be determined are ``'Unknown'``
        (``[]`` / ``0`` for the level array and count).
    """
    parts = filename.replace('.json', '').split('_')

    # parts[6] is read below, so at least 7 parts are required. The previous
    # guard (< 6) let 6-part names through and crashed with IndexError.
    if len(parts) < 7:
        return {
            'date': 'Unknown',
            'embedder': 'Unknown',
            'summarizer': 'Unknown',
            'clustermethod': 'Unknown',
            'contribution_type': 'Unknown',
            'building_method': 'Unknown',
            'clusterlevel': 'Unknown',
            'clusterlevel_array': [],
            'level_count': 0,
            'random_seed': 'Unknown'
        }

    date_str = parts[1]
    embedder = parts[2]
    summarizer = parts[3]
    clustermethod = parts[4]
    # parts[5] is typically the "emb" placeholder and carries no information.
    contribution_type = parts[6]

    building_method = None
    clusterlevel_str = None
    seed = None
    remainder = parts[7:]
    if remainder:
        if remainder[:2] in (['top', 'down'], ['bottom', 'up']):
            # Compound method names were split by the "_" separator.
            building_method = '_'.join(remainder[:2])
            tail = remainder[2:]
        else:
            building_method = remainder[0]
            tail = remainder[1:]
        if tail:
            clusterlevel_str = tail[0]
        if len(tail) > 1:
            seed = tail[1]

    # Show YYYYMMDD dates as YYYY/MM/DD; leave anything else untouched.
    if len(date_str) == 8:
        formatted_date = f"{date_str[:4]}/{date_str[4:6]}/{date_str[6:]}"
    else:
        formatted_date = date_str

    clusterlevel_array = clusterlevel_str.split('-') if clusterlevel_str else []

    return {
        'date': formatted_date,
        'embedder': embedder,
        'summarizer': summarizer,
        'clustermethod': clustermethod,
        'contribution_type': contribution_type,
        'building_method': building_method or 'Unknown',
        'clusterlevel': clusterlevel_str or 'Unknown',
        'clusterlevel_array': clusterlevel_array,
        'level_count': len(clusterlevel_array),
        'random_seed': seed or 'Unknown'
    }
def format_hierarchy_option(filename):
    """Build the one-line, human-readable label shown for a hierarchy file.

    The label is assembled from the metadata returned by ``parse_filename``;
    the individual cluster levels are joined with a multiplication sign.
    """
    meta = parse_filename(filename)
    joined_levels = "×".join(meta['clusterlevel_array'])
    models = f"{meta['embedder']}/{meta['summarizer']}"
    return (
        f"{meta['date']} - {meta['clustermethod']} "
        f"({models}, {meta['contribution_type']}, {meta['building_method']}, "
        f"{meta['level_count']} levels: {joined_levels}, seed: {meta['random_seed']})"
    )
@st.cache_data
def load_hierarchy_data(filename):
    """Load a hierarchy JSON document from the ``hierarchies`` directory.

    The uncompressed file ``hierarchies/<filename>`` is preferred; when it does
    not exist, the gzip-compressed sibling ``hierarchies/<filename>.gz`` is
    tried. Files are read as UTF-8 rather than with the platform default
    encoding.

    Args:
        filename: File name relative to ``hierarchies/``.

    Returns:
        The parsed JSON document. When neither file exists, or the file found
        cannot be read or parsed, an error is shown with ``st.error`` and
        ``{"clusters": []}`` is returned.
    """
    filepath = os.path.join('hierarchies', filename)
    gzip_filepath = filepath + '.gz'

    # Prefer the uncompressed version when it is present.
    if os.path.exists(filepath):
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Report it the same way the compressed branch does.
            st.error(f"Error loading file {filepath}: {str(e)}")
            return {"clusters": []}

    # Fall back to the gzip-compressed version.
    if os.path.exists(gzip_filepath):
        try:
            with gzip.open(gzip_filepath, 'rt', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            # Deliberately broad: a damaged archive can surface as OSError,
            # EOFError or a zlib error, and all are reported to the user.
            st.error(f"Error loading compressed file {gzip_filepath}: {str(e)}")
            return {"clusters": []}

    st.error(f"Could not find hierarchy file: {filepath} or {gzip_filepath}")
    return {"clusters": []}
def get_cluster_statistics(clusters):
    """Summarise how many papers the clusters of one level contain.

    Args:
        clusters: Sequence of ``(cluster, path)`` pairs; only the cluster node
            of each pair is inspected.

    Returns:
        A dict mapping each statistic's display name to
        ``{'value': ..., 'tooltip': ...}``, where the tooltip is the hover
        text for that statistic. With no clusters every value is 0.
    """
    def papers_below(node):
        # Papers sit at the leaves: a node whose first child carries a
        # "paper_id" holds papers directly, otherwise recurse into the children.
        children = node["children"] if "children" in node else None
        if not children:
            return 0
        if "paper_id" in children[0]:
            return len(children)
        return sum(map(papers_below, children))

    tooltips = (
        ('Total Clusters', 'Total number of clusters at this level'),
        ('Total Papers', 'Total number of papers across all clusters at this level'),
        ('Average Papers per Cluster', 'Average number of papers per cluster'),
        ('Median Papers', 'Median number of papers per cluster'),
        ('Standard Deviation', 'Standard deviation of paper counts across clusters'),
        ('Max Papers in Cluster', 'Maximum number of papers in any single cluster'),
        ('Min Papers in Cluster', 'Minimum number of papers in any single cluster'),
    )

    n_clusters = len(clusters)
    sizes = [papers_below(cluster) for cluster, _ in clusters]

    if sizes:
        total = sum(sizes)
        mean = total / n_clusters if n_clusters > 0 else 0
        values = (
            n_clusters,
            total,
            round(mean, 2),
            round(np.median(sizes), 2),
            round(np.std(sizes), 2),
            max(sizes),
            min(sizes),
        )
    else:
        values = (n_clusters, 0, 0, 0, 0, 0, 0)

    return {
        label: {'value': value, 'tooltip': tip}
        for (label, tip), value in zip(tooltips, values)
    }
def calculate_citation_metrics(node):
    """Aggregate citation counts over every paper below a cluster node.

    The tree is walked through its ``children`` lists. A node whose first child
    is a dict with a ``paper_id`` is treated as holding papers directly;
    otherwise every dict child is visited recursively. Counts are read from
    each paper's ``semantic_scholar`` record; a missing record, a missing count
    or a count stored as ``None`` all contribute 0 (previously a ``None`` count
    raised ``TypeError`` during summation).

    Args:
        node: Cluster dict, optionally with a ``children`` list.

    Returns:
        A dict with ``total_citations``, ``avg_citations``, ``max_citations``,
        ``total_influential_citations``, ``avg_influential_citations``,
        ``max_influential_citations`` and ``paper_count``. Averages are rounded
        to two decimals; everything is 0 when no papers are found.
    """
    citation_values = []              # citation count of every paper found
    influential_citation_values = []  # influential citation count of every paper

    def collect(current):
        children = current.get("children") if isinstance(current, dict) else None
        if not children:
            return
        first = children[0]
        if isinstance(first, dict) and "paper_id" in first:
            # This node contains papers directly.
            for paper in children:
                if not isinstance(paper, dict):
                    continue
                scholar = paper.get('semantic_scholar') or {}
                # "or 0" also covers keys that are present but null.
                citation_values.append(scholar.get('citationCount') or 0)
                influential_citation_values.append(
                    scholar.get('influentialCitationCount') or 0
                )
        else:
            # Recurse into the child clusters.
            for child in children:
                if isinstance(child, dict):
                    collect(child)

    collect(node)

    paper_count = len(citation_values)
    total_citations = sum(citation_values)
    total_influential_citations = sum(influential_citation_values)

    if paper_count > 0:
        avg_citations = round(total_citations / paper_count, 2)
        avg_influential_citations = round(total_influential_citations / paper_count, 2)
    else:
        avg_citations = 0
        avg_influential_citations = 0

    return {
        'total_citations': total_citations,
        'avg_citations': avg_citations,
        'max_citations': max(citation_values, default=0),
        'total_influential_citations': total_influential_citations,
        'avg_influential_citations': avg_influential_citations,
        'max_influential_citations': max(influential_citation_values, default=0),
        'paper_count': paper_count
    }
def find_clusters_in_path(data, path):
    """Return the items located directly below ``path`` in the hierarchy.

    Args:
        data: Hierarchy document with a top-level ``clusters`` list.
        path: List of ``cluster_id`` values leading from the top level to the
            cluster whose children are wanted. Empty for the top level.

    Returns:
        A list of ``(item, item_path)`` pairs. For the top level every cluster
        is paired with ``[]``. Below a cluster, papers are paired with ``path``
        itself and sub-clusters with ``path + [cluster_id]``. An empty list is
        returned when ``data`` is unusable, a path segment cannot be found, or
        a cluster on the path has no children.
    """
    if not data or "clusters" not in data:
        return []

    top_level = data["clusters"]
    if not path:
        return [(cluster, []) for cluster in top_level]

    # Descend one level per path segment.
    level = top_level
    for segment in path:
        match = next(
            (node for node in level if node.get("cluster_id") == segment),
            None,
        )
        if match is None:
            return []
        if "children" not in match or not match["children"]:
            return []
        level = match["children"]

    # ``level`` now holds the children of the last cluster on the path.
    first = level[0]
    if isinstance(first, dict) and "paper_id" in first:
        return [(paper, path) for paper in level]

    return [
        (child, path + [child.get("cluster_id")])
        for child in level
        if isinstance(child, dict) and child.get("cluster_id") is not None
    ]
def parse_json_abstract(abstract_text):
    """Turn a JSON-encoded abstract into an HTML "Problem" section.

    When ``abstract_text`` is a JSON object whose ``"Problem"`` entry is itself
    an object, an HTML fragment showing that entry's domain, challenges and
    goal is returned (``'N/A'`` for missing fields). In every other case -
    text that is not JSON, JSON that is not an object, or a ``"Problem"`` entry
    that is not an object - the input is returned unchanged. Previously a
    non-object ``"Problem"`` value raised an uncaught ``AttributeError``.

    Args:
        abstract_text: Abstract as plain text or as a JSON string.

    Returns:
        The HTML fragment, or ``abstract_text`` itself.
    """
    try:
        abstract_json = json.loads(abstract_text)
    except (json.JSONDecodeError, ValueError, TypeError):
        # Not valid JSON (or not a string at all): show the text as it is.
        return abstract_text

    problem = abstract_json.get("Problem") if isinstance(abstract_json, dict) else None
    if not isinstance(problem, dict):
        return abstract_text

    return (
        "\n"
        "<div class='section-problem paper-section'>\n"
        "<div class='paper-section-title'>Problem</div>\n"
        "<div class='label'>Domain:</div>\n"
        f"<div class='value-box'>{problem.get('overarching problem domain', 'N/A')}</div>\n"
        "<div class='label'>Challenges:</div>\n"
        f"<div class='value-box'>{problem.get('challenges/difficulties', 'N/A')}</div>\n"
        "<div class='label'>Goal:</div>\n"
        f"<div class='value-box'>{problem.get('research question/goal', 'N/A')}</div>\n"
        "</div>\n"
    )
def display_path_details(path, data, level_count):
    """Render the chain of clusters selected so far, one entry per level.

    For every ``cluster_id`` in ``path`` the matching cluster is looked up
    among the current level's clusters and shown with its title, a
    ``Level N`` button and a collapsed expander holding its abstract. Clicking
    a level button truncates the navigation path to the levels above the
    clicked cluster, mirrors the new state into the URL query parameters and
    reruns the app.

    Args:
        path: List of ``cluster_id`` values from the top level downwards.
            Nothing is rendered when it is empty.
        data: Hierarchy document with a top-level ``clusters`` list.
        level_count: Not used by this function; kept for the existing callers.
    """
    if not path:
        return

    st.markdown("### Path Details")
    current = data["clusters"]

    for i, cluster_id in enumerate(path):
        level_number = i + 1  # levels are numbered from 1; the top level is Level 1
        indent = i * 32  # 32 pixels of indentation per level

        cluster = next((c for c in current if c["cluster_id"] == cluster_id), None)
        if cluster is None:
            # Unknown segment: render nothing for it and stay on this level.
            continue

        # Indented (empty) container preceding the entry.
        st.markdown(f"""
<div style='margin-left: {indent}px; margin-bottom: 10px;'>
</div>
""", unsafe_allow_html=True)
        # Extra spacing at the bottom.
        st.markdown("<div style='margin-bottom: 25px;'></div>", unsafe_allow_html=True)

        # One row: cluster name on the left, level button on the right.
        col1, col2 = st.columns([0.85, 0.15])
        with col1:
            st.markdown(f"""
<div style='display: flex; align-items: center;'>
<div style='width: 12px; height: 12px;
border-radius: 50%; background: #3B82F6;
margin-right: 8px;'></div>
<h4 style='font-size: 1.15rem; font-weight: 600;
color: #1F2937; margin: 0;'>
Cluster {cluster["cluster_id"]}: {cluster["title"]}
</h4>
</div>
""", unsafe_allow_html=True)
        with col2:
            button_clicked = st.button(
                f'Level {level_number}',
                key=f'level_btn_{i}_{cluster["cluster_id"]}',
            )

        if button_clicked:
            st.session_state.path = path[:i]
            # Read the hierarchy before clearing; it may legitimately be absent.
            hierarchy = st.query_params.get('hierarchy')
            st.query_params.clear()
            if hierarchy is not None:
                st.query_params['hierarchy'] = hierarchy
            if st.session_state.path:
                # Assign the whole list so that every path element becomes a
                # repeated "path" key. Assigning the elements one by one
                # overwrote the key and kept only the last element.
                st.query_params['path'] = [str(step) for step in st.session_state.path]
            st.rerun()

        with st.container():
            st.markdown(f"""
<div style='margin-left: {indent}px; width: calc(100% - {indent}px);'>
</div>
""", unsafe_allow_html=True)
            with st.expander("📄 Show Cluster Details", expanded=False):
                # The abstract may be JSON; parse_json_abstract formats it if so.
                abstract_content = parse_json_abstract(cluster.get("abstract", ""))
                st.markdown(f"""
<div style='color: #374151; line-height: 1.6;'>
{abstract_content}
</div>
""", unsafe_allow_html=True)

        # Descend; a cluster without children leaves nothing further to match.
        current = cluster.get("children") or []
# (expander key in the paper dict, heading, heading underline colour,
#  rows of (label, key inside the section dict, value-box accent colour))
_PAPER_DETAIL_SECTIONS = (
    ('problem', '🔍 Problem Details', '#3498DB', (
        ('Problem Domain', 'overarching problem domain', '#3498DB'),
        ('Challenges/Difficulties', 'challenges/difficulties', '#E74C3C'),
        ('Research Question/Goal', 'research question/goal', '#2ECC71'),
    )),
    ('solution', '💡 Solution Details', '#2ECC71', (
        ('Solution Domain', 'overarching solution domain', '#3498DB'),
        ('Solution Approach', 'solution approach', '#9B59B6'),
        ('Novelty of Solution', 'novelty of the solution', '#F1C40F'),
    )),
    ('results', '📊 Results Details', '#9B59B6', (
        ('Findings/Results', 'findings/results', '#3498DB'),
        ('Potential Impact', 'potential impact of the results', '#E67E22'),
    )),
)


def _paper_section_heading(title, border_color, margin_top=25):
    """Return the HTML of an underlined ``<h4>`` heading inside the expander."""
    return f"""
<div style='margin-top: {margin_top}px; margin-bottom: 20px;'>
<h4 style='color: #2C3E50; border-bottom: 2px solid {border_color}; padding-bottom: 8px;'>
{title}
</h4>
</div>
"""


def _render_paper_fields(record, rows):
    """Render ``rows`` as labels (left column) and value boxes (right column)."""
    label_col, value_col = st.columns([1, 2])
    with label_col:
        for position, (label, _key, _color) in enumerate(rows):
            # Every label after the first gets a top margin.
            spacing = "margin-top: 15px; " if position else ""
            st.markdown(f"""
<div style='font-weight: 600; color: #34495E; {spacing}margin-bottom: 5px;'>
{label}
</div>
""", unsafe_allow_html=True)
    with value_col:
        for position, (_label, key, color) in enumerate(rows):
            spacing = " margin-top: 10px;" if position else ""
            st.markdown(f"""
<div style='background: #F8F9FA; padding: 10px; border-radius: 5px;
border-left: 4px solid {color};{spacing}'>
{record.get(key, 'Not specified')}
</div>
""", unsafe_allow_html=True)


def display_paper(item):
    """Render one paper: a header card plus a collapsed details expander.

    The card shows the title, a link to the paper's Semantic Scholar URL and
    the two citation counts. The expander shows the abstract, the optional
    ``problem`` / ``solution`` / ``results`` sections and the author list.

    Compared with the previous version: citation counts stored as ``None`` are
    shown as 0 instead of "None", and the link is omitted when no URL is known
    instead of rendering an anchor with an empty ``href``.

    Args:
        item: Paper dict. ``semantic_scholar`` may be missing or ``None``.
    """
    semantic_scholar = item.get('semantic_scholar', {}) or {}
    url = semantic_scholar.get('url', '') or ''
    citation_count = semantic_scholar.get('citationCount') or 0
    influential_citation_count = semantic_scholar.get('influentialCitationCount') or 0
    # NOTE(review): the previous version built "field-badge" spans from
    # semantic_scholar['fieldsOfStudy'] but never rendered them; that dead code
    # was dropped. Confirm whether the badges were meant to be displayed.

    link_html = ""
    if url:
        # Starts with a newline so an absent link leaves no blank line behind.
        link_html = f"""
<a href="{url}" target="_blank"
style='font-size: 0.9em; margin-left: 8px;
color: #3498DB; text-decoration: none;
transition: all 0.3s;'
title='View paper on Semantic Scholar'>
🔗
</a>"""

    # Header card with title, link and citation counts - always visible.
    st.markdown(f"""
<div class='paper-card'>
<div style='display: flex; justify-content: space-between; align-items: flex-start;'>
<div class='paper-title' style='flex-grow: 1;'>
{item.get('title', 'Untitled Paper')}{link_html}
</div>
<div style='display: flex; align-items: center; gap: 12px;'>
<div class='citation-badge' title='Number of times this paper has been cited by other papers.'>
<span class='citation-icon'>⭐</span> Citations: {citation_count}
</div>
<div class='influential-badge' title='Number of times this paper has been cited by influential papers. Influential citation means that the cited publication has a significant impact on the citing publication.'>
<span class='influential-icon'>🔥</span> Influential Citations: {influential_citation_count}
</div>
</div>
</div>
""", unsafe_allow_html=True)

    # One expander for all detailed information - collapsed by default.
    with st.expander("📑 Show Detailed Information", expanded=False):
        st.markdown(
            _paper_section_heading('📄 Abstract', '#3498DB', margin_top=15),
            unsafe_allow_html=True,
        )
        abstract_text = item.get('abstract', 'No abstract available')
        st.markdown(f"<div class='paper-abstract'>{abstract_text}</div>", unsafe_allow_html=True)

        # Problem / solution / results, each only when present and non-empty.
        for key, heading, border_color, rows in _PAPER_DETAIL_SECTIONS:
            section = item.get(key)
            if not section:
                continue
            st.markdown(_paper_section_heading(heading, border_color), unsafe_allow_html=True)
            _render_paper_fields(section, rows)

        # Author information.
        authors = semantic_scholar.get('authors') or []
        if authors:
            st.markdown(_paper_section_heading('👥 Authors', '#E67E22'), unsafe_allow_html=True)
            for author in authors:
                if not isinstance(author, dict):
                    continue
                st.markdown(f"""
<div style='display: flex; margin-bottom: 15px; padding-bottom: 10px; border-bottom: 1px solid #eee;'>
<div style='flex: 1;'>
<div style='font-weight: 600; font-size: 1.05rem;'>{author.get('name', 'Unknown')}</div>
<div style='color: #666; margin-top: 3px;'>Author ID: {author.get('authorId', 'N/A')}</div>
</div>
<div style='display: flex; gap: 15px;'>
<div title='Papers'>
<span style='font-size: 0.85rem; color: #666;'>Papers</span>
<div style='font-weight: 600; color: #3498DB;'>{author.get('paperCount', 0)}</div>
</div>
<div title='Citations'>
<span style='font-size: 0.85rem; color: #666;'>Citations</span>
<div style='font-weight: 600; color: #3498DB;'>{author.get('citationCount', 0)}</div>
</div>
<div title='h-index'>
<span style='font-size: 0.85rem; color: #666;'>h-index</span>
<div style='font-weight: 600; color: #3498DB;'>{author.get('hIndex', 0)}</div>
</div>
</div>
</div>
""", unsafe_allow_html=True)

    # Close paper-card div
    st.markdown("</div>", unsafe_allow_html=True)
def display_cluster(item, path):
    """Render one cluster: title, citation statistics, summary and drill-down.

    The header line shows the cluster title with the number of papers below
    it. Underneath, the left column holds the aggregated citation statistics
    and a collapsed "Cluster Summary" expander; the right column holds a
    button that appends this cluster to ``st.session_state.path`` and reruns
    the app. The button is only shown for clusters that have children.

    Args:
        item: Cluster dict with ``cluster_id``, ``title``, ``abstract`` and,
            optionally, ``children``.
        path: Path of ``cluster_id`` values leading to this cluster; used only
            to build a unique widget key.
    """
    node_id = item['cluster_id']
    widget_key = f"{node_id}_{'-'.join(str(step) for step in path)}"

    metrics = calculate_citation_metrics(item)
    summary_html = parse_json_abstract(item['abstract'])

    # The button label depends on whether the next level holds papers or
    # further sub-clusters.
    has_children = "children" in item and item["children"]
    if has_children:
        kids = item["children"]
        if "paper_id" in kids[0]:
            count = metrics['paper_count']
        else:
            count = len(kids)
        if len(kids) > 0 and "paper_id" in kids[0]:
            btn_text = f'View Papers ({count})'
        else:
            btn_text = f'View Sub-clusters ({count})'

    # Title and paper count, kept on one horizontal line.
    st.markdown(f"""
<div style='display: flex; align-items: center;'>
<div class='cluster-title' style='margin: 0; font-weight: 700; font-size: 1.3rem;'>
{item['title']}
</div>
<div style='display: inline-flex; align-items: center; margin-left: 12px;
background: #F4F6F9; color: #566573; padding: 2px 10px;
border-radius: 6px; font-size: 0.95rem; font-weight: 500;'>
<span style='margin-right: 4px;'>📑</span>{metrics['paper_count']} papers
</div>
</div>
""", unsafe_allow_html=True)

    stats_col, action_col = st.columns([8, 2])

    with stats_col:
        # Citation statistics, separated by pipe characters.
        st.markdown(f"""
<div>
<div class='citation-stats'>
<span style='font-weight: bold; margin-right: 5px;'>⭐</span> Citations:
Total {metrics['total_citations']} <span class='stats-divider'>|</span>
Avg {metrics['avg_citations']} <span class='stats-divider'>|</span>
Max {metrics['max_citations']}
</div>
<div class='influential-stats'>
<span style='font-weight: bold; margin-right: 5px;'>🔥</span> Influential Citations:
Total {metrics['total_influential_citations']} <span class='stats-divider'>|</span>
Avg {metrics['avg_influential_citations']} <span class='stats-divider'>|</span>
Max {metrics['max_influential_citations']}
</div>
</div>
""", unsafe_allow_html=True)
        # Collapsed expander with the cluster summary.
        with st.expander("📄 Cluster Summary", expanded=False):
            st.markdown(f"""
<div class='cluster-abstract'>{summary_html}</div>
""", unsafe_allow_html=True)

    with action_col:
        # Drill-down button, only for clusters with sub-clusters or papers.
        if has_children and st.button(btn_text, key=f"btn_{widget_key}"):
            st.session_state.path.append(item['cluster_id'])
            st.rerun()

    # Separator line below the cluster.
    st.markdown("<hr style='margin: 0.5rem 0; border-color: #eee;'>", unsafe_allow_html=True)
# Kept verbatim from the original page setup.
# NOTE(review): Streamlit may not execute <script> tags passed through
# st.markdown -- confirm whether this snippet has any effect.
_LIGHT_THEME_SCRIPT = (
    "\n<script>\n"
    "var elements = window.parent.document.querySelectorAll('.stApp');\n"
    "elements[0].classList.add('light');\n"
    "elements[0].classList.remove('dark');\n"
    "</script>\n"
)


def _metric_card_html(title, value, card_style="", compact=False):
    """Return the HTML for a single ``metric-card`` tile.

    Args:
        title: Heading text shown at the top of the card.
        value: Value rendered under the heading. It is interpolated as-is;
            no HTML escaping is applied.
        card_style: Inline CSS for the outer ``<div>``; the ``style``
            attribute is omitted entirely when this is empty.
        compact: Use the tighter heading/value styles of the statistics grid
            instead of the metadata-grid styles.

    Returns:
        A newline-joined HTML string with no leading indentation, so Markdown
        never interprets it as an indented code block.
    """
    if compact:
        heading_style = "margin-top: 0; margin-bottom: 5px; color: #2C3E50; font-size: 0.85rem;"
        value_style = "font-size: 0.9rem; font-weight: 600; color: #3498DB; margin: 0;"
    else:
        heading_style = "margin-top: 0; color: #2C3E50; font-size: 0.9rem;"
        value_style = "font-size: 0.9rem; font-weight: 600; color: #3498DB;"
    style_attr = f" style='{card_style}'" if card_style else ""
    return "\n".join([
        f"<div class='metric-card'{style_attr}>",
        f"<h4 style='{heading_style}'>{title}</h4>",
        f"<p style='{value_style}'>{value}</p>",
        "</div>",
    ])


def _render_hierarchy_metadata(info):
    """Render the collapsed "Hierarchy Metadata" expander as a 3x2 card grid."""
    cards_by_column = [
        [
            ("Date", info['date']),
            ("Clustering Method", info['clustermethod']),
        ],
        [
            ("Embedder / Summarizer", f"{info['embedder']} / {info['summarizer']}"),
            ("Contribution Type", info['contribution_type']),
        ],
        [
            ("Building Method", info['building_method']),
            ("Cluster Levels", f"{info['clusterlevel']} (Total: {info['level_count']})"),
        ],
    ]
    with st.expander("📋 Hierarchy Metadata", expanded=False):
        for column, (top_card, bottom_card) in zip(st.columns(3), cards_by_column):
            with column:
                html = "\n".join([
                    _metric_card_html(*top_card),
                    _metric_card_html(*bottom_card, card_style="margin-top: 10px;"),
                ])
                st.markdown(html, unsafe_allow_html=True)


def _render_cluster_statistics(current_clusters):
    """Render the collapsed "Cluster Statistics" expander (two rows of three cards)."""
    # Each row: (inline card style, [(card heading, key into the stats dict), ...]).
    rows = [
        ("padding: 0.8rem;", [
            ("Total Clusters", "Total Clusters"),
            ("Total Papers", "Total Papers"),
            ("Avg Papers/Cluster", "Average Papers per Cluster"),
        ]),
        ("padding: 0.8rem; margin-bottom: 15px;", [
            ("Median Papers", "Median Papers"),
            ("Max Papers in Cluster", "Max Papers in Cluster"),
            ("Min Papers in Cluster", "Min Papers in Cluster"),
        ]),
    ]
    with st.expander("📊 Cluster Statistics", expanded=False):
        stats = get_cluster_statistics(current_clusters)
        for card_style, cards in rows:
            for column, (heading, stat_key) in zip(st.columns(3), cards):
                with column:
                    st.markdown(
                        _metric_card_html(
                            heading,
                            stats[stat_key]['value'],
                            card_style=card_style,
                            compact=True,
                        ),
                        unsafe_allow_html=True,
                    )


def _resolve_path_titles(clusters, path):
    """Map the cluster ids in ``path`` to ``(level, title, cluster_id)`` tuples.

    Walks ``clusters`` one level per id, descending into ``children`` after
    each match. Resolution stops at the first id that is not found, so the
    returned list is always a prefix of ``path`` and its positions line up
    with positions in ``path``.
    """
    resolved = []
    current = clusters
    for level_num, cid in enumerate(path, start=1):
        match = next((c for c in current if c["cluster_id"] == cid), None)
        if match is None:
            # A stale or foreign id cannot have resolvable descendants.
            break
        resolved.append((level_num, match["title"], match["cluster_id"]))
        current = match["children"]
    return resolved


def _render_current_path(data):
    """Render the "Current Path" breadcrumb with a jump-back button per level."""
    path_info = _resolve_path_titles(data["clusters"], st.session_state.path)
    with st.container():
        st.markdown(
            "<h3 style='margin-top: 0.5rem; margin-bottom: 0.8rem;'>🗂️ Current Path</h3>",
            unsafe_allow_html=True,
        )
        # Root entry: clears the whole path.
        col1, col2 = st.columns([0.3, 0.7])
        with col1:
            st.markdown("<div><strong>Root:</strong></div>", unsafe_allow_html=True)
        with col2:
            if st.button("All Papers", key="root_button"):
                st.session_state.path = []
                st.rerun()
        # One row per resolved level of the path.
        for i, (level_num, title, cluster_id) in enumerate(path_info):
            col1, col2 = st.columns([0.3, 0.7])
            with col1:
                st.markdown(
                    f"<div><strong>Level {level_num}:</strong></div>",
                    unsafe_allow_html=True,
                )
            with col2:
                # Clicking a level truncates the path back to that level.
                if st.button(f"{title}", key=f"lvl_{i}_{cluster_id}"):
                    st.session_state.path = st.session_state.path[:i + 1]
                    st.rerun()


def main():
    """Run the Paper Clusters Explorer page.

    Configures the page, lets the user pick a hierarchy file (mirrored in the
    ``hierarchy`` query parameter), shows the hierarchy metadata and
    per-level statistics, renders back/breadcrumb navigation driven by
    ``st.session_state.path``, and finally lists the clusters or papers found
    at the current path.

    Returns early with an error message when no hierarchy files are found.
    """
    st.set_page_config(
        layout="wide",
        page_title="Paper Clusters Explorer",
        initial_sidebar_state="expanded",
        menu_items=None
    )
    # Request the light theme, then inject the app-wide stylesheet.
    st.markdown(_LIGHT_THEME_SCRIPT, unsafe_allow_html=True)
    st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

    hierarchy_files = get_hierarchy_files()
    if not hierarchy_files:
        st.error("No hierarchy files found in /hierarchies directory")
        return

    # Manage file selection via query params.
    current_url = st.query_params.get('hierarchy', None)
    current_file = unquote(current_url) + '.json' if current_url else None
    hierarchy_options = {format_hierarchy_option(f): f for f in hierarchy_files}
    available_files = list(hierarchy_options.values())
    try:
        default_index = available_files.index(current_file)
    except ValueError:
        # No query param, or it names a file that is not available (stale
        # bookmark / edited URL): fall back to the first hierarchy rather
        # than crashing the page.
        default_index = 0
    selected_option = st.selectbox(
        'Select Hierarchy',
        options=list(hierarchy_options.keys()),
        index=default_index
    )
    selected_file = hierarchy_options[selected_option]
    # Save the selected file in the query params so the URL is shareable.
    if selected_file != current_file:
        st.query_params['hierarchy'] = quote(selected_file.replace('.json', ''))

    data = load_hierarchy_data(selected_file)
    info = parse_filename(selected_file)

    _render_hierarchy_metadata(info)

    # Seed the navigation path from the URL only on the first run of a session.
    if 'path' not in st.session_state:
        path_params = st.query_params.get_all('path')
        st.session_state.path = [p for p in path_params if p]

    current_clusters = find_clusters_in_path(data, st.session_state.path)
    current_level = len(st.session_state.path)
    total_levels = info['level_count']
    level_name = f'Level {current_level + 1}' if current_level < total_levels else 'Papers'
    # Paper level is reached either by depth or when the first item at the
    # current path carries a "paper_id" key.
    is_paper_level = current_level >= total_levels or (
        current_clusters and "paper_id" in current_clusters[0][0]
    )

    if not is_paper_level and current_clusters:
        _render_cluster_statistics(current_clusters)

    # Back navigation button.
    if st.session_state.path:
        if st.button('← Back', key='back_button'):
            st.session_state.path.pop()
            st.rerun()

    # Current path display.
    if st.session_state.path:
        _render_current_path(data)

    # Heading for the content list.
    heading = '📑 Papers' if is_paper_level else '📂 ' + level_name
    st.markdown(
        "\n<h3 style='margin: 1rem 0 0.5rem 0; color: #2C3E50;'>\n"
        f"{heading}\n"
        "</h3>\n",
        unsafe_allow_html=True,
    )

    for item, full_path in current_clusters:
        if is_paper_level:
            display_paper(item)
        else:
            display_cluster(item, full_path)
# Script entry point: run the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()