MuhanGao committed on
Commit
78eb592
·
verified ·
1 Parent(s): bc62668

Delete src

Browse files
src/README.md DELETED
@@ -1,35 +0,0 @@
1
- ---
2
- title: Science Hierarchography
3
- emoji: 📚
4
- colorFrom: blue
5
- colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: "1.41.1"
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- # Paper Clusters Explorer
13
-
14
- This Streamlit application visualizes research paper hierarchies, allowing exploration of clustered academic papers at different levels. Users can navigate through a hierarchical structure of paper clusters, view detailed paper information, and explore relationships between papers.
15
-
16
- ## Features
17
-
18
- - Browse hierarchical clusters of research papers
19
- - View detailed paper information including abstracts, citations, and metadata
20
- - Navigate through multiple clustering levels
21
- - Inspect citation statistics for papers and clusters
22
- - Interactive UI with expandable sections for paper details
23
-
24
- ## Usage
25
-
26
- 1. Select a hierarchy from the dropdown menu
27
- 2. Navigate through clusters by clicking on them
28
- 3. Expand paper details to view abstracts, problem statements, solutions, and results
29
- 4. Use the breadcrumb navigation to move up the hierarchy
30
-
31
- ## Data Structure
32
-
33
- This app expects hierarchy data in JSON format stored in the `hierarchies/` directory.
34
-
35
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/app.py DELETED
@@ -1,1301 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import os
5
- import json
6
- import gzip
7
- import re
8
- from urllib.parse import quote, unquote
9
-
10
# Global stylesheet (plus one small script helper) injected into the page via
# st.markdown(..., unsafe_allow_html=True).
#
# Changes relative to the previous revision:
#   * The hover tooltip is now declared on the badge's ::after pseudo-element in
#     its resting state (opacity 0) and only switched to opacity 1 on :hover.
#     Previously both declarations used the identical ":hover::after" selector,
#     so the second one always won and the fade transition could never run.
#   * Badges are "position: relative" so the absolutely positioned tooltip is
#     anchored to the badge instead of some unrelated ancestor.
#   * The first of two duplicate ".path-nav" rules was dropped; it was fully
#     overridden by the later one, so the effective style is unchanged.
CUSTOM_CSS = """
<style>
    /* Set default background color */
    body {
        background-color: white !important;
    }

    .stApp {
        background-color: white !important;
    }

    h1 {
        color: #2E4053;
        font-family: 'Helvetica Neue', sans-serif;
        font-size: 2.8rem !important;
        border-bottom: 3px solid #3498DB;
        padding-bottom: 0.3em;
    }

    h2, h3, h4 {
        color: #2C3E50 !important;
        font-family: 'Arial Rounded MT Bold', sans-serif;
    }

    .metric-card {
        background: linear-gradient(145deg, #F8F9FA 0%, #FFFFFF 100%);
        border-radius: 12px;
        padding: 1.2rem;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
        border: 1px solid #E0E7FF;
        transition: transform 0.2s;
    }

    .metric-card:hover {
        transform: translateY(-2px);
    }

    .citation-badge::after,
    .influential-badge::after {
        content: attr(title);
        position: absolute;
        bottom: calc(100% + 5px);
        left: 50%;
        transform: translateX(-50%);
        background-color: rgba(0, 0, 0, 0.8);
        color: #fff;
        padding: 5px 10px;
        border-radius: 4px;
        white-space: nowrap;
        z-index: 100;
        opacity: 0;
        pointer-events: none;
        transition: opacity 0.3s ease;
    }

    .citation-badge:hover::after,
    .influential-badge:hover::after {
        opacity: 1;
    }

    .stButton>button {
        background: #3498DB !important;
        color: white !important;
        border-radius: 8px !important;
        padding: 8px 20px !important;
        border: none !important;
        transition: all 0.3s !important;
    }

    .stButton>button:hover {
        background: #2980B9 !important;
        transform: scale(1.05);
        box-shadow: 0 4px 8px rgba(52, 152, 219, 0.3);
    }

    .paper-card, .cluster-card {
        background: white;
        border-radius: 10px;
        padding: 1.5rem;
        margin: 1rem 0;
        box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
        border: 1px solid #EAEDF3;
        overflow: hidden;
    }

    /* 调整标题的字号 - 增大cluster title */
    .paper-title, .cluster-title {
        color: #2C3E50;
        font-size: 1.3rem !important; /* 增大原来的字号 */
        font-weight: 700; /* 加粗 */
        margin-bottom: 0.5rem;
        cursor: pointer;
    }

    .paper-abstract, .cluster-abstract {
        color: #6C757D;
        line-height: 1.6;
        font-size: 0.95rem;
        margin: 1rem 0;
        padding: 0.8rem;
        background: #F9FAFB;
        border-radius: 8px;
        border-left: 4px solid #3498DB;
    }

    /* 减少expander之间的间距 */
    .streamlit-expanderHeader {
        font-weight: 600 !important;
        color: #2C3E50 !important;
        margin-top: 0.5rem !important;
        margin-bottom: 0.5rem !important;
    }

    /* 调整expander的内部和外部间距 */
    .streamlit-expander {
        margin-top: 0.5rem !important;
        margin-bottom: 0.5rem !important;
    }

    /* 更紧凑的expander内容区 */
    .streamlit-expanderContent {
        background: #FAFAFA;
        border-radius: 0 0 8px 8px;
        border: 1px solid #EAEDF3;
        border-top: none;
        padding: 8px 12px !important; /* 减少内部padding */
    }

    /* Additional styles */
    .paper-section, .cluster-section {
        margin-top: 20px;
        padding: 15px;
        border-radius: 8px;
        background: #FAFAFA;
        border-left: 4px solid #3498DB;
    }

    .paper-section-title, .cluster-section-title {
        color: #2C3E50;
        font-weight: 600;
        margin-bottom: 10px;
        border-bottom: 2px solid #EEE;
        padding-bottom: 5px;
    }

    .section-problem {
        border-left-color: #3498DB;
    }

    .section-solution {
        border-left-color: #2ECC71;
    }

    .section-results {
        border-left-color: #9B59B6;
    }

    .label {
        font-weight: 600;
        color: #34495E;
        margin-bottom: 5px;
    }

    .value-box {
        background: #F8F9FA;
        padding: 10px;
        border-radius: 5px;
        margin-bottom: 10px;
        font-size: 0.95rem;
        color: #333;
        line-height: 1.5;
    }

    /* Citation badge styles */
    .citation-badge, .influential-badge {
        position: relative;
        display: inline-flex;
        align-items: center;
        padding: 4px 8px;
        border-radius: 6px;
        font-size: 0.85rem;
        font-weight: 600;
        gap: 4px;
        white-space: nowrap;
    }

    .citation-badge {
        background: #EBF5FB;
        color: #2980B9;
    }

    .influential-badge {
        background: #FCF3CF;
        color: #F39C12;
    }

    .citation-icon, .influential-icon {
        font-size: 1rem;
    }

    /* 修改后的引用统计格式 */
    .citation-stats, .influential-stats {
        display: flex;
        align-items: center;
        padding: 4px 12px;
        border-radius: 6px;
        font-size: 0.85rem;
        margin-bottom: 6px;
        white-space: nowrap;
    }

    .citation-stats {
        background: #EBF5FB;
        color: #2980B9;
    }

    .influential-stats {
        background: #FCF3CF;
        color: #F39C12;
    }

    .stats-divider {
        margin: 0 6px;
        color: rgba(0,0,0,0.2);
    }

    /* Field of study badge */
    .field-badge {
        display: inline-block;
        background: #F1F8E9;
        color: #558B2F;
        padding: 3px 10px;
        border-radius: 16px;
        font-size: 0.75rem;
        font-weight: 500;
        border: 1px solid #C5E1A5;
    }

    /* JSON value display */
    .json-value {
        background: #F8F9FA;
        padding: 10px;
        border-radius: 6px;
        margin-bottom: 10px;
        white-space: pre-wrap;
        font-family: monospace;
        font-size: 0.9rem;
        line-height: 1.5;
        color: #2C3E50;
        overflow-x: auto;
    }

    /* Collapsible content */
    .cluster-content {
        display: none;
    }

    .cluster-content.show {
        display: block;
    }

    /* 重新设计集群标题区布局 */
    .cluster-header {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-between;
        align-items: center;
        padding-bottom: 10px;
        border-bottom: 1px solid #eee;
        margin-bottom: 0px;
    }

    /* 左侧标题和集群信息 */
    .cluster-header-left {
        display: flex;
        align-items: center;
        flex: 1;
        min-width: 200px;
    }

    /* 中间区域用于摘要展开器 */
    .cluster-header-middle {
        display: flex;
        flex: 0 0 auto;
        margin: 0 15px;
    }

    /* 右侧统计数据 */
    .cluster-badge-container {
        display: flex;
        flex-wrap: wrap;
        gap: 6px;
        justify-content: flex-end;
    }

    /* 子集群查看按钮 */
    .view-button {
        margin-left: 15px;
    }

    /* 调整h3标题的上下margin */
    h3 {
        margin-top: 1rem !important;
        margin-bottom: 0.5rem !important;
    }

    /* 调整内容区块的上下margin */
    .stBlock {
        margin-top: 0.5rem !important;
        margin-bottom: 0.5rem !important;
    }

    /* 内联expander按钮样式 */
    .inline-expander-button {
        background: #E3F2FD;
        border: 1px solid #BBDEFB;
        border-radius: 4px;
        padding: 4px 8px;
        font-size: 0.85rem;
        color: #1976D2;
        cursor: pointer;
        display: inline-flex;
        align-items: center;
        transition: all 0.2s;
    }

    .inline-expander-button:hover {
        background: #BBDEFB;
    }

    /* 导航路径中的按钮样式 */
    .path-nav-button {
        display: inline-block;
        margin: 0 5px;
        padding: 5px 10px;
        background: #E3F2FD;
        border-radius: 5px;
        color: #1976D2;
        cursor: pointer;
        font-weight: 500;
        font-size: 0.9rem;
        border: none;
        transition: all 0.2s;
    }

    .path-nav-button:hover {
        background: #BBDEFB;
    }

    /* 路径导航容器样式 */
    .path-nav {
        color: #6C757D;
        font-size: 0.95rem;
        padding: 0.8rem 1rem;
        background: #F8F9FA;
        border-radius: 8px;
        margin: 0.8rem 0;
    }

    /* Paper count badge style */
    .paper-count-badge {
        display: inline-flex;
        align-items: center;
        margin-left: 12px;
        background: #E8F4FD;
        color: #2980B9;
        padding: 3px 8px;
        border-radius: 12px;
        font-size: 0.85rem;
        font-weight: 500;
    }
</style>

<script>
    function toggleClusterContent(id) {
        const content = document.getElementById('cluster-content-' + id);
        if (content) {
            content.classList.toggle('show');
        }
    }
</script>
"""
401
-
402
def get_hierarchy_files():
    """Return the selectable hierarchy file names found in ``hierarchies/``.

    Both plain ``*.json`` files and gzip-compressed ``*.json.gz`` files are
    discovered, because ``load_hierarchy_data`` can read either form. A
    compressed file is reported under its ``.json`` name (the ``.gz`` suffix is
    dropped), which is the name ``load_hierarchy_data`` expects; a hierarchy
    that exists in both forms is listed once.

    Returns:
        A sorted list of file names (not paths). Empty when the directory is
        missing.
    """
    hierarchy_dir = 'hierarchies'
    if not os.path.isdir(hierarchy_dir):
        return []

    names = set()
    for entry in os.listdir(hierarchy_dir):
        if entry.endswith('.json'):
            names.add(entry)
        elif entry.endswith('.json.gz'):
            names.add(entry[:-len('.gz')])

    # os.listdir order is filesystem dependent; sort for a stable dropdown.
    return sorted(names)
409
-
410
def parse_filename(filename):
    """Parse a hierarchy file name into its metadata fields.

    The name is split on ``_`` after removing a trailing ``.json`` (and an
    optional ``.gz``). The positions used are::

        <prefix>_<date>_<embedder>_<summarizer>_<clustermethod>_<unused>_
        <contribution_type>[_<building_method>[_<levels>[_<seed>]]]

    ``building_method`` may itself span two parts (``top_down`` and
    ``bottom_up``). ``levels`` is a ``-`` separated list such as ``10-5-2``.

    Args:
        filename: File name (not path) of the hierarchy file.

    Returns:
        A dict with the keys ``date``, ``embedder``, ``summarizer``,
        ``clustermethod``, ``contribution_type``, ``building_method``,
        ``clusterlevel``, ``clusterlevel_array``, ``level_count`` and
        ``random_seed``. Fields that cannot be determined are ``'Unknown'``
        (``[]`` / ``0`` for the array and count).
    """
    stem = filename
    # Strip only real suffixes; a ".json" in the middle of a name is kept.
    for suffix in ('.gz', '.json'):
        if stem.endswith(suffix):
            stem = stem[:-len(suffix)]
    parts = stem.split('_')

    # parts[6] is read below, so at least seven parts are required.
    # (The previous "< 6" check let 6-part names through and crashed with
    # IndexError.)
    if len(parts) < 7:
        return {
            'date': 'Unknown',
            'embedder': 'Unknown',
            'summarizer': 'Unknown',
            'clustermethod': 'Unknown',
            'contribution_type': 'Unknown',
            'building_method': 'Unknown',
            'clusterlevel': 'Unknown',
            'clusterlevel_array': [],
            'level_count': 0,
            'random_seed': 'Unknown'
        }

    date_str, embedder, summarizer, clustermethod = parts[1:5]
    # parts[5] is skipped (typically an "emb" placeholder).
    contribution_type = parts[6]

    # Everything after the contribution type: building method, levels, seed.
    tail = parts[7:]
    building_method = None
    if tail:
        if tail[:2] in (['top', 'down'], ['bottom', 'up']):
            # Compound method names were split by the "_" separator; rejoin.
            building_method = '_'.join(tail[:2])
            tail = tail[2:]
        else:
            building_method = tail[0]
            tail = tail[1:]
    clusterlevel_str = tail[0] if tail else None
    seed = tail[1] if len(tail) > 1 else None

    # Show YYYYMMDD dates as YYYY/MM/DD; leave anything else untouched.
    if len(date_str) == 8:
        formatted_date = f"{date_str[:4]}/{date_str[4:6]}/{date_str[6:]}"
    else:
        formatted_date = date_str

    clusterlevel_array = clusterlevel_str.split('-') if clusterlevel_str else []

    return {
        'date': formatted_date,
        'embedder': embedder,
        'summarizer': summarizer,
        'clustermethod': clustermethod,
        'contribution_type': contribution_type,
        'building_method': building_method or 'Unknown',
        'clusterlevel': clusterlevel_str or 'Unknown',
        'clusterlevel_array': clusterlevel_array,
        'level_count': len(clusterlevel_array),
        'random_seed': seed or 'Unknown'
    }
492
-
493
def format_hierarchy_option(filename):
    """Build the human-readable dropdown label for a hierarchy file.

    The label is assembled from the metadata returned by ``parse_filename``:
    date, clustering method, embedder/summarizer, contribution type, building
    method, number of levels with their sizes joined by "×", and the seed.
    """
    meta = parse_filename(filename)
    levels_str = "×".join(meta['clusterlevel_array'])
    models = f"{meta['embedder']}/{meta['summarizer']}"
    details = ", ".join([
        models,
        f"{meta['contribution_type']}",
        f"{meta['building_method']}",
        f"{meta['level_count']} levels: {levels_str}",
        f"seed: {meta['random_seed']}",
    ])
    return f"{meta['date']} - {meta['clustermethod']} ({details})"
498
-
499
@st.cache_data
def load_hierarchy_data(filename):
    """Load a hierarchy JSON document from ``hierarchies/``.

    The uncompressed file ``hierarchies/<filename>`` is preferred; when it does
    not exist, ``hierarchies/<filename>.gz`` is tried. Both are read as UTF-8
    text (the JSON interchange encoding) instead of the platform default.

    Args:
        filename: File name of the hierarchy, e.g. ``"foo.json"``.

    Returns:
        The parsed JSON document. If no file is found, or the file that was
        found cannot be read or parsed, an error is shown with ``st.error`` and
        ``{"clusters": []}`` is returned.
    """
    filepath = os.path.join('hierarchies', filename)
    gzip_filepath = filepath + '.gz'

    # (path, opener, label used in the error message), in order of preference.
    candidates = (
        (filepath, open, "file"),
        (gzip_filepath, gzip.open, "compressed file"),
    )
    for candidate, opener, label in candidates:
        if not os.path.exists(candidate):
            continue
        try:
            with opener(candidate, 'rt', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            # UI boundary: a corrupt or unreadable file should surface as an
            # on-page error with an empty hierarchy rather than crash the app.
            st.error(f"Error loading {label} {candidate}: {str(e)}")
            return {"clusters": []}

    st.error(f"Could not find hierarchy file: {filepath} or {gzip_filepath}")
    return {"clusters": []}
521
-
522
def get_cluster_statistics(clusters):
    """Compute summary statistics, with hover tooltips, for one hierarchy level.

    Args:
        clusters: Sequence of ``(cluster, path)`` pairs; only the cluster dict
            of each pair is inspected.

    Returns:
        A dict mapping each statistic label to ``{'value': ..., 'tooltip': ...}``.
        With no clusters every value is ``0``.
    """
    tooltips = {
        'Total Clusters': 'Total number of clusters at this level',
        'Total Papers': 'Total number of papers across all clusters at this level',
        'Average Papers per Cluster': 'Average number of papers per cluster',
        'Median Papers': 'Median number of papers per cluster',
        'Standard Deviation': 'Standard deviation of paper counts across clusters',
        'Max Papers in Cluster': 'Maximum number of papers in any single cluster',
        'Min Papers in Cluster': 'Minimum number of papers in any single cluster',
    }

    def count_papers(node):
        """Count the papers below ``node``; a node without children counts as 0."""
        children = node.get("children") if isinstance(node, dict) else None
        if not children:
            return 0
        first = children[0]
        # A node holds either papers or sub-clusters; the first child decides.
        if isinstance(first, dict) and "paper_id" in first:
            return len(children)
        return sum(count_papers(child) for child in children)

    paper_counts = [count_papers(cluster) for cluster, _ in clusters]

    values = dict.fromkeys(tooltips, 0)
    values['Total Clusters'] = len(paper_counts)
    if paper_counts:
        total_papers = sum(paper_counts)
        values['Total Papers'] = total_papers
        values['Average Papers per Cluster'] = round(total_papers / len(paper_counts), 2)
        # float() turns the NumPy scalars into plain Python numbers.
        values['Median Papers'] = round(float(np.median(paper_counts)), 2)
        values['Standard Deviation'] = round(float(np.std(paper_counts)), 2)
        values['Max Papers in Cluster'] = max(paper_counts)
        values['Min Papers in Cluster'] = min(paper_counts)

    return {
        label: {'value': values[label], 'tooltip': tooltip}
        for label, tooltip in tooltips.items()
    }
562
-
563
def calculate_citation_metrics(node):
    """Aggregate citation statistics over every paper below a cluster node.

    The tree is walked through the ``children`` lists. A node whose first
    child has a ``paper_id`` key is treated as a list of papers; otherwise its
    dict children are treated as sub-clusters and visited recursively.

    A missing ``semantic_scholar`` entry, or a count that is missing or
    ``None``, contributes ``0`` (previously a ``None`` count raised
    ``TypeError`` during summation).

    Args:
        node: Cluster dict, optionally containing ``children``.

    Returns:
        A dict with ``total_citations``, ``avg_citations``, ``max_citations``,
        ``total_influential_citations``, ``avg_influential_citations``,
        ``max_influential_citations`` and ``paper_count``. Averages are rounded
        to two decimals; all values are ``0`` when no papers are found.
    """
    citation_values = []  # citation count of every paper found
    influential_citation_values = []  # influential citation count of every paper

    def process_node(n):
        children = n.get("children") if isinstance(n, dict) else None
        if not children:
            return

        first = children[0]
        if isinstance(first, dict) and "paper_id" in first:
            # This node contains papers directly.
            for paper in children:
                if not isinstance(paper, dict):
                    continue
                semantic_scholar = paper.get('semantic_scholar') or {}
                citation_values.append(semantic_scholar.get('citationCount') or 0)
                influential_citation_values.append(
                    semantic_scholar.get('influentialCitationCount') or 0
                )
        else:
            # Recurse into child clusters.
            for child in children:
                if isinstance(child, dict):
                    process_node(child)

    process_node(node)

    paper_count = len(citation_values)
    total_citations = sum(citation_values)
    total_influential_citations = sum(influential_citation_values)

    return {
        'total_citations': total_citations,
        'avg_citations': round(total_citations / paper_count, 2) if paper_count else 0,
        'max_citations': max(citation_values, default=0),
        'total_influential_citations': total_influential_citations,
        'avg_influential_citations': (
            round(total_influential_citations / paper_count, 2) if paper_count else 0
        ),
        'max_influential_citations': max(influential_citation_values, default=0),
        'paper_count': paper_count
    }
618
-
619
def find_clusters_in_path(data, path):
    """Return the items (sub-clusters or papers) located at ``path``.

    Args:
        data: Hierarchy document with a top-level ``"clusters"`` list.
        path: List of ``cluster_id`` values leading from the top level to the
            cluster whose children are wanted. An empty path selects the top
            level.

    Returns:
        A list of ``(item, item_path)`` pairs:

        * empty ``path`` -> every top-level cluster paired with ``[]``;
        * target holds papers (first child has ``paper_id``) -> every paper
          paired with ``path``;
        * target holds sub-clusters -> every dict child that has a
          ``cluster_id``, paired with ``path + [cluster_id]``.

        ``[]`` is returned when ``data`` has no clusters, a path segment is not
        found, or a cluster on the path has no children.
    """
    if not data or "clusters" not in data:
        return []

    level = data["clusters"] or []
    if not path:
        return [(cluster, []) for cluster in level]

    # Descend one level per path segment, taking the first cluster that matches.
    for segment in path:
        match = next(
            (c for c in level if isinstance(c, dict) and c.get("cluster_id") == segment),
            None,
        )
        if match is None or not match.get("children"):
            return []
        level = match["children"]

    # `level` is non-empty here: the loop returns early on empty children.
    first = level[0]
    if isinstance(first, dict) and "paper_id" in first:
        return [(paper, path) for paper in level]

    return [
        (c, path + [c["cluster_id"]])
        for c in level
        if isinstance(c, dict) and c.get("cluster_id") is not None
    ]
663
-
664
def parse_json_abstract(abstract_text):
    """Render a JSON-encoded structured abstract as an HTML snippet.

    When ``abstract_text`` is a JSON object whose ``"Problem"`` entry is itself
    an object, the problem's domain, challenges and goal are laid out in a
    styled ``<div>``; missing fields are shown as ``N/A``.

    In every other case (not valid JSON, not a string, not an object, no
    ``"Problem"`` key, or a ``"Problem"`` value that is not an object) the
    input is returned unchanged. Previously a non-object ``"Problem"`` value
    raised an uncaught ``AttributeError``.

    The HTML is emitted without leading indentation so that it is not mistaken
    for an indented Markdown code block when embedded in ``st.markdown``.
    """
    try:
        abstract_json = json.loads(abstract_text)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError; TypeError covers None/non-str.
        return abstract_text

    problem = abstract_json.get("Problem") if isinstance(abstract_json, dict) else None
    if not isinstance(problem, dict):
        return abstract_text

    rows = (
        ("Domain", 'overarching problem domain'),
        ("Challenges", 'challenges/difficulties'),
        ("Goal", 'research question/goal'),
    )
    lines = [
        "<div class='section-problem paper-section'>",
        "<div class='paper-section-title'>Problem</div>",
    ]
    for label, key in rows:
        lines.append(f"<div class='label'>{label}:</div>")
        lines.append(f"<div class='value-box'>{problem.get(key, 'N/A')}</div>")
    lines.append("</div>")
    return "\n".join(lines)
686
-
687
def display_path_details(path, data, level_count):
    """Render the "Path Details" breadcrumb for the currently selected path.

    For every cluster id in ``path`` the matching cluster is looked up among
    the children of the previous one (starting at ``data["clusters"]``) and
    shown as a title row with a "Level N" button plus a collapsed expander
    holding the cluster abstract. Clicking a level button navigates back to
    that level: the session path and the URL query parameters are updated and
    the script is rerun.

    Args:
        path: List of cluster ids from the top level down to the current one.
        data: Hierarchy document with a top-level ``"clusters"`` list.
        level_count: Accepted for interface compatibility; not used here.
    """
    if not path:
        return

    st.markdown("### Path Details")

    current = data["clusters"]

    for i, cluster_id in enumerate(path):
        level_number = i + 1  # levels are 1-based: the top level is Level 1
        indent = i * 32  # 32 pixels per level

        c = next(
            (node for node in current
             if isinstance(node, dict) and node.get("cluster_id") == cluster_id),
            None,
        )
        if c is None:
            # Unknown id at this level: nothing to show, keep searching the
            # same children for the next path entry (as before).
            continue

        # NOTE: these empty <div>s cannot indent the widgets that follow (every
        # Streamlit element lives in its own container); they only contribute
        # vertical spacing and are kept so the layout stays as it was.
        st.markdown(
            f"<div style='margin-left: {indent}px; margin-bottom: 10px;'></div>",
            unsafe_allow_html=True,
        )
        st.markdown("<div style='margin-bottom: 25px;'></div>", unsafe_allow_html=True)

        # Row with the cluster name and the "Level N" button.
        col1, col2 = st.columns([0.85, 0.15])

        with col1:
            title = c.get("title", "Untitled")
            st.markdown(
                "<div style='display: flex; align-items: center;'>"
                "<div style='width: 12px; height: 12px; border-radius: 50%; "
                "background: #3B82F6; margin-right: 8px;'></div>"
                "<h4 style='font-size: 1.15rem; font-weight: 600; "
                "color: #1F2937; margin: 0;'>"
                f"Cluster {cluster_id}: {title}"
                "</h4>"
                "</div>",
                unsafe_allow_html=True,
            )

        with col2:
            button_clicked = st.button(
                f'Level {level_number}', key=f'level_btn_{i}_{cluster_id}'
            )

        if button_clicked:
            st.session_state.path = path[:i]
            hierarchy = st.query_params.get('hierarchy')
            st.query_params.clear()
            if hierarchy is not None:
                st.query_params['hierarchy'] = hierarchy
            if st.session_state.path:
                # Assign the whole list so every path element becomes a
                # repeated "path" key. Assigning the elements one by one
                # overwrote the key and kept only the last element.
                st.query_params['path'] = [str(p) for p in st.session_state.path]
            st.rerun()

        with st.container():
            st.markdown(
                f"<div style='margin-left: {indent}px; "
                f"width: calc(100% - {indent}px);'></div>",
                unsafe_allow_html=True,
            )

            with st.expander("📄 Show Cluster Details", expanded=False):
                # The abstract may be a JSON-encoded structured abstract.
                abstract_content = parse_json_abstract(c.get("abstract", ""))
                st.markdown(
                    "<div style='color: #374151; line-height: 1.6;'>\n"
                    f"{abstract_content}\n"
                    "</div>",
                    unsafe_allow_html=True,
                )

        current = c.get("children") or []
766
-
767
# (expander heading, heading accent colour, key in the paper dict,
#  ((label, key in the section dict, value-box accent colour), ...))
_PAPER_DETAIL_SECTIONS = (
    ("🔍 Problem Details", "#3498DB", "problem", (
        ("Problem Domain", "overarching problem domain", "#3498DB"),
        ("Challenges/Difficulties", "challenges/difficulties", "#E74C3C"),
        ("Research Question/Goal", "research question/goal", "#2ECC71"),
    )),
    ("💡 Solution Details", "#2ECC71", "solution", (
        ("Solution Domain", "overarching solution domain", "#3498DB"),
        ("Solution Approach", "solution approach", "#9B59B6"),
        ("Novelty of Solution", "novelty of the solution", "#F1C40F"),
    )),
    ("📊 Results Details", "#9B59B6", "results", (
        ("Findings/Results", "findings/results", "#3498DB"),
        ("Potential Impact", "potential impact of the results", "#E67E22"),
    )),
)


def _section_heading_html(title, accent, margin_top=25):
    """Return the HTML of an underlined ``<h4>`` section heading."""
    return (
        f"<div style='margin-top: {margin_top}px; margin-bottom: 20px;'>"
        f"<h4 style='color: #2C3E50; border-bottom: 2px solid {accent}; "
        f"padding-bottom: 8px;'>{title}</h4>"
        "</div>"
    )


def _render_detail_section(title, accent, section, fields):
    """Render one label/value section (problem, solution or results)."""
    st.markdown(_section_heading_html(title, accent), unsafe_allow_html=True)
    label_col, value_col = st.columns([1, 2])

    with label_col:
        for index, (label, _key, _color) in enumerate(fields):
            # Every label after the first is pushed down a little.
            spacing = "margin-top: 15px; " if index else ""
            st.markdown(
                f"<div style='font-weight: 600; color: #34495E; {spacing}"
                f"margin-bottom: 5px;'>{label}</div>",
                unsafe_allow_html=True,
            )

    with value_col:
        for index, (_label, key, color) in enumerate(fields):
            spacing = " margin-top: 10px;" if index else ""
            value = section.get(key, 'Not specified')
            st.markdown(
                "<div style='background: #F8F9FA; padding: 10px; border-radius: 5px; "
                f"border-left: 4px solid {color};{spacing}'>{value}</div>",
                unsafe_allow_html=True,
            )


def display_paper(item):
    """Display one paper: a header card plus a collapsed details expander.

    The header shows the title, a link to the paper's Semantic Scholar page
    (only when a URL is known) and the citation / influential citation counts.
    The expander shows the abstract, the problem / solution / results sections
    that are present as dicts on the paper, and the author list.

    Args:
        item: Paper dict. Reads ``title``, ``abstract``, ``problem``,
            ``solution``, ``results`` and ``semantic_scholar`` (``url``,
            ``citationCount``, ``influentialCitationCount``, ``authors``).
            Missing or ``None`` values fall back to placeholders or ``0``.
    """
    semantic_scholar = item.get('semantic_scholar') or {}
    url = semantic_scholar.get('url') or ''
    citation_count = semantic_scholar.get('citationCount') or 0
    influential_citation_count = semantic_scholar.get('influentialCitationCount') or 0
    title = item.get('title', 'Untitled Paper')

    link_html = ""
    if url:
        link_html = (
            f"<a href=\"{url}\" target=\"_blank\" "
            "style='font-size: 0.9em; margin-left: 8px; color: #3498DB; "
            "text-decoration: none; transition: all 0.3s;' "
            "title='View paper on Semantic Scholar'>🔗</a>"
        )

    # The card is opened AND closed in this single call: each st.markdown call
    # is rendered in its own container, so a <div> left open here could never
    # wrap the widgets created afterwards.
    st.markdown(
        "<div class='paper-card'>"
        "<div style='display: flex; justify-content: space-between; "
        "align-items: flex-start;'>"
        f"<div class='paper-title' style='flex-grow: 1;'>{title}{link_html}</div>"
        "<div style='display: flex; align-items: center; gap: 12px;'>"
        "<div class='citation-badge' title='Number of times this paper has been "
        "cited by other papers.'>"
        f"<span class='citation-icon'>⭐</span> Citations: {citation_count}"
        "</div>"
        "<div class='influential-badge' title='Number of times this paper has been "
        "cited by influential papers. Influential citation means that the cited "
        "publication has a significant impact on the citing publication.'>"
        "<span class='influential-icon'>🔥</span> "
        f"Influential Citations: {influential_citation_count}"
        "</div>"
        "</div>"
        "</div>"
        "</div>",
        unsafe_allow_html=True,
    )

    # One expander for all detailed information, collapsed by default.
    with st.expander("📑 Show Detailed Information", expanded=False):
        st.markdown(
            _section_heading_html("📄 Abstract", "#3498DB", margin_top=15),
            unsafe_allow_html=True,
        )
        abstract_text = item.get('abstract') or 'No abstract available'
        st.markdown(
            f"<div class='paper-abstract'>{abstract_text}</div>",
            unsafe_allow_html=True,
        )

        for heading, accent, key, fields in _PAPER_DETAIL_SECTIONS:
            section = item.get(key)
            # Only dict sections can be rendered field by field.
            if section and isinstance(section, dict):
                _render_detail_section(heading, accent, section, fields)

        authors = semantic_scholar.get('authors') or []
        if authors:
            st.markdown(
                _section_heading_html("👥 Authors", "#E67E22"),
                unsafe_allow_html=True,
            )
            for author in authors:
                if not isinstance(author, dict):
                    continue
                name = author.get('name', 'Unknown')
                author_id = author.get('authorId', 'N/A')
                stats_html = "".join(
                    f"<div title='{label}'>"
                    f"<span style='font-size: 0.85rem; color: #666;'>{label}</span>"
                    f"<div style='font-weight: 600; color: #3498DB;'>"
                    f"{author.get(field, 0)}</div>"
                    "</div>"
                    for label, field in (
                        ("Papers", "paperCount"),
                        ("Citations", "citationCount"),
                        ("h-index", "hIndex"),
                    )
                )
                st.markdown(
                    "<div style='display: flex; margin-bottom: 15px; "
                    "padding-bottom: 10px; border-bottom: 1px solid #eee;'>"
                    "<div style='flex: 1;'>"
                    f"<div style='font-weight: 600; font-size: 1.05rem;'>{name}</div>"
                    f"<div style='color: #666; margin-top: 3px;'>Author ID: {author_id}</div>"
                    "</div>"
                    f"<div style='display: flex; gap: 15px;'>{stats_html}</div>"
                    "</div>",
                    unsafe_allow_html=True,
                )
1010
-
1011
def display_cluster(item, path):
    """Render one cluster: title, paper count, citation statistics, a
    collapsible summary and, when the cluster has children, a drill-down button.

    Args:
        item: Cluster dictionary. The keys read here are ``cluster_id``,
            ``title``, ``abstract`` and (optionally) ``children``.
        path: Iterable of path components leading to this cluster; it is only
            used to build a unique Streamlit widget key.

    Side effects:
        Writes widgets to the page. When the drill-down button is clicked the
        cluster id is appended to ``st.session_state.path`` and the script is
        rerun.
    """
    cluster_id = item['cluster_id']
    # The same cluster id may be rendered under different paths, so the path
    # is folded into the widget key to keep it unique.
    button_key = f"btn_{cluster_id}_{'-'.join(map(str, path))}"

    metrics = calculate_citation_metrics(item)
    summary_html = parse_json_abstract(item['abstract'])

    # Decide the button label from what the next level contains: papers
    # (entries carrying a "paper_id") or further sub-clusters.
    children = item.get("children")
    if children:
        if "paper_id" in children[0]:
            btn_text = f'View Papers ({metrics["paper_count"]})'
        else:
            btn_text = f'View Sub-clusters ({len(children)})'

    # Title and paper-count badge share one flex row so they stay aligned.
    st.markdown(f"""
    <div style='display: flex; align-items: center;'>
        <div class='cluster-title' style='margin: 0; font-weight: 700; font-size: 1.3rem;'>
            {item['title']}
        </div>
        <div style='display: inline-flex; align-items: center; margin-left: 12px;
                  background: #F4F6F9; color: #566573; padding: 2px 10px;
                  border-radius: 6px; font-size: 0.95rem; font-weight: 500;'>
            <span style='margin-right: 4px;'>📑</span>{metrics['paper_count']} papers
        </div>
    </div>
    """, unsafe_allow_html=True)

    # Wide column for statistics and summary, narrow column for the button.
    stats_col, action_col = st.columns([8, 2])

    with stats_col:
        # Citation figures, separated by pipe dividers.
        st.markdown(f"""
        <div>
            <div class='citation-stats'>
                <span style='font-weight: bold; margin-right: 5px;'>⭐</span> Citations:
                Total {metrics['total_citations']} <span class='stats-divider'>|</span>
                Avg {metrics['avg_citations']} <span class='stats-divider'>|</span>
                Max {metrics['max_citations']}
            </div>
            <div class='influential-stats'>
                <span style='font-weight: bold; margin-right: 5px;'>🔥</span> Influential Citations:
                Total {metrics['total_influential_citations']} <span class='stats-divider'>|</span>
                Avg {metrics['avg_influential_citations']} <span class='stats-divider'>|</span>
                Max {metrics['max_influential_citations']}
            </div>
        </div>
        """, unsafe_allow_html=True)

        # Collapsed-by-default expander holding the cluster summary.
        with st.expander("📄 Cluster Summary", expanded=False):
            st.markdown(f"""
            <div class='cluster-abstract'>{summary_html}</div>
            """, unsafe_allow_html=True)

    with action_col:
        # The button exists only for clusters that have something below them.
        if children and st.button(btn_text, key=button_key):
            st.session_state.path.append(cluster_id)
            st.rerun()

    # Thin rule separating this cluster from the next one.
    st.markdown("<hr style='margin: 0.5rem 0; border-color: #eee;'>", unsafe_allow_html=True)
1084
-
1085
def _default_hierarchy_index(files, requested):
    """Return the position of ``requested`` in ``files``, or 0 when absent.

    A stale or hand-edited ``hierarchy`` query parameter must not crash the
    page, so an unknown (or missing) file falls back to the first option.
    """
    try:
        return files.index(requested)
    except ValueError:
        return 0


def _metadata_cards_html(cards):
    """Build the HTML for one metadata column from ``(title, value)`` pairs."""
    blocks = []
    for position, (title, value) in enumerate(cards):
        # Every card after the first gets a top margin to space the stack.
        style = "" if position == 0 else " style='margin-top: 10px;'"
        blocks.append(
            f"<div class='metric-card'{style}>\n"
            f"    <h4 style='margin-top: 0; color: #2C3E50; font-size: 0.9rem;'>{title}</h4>\n"
            f"    <p style='font-size: 0.9rem; font-weight: 600; color: #3498DB;'>{value}</p>\n"
            f"</div>"
        )
    # A blank line between cards keeps them separate HTML blocks in Markdown.
    return "\n\n".join(blocks)


def _render_hierarchy_metadata(info):
    """Show the parsed filename metadata in a collapsed three-column expander."""
    columns_spec = [
        [
            ("Date", info['date']),
            ("Clustering Method", info['clustermethod']),
        ],
        [
            ("Embedder / Summarizer", f"{info['embedder']} / {info['summarizer']}"),
            ("Contribution Type", info['contribution_type']),
        ],
        [
            ("Building Method", info['building_method']),
            ("Cluster Levels", f"{info['clusterlevel']} (Total: {info['level_count']})"),
        ],
    ]
    with st.expander("📋 Hierarchy Metadata", expanded=False):
        for column, cards in zip(st.columns(3), columns_spec):
            with column:
                st.markdown(_metadata_cards_html(cards), unsafe_allow_html=True)


def _stat_card_html(title, value, extra_style=""):
    """Build the HTML for one small statistics card."""
    return (
        f"<div class='metric-card' style='padding: 0.8rem;{extra_style}'>\n"
        f"    <h4 style='margin-top: 0; margin-bottom: 5px; color: #2C3E50; font-size: 0.85rem;'>{title}</h4>\n"
        f"    <p style='font-size: 0.9rem; font-weight: 600; color: #3498DB; margin: 0;'>{value}</p>\n"
        f"</div>"
    )


def _render_cluster_statistics(current_clusters):
    """Show six summary metrics for the current level in a 3x2 card grid."""
    # (card title, key in the statistics dictionary)
    first_row = [
        ("Total Clusters", "Total Clusters"),
        ("Total Papers", "Total Papers"),
        ("Avg Papers/Cluster", "Average Papers per Cluster"),
    ]
    second_row = [
        ("Median Papers", "Median Papers"),
        ("Max Papers in Cluster", "Max Papers in Cluster"),
        ("Min Papers in Cluster", "Min Papers in Cluster"),
    ]
    with st.expander("📊 Cluster Statistics", expanded=False):
        stats = get_cluster_statistics(current_clusters)

        # Both rows of columns are created before anything is drawn so the
        # grid layout is fixed up front.
        top_columns = st.columns(3)
        bottom_columns = st.columns(3)

        for column, (title, key) in zip(top_columns, first_row):
            with column:
                st.markdown(
                    _stat_card_html(title, stats[key]['value']),
                    unsafe_allow_html=True,
                )
        for column, (title, key) in zip(bottom_columns, second_row):
            with column:
                # The bottom row carries extra margin below the cards.
                st.markdown(
                    _stat_card_html(title, stats[key]['value'], " margin-bottom: 15px;"),
                    unsafe_allow_html=True,
                )


def _collect_path_info(root_clusters, path):
    """Resolve each cluster id in ``path`` to ``(level, title, cluster_id)``.

    Ids that cannot be found at their level are skipped. Nodes without
    children (or paper nodes without a ``cluster_id``) no longer raise
    ``KeyError``; the walk simply finds nothing further below them.
    """
    trail = []
    level_clusters = root_clusters
    for level_num, wanted_id in enumerate(path, start=1):
        match = next(
            (c for c in level_clusters if c.get("cluster_id") == wanted_id),
            None,
        )
        if match is None:
            continue
        trail.append((level_num, match["title"], match["cluster_id"]))
        level_clusters = match.get("children") or []
    return trail


def _render_current_path(data):
    """Draw the breadcrumb: a root button plus one button per resolved level."""
    path_info = _collect_path_info(data["clusters"], st.session_state.path)

    with st.container():
        st.markdown("<h3 style='margin-top: 0.5rem; margin-bottom: 0.8rem;'>🗂️ Current Path</h3>", unsafe_allow_html=True)

        # Root entry: jumps back to the top of the hierarchy.
        label_col, button_col = st.columns([0.3, 0.7])
        with label_col:
            st.markdown("<div><strong>Root:</strong></div>", unsafe_allow_html=True)
        with button_col:
            if st.button("All Papers", key="root_button"):
                st.session_state.path = []
                st.rerun()

        # One row per level along the current path.
        for position, (level_num, title, cluster_id) in enumerate(path_info):
            label_col, button_col = st.columns([0.3, 0.7])
            with label_col:
                st.markdown(f"<div><strong>Level {level_num}:</strong></div>", unsafe_allow_html=True)
            with button_col:
                if st.button(f"{title}", key=f"lvl_{position}_{cluster_id}"):
                    # Truncate to the level this entry sits at. Using the
                    # level number (not the list position) stays correct even
                    # if an earlier id in the path could not be resolved.
                    st.session_state.path = st.session_state.path[:level_num]
                    st.rerun()


def main():
    """Build the Paper Clusters Explorer page.

    Flow: configure the page, let the user pick a hierarchy file (mirrored in
    the ``hierarchy`` query parameter), show its metadata, then render the
    clusters or papers found at ``st.session_state.path`` together with
    back/breadcrumb navigation.
    """
    st.set_page_config(
        layout="wide",
        page_title="Paper Clusters Explorer",
        initial_sidebar_state="expanded",
        menu_items=None
    )
    # Attempt to force the light theme.
    # NOTE(review): <script> tags injected through st.markdown are presumably
    # not executed by the browser, so this may be a no-op; confirm before
    # relying on it.
    st.markdown("""
        <script>
            var elements = window.parent.document.querySelectorAll('.stApp');
            elements[0].classList.add('light');
            elements[0].classList.remove('dark');
        </script>
    """, unsafe_allow_html=True)
    st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

    hierarchy_files = get_hierarchy_files()
    if not hierarchy_files:
        st.error("No hierarchy files found in /hierarchies directory")
        return

    # Manage file selection via query params
    current_url = st.query_params.get('hierarchy', None)
    current_file = unquote(current_url) + '.json' if current_url else None

    hierarchy_options = {format_hierarchy_option(f): f for f in hierarchy_files}
    selected_option = st.selectbox(
        'Select Hierarchy',
        options=list(hierarchy_options.keys()),
        # Unknown files named in the URL fall back to the first option
        # instead of raising ValueError.
        index=_default_hierarchy_index(list(hierarchy_options.values()), current_file)
    )
    selected_file = hierarchy_options[selected_option]

    # Save selected file in query params
    if selected_file != current_file:
        st.query_params['hierarchy'] = quote(selected_file.replace('.json', ''))

    data = load_hierarchy_data(selected_file)
    info = parse_filename(selected_file)

    _render_hierarchy_metadata(info)

    # Seed the navigation path from the URL on the first run of a session.
    if 'path' not in st.session_state:
        path_params = st.query_params.get_all('path')
        st.session_state.path = [p for p in path_params if p]

    current_clusters = find_clusters_in_path(data, st.session_state.path)
    current_level = len(st.session_state.path)
    total_levels = info['level_count']
    level_name = f'Level {current_level + 1}' if current_level < total_levels else 'Papers'

    # Paper level: either past the last cluster level, or the entries at this
    # level are papers (they carry a "paper_id").
    is_paper_level = current_level >= total_levels or (current_clusters and "paper_id" in current_clusters[0][0])

    if not is_paper_level and current_clusters:
        _render_cluster_statistics(current_clusters)

    # Back navigation button
    if st.session_state.path:
        if st.button('← Back', key='back_button'):
            st.session_state.path.pop()
            st.rerun()

    # Current path display
    if st.session_state.path:
        _render_current_path(data)

    # Heading for the content list.
    heading = '📑 Papers' if is_paper_level else '📂 ' + level_name
    st.markdown(f"""
        <h3 style='margin: 1rem 0 0.5rem 0; color: #2C3E50;'>
            {heading}
        </h3>
    """, unsafe_allow_html=True)

    for item, full_path in current_clusters:
        if is_paper_level:
            display_paper(item)
        else:
            display_cluster(item, full_path)
1299
-
1300
# Script entry point: build the page only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/hierarchies/hierarchy_20250413_qwen_llama_kmeans_emb_problem_bidirectional_276-40-6_1037.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cf2f5949460f04855d0de76e1a4af817a352d014ea75d63d08e43fcfd7d1032
3
- size 10829606
 
 
 
 
src/hierarchies/hierarchy_20250413_qwen_llama_kmeans_emb_problem_bottom_up_276-40-6_1037.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5350511f53b942d57bbf69afe6599937c9e50d922a70b482a66fe26d1ecbe8a
3
- size 10823257
 
 
 
 
src/hierarchies/hierarchy_20250413_qwen_llama_kmeans_emb_problem_top_down_276-40-6_1037.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a833e6fe3d49126111c6ded442ccbbc419b70a01bac245fd3ccc07d77dac9112
3
- size 10821358
 
 
 
 
src/hierarchies/hierarchy_20250526_qwen_llama_kmeans_emb_results_top_down_500-70-18-9_1037.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3a9f5af4ff7f7715724978baa4e1c10bcfdb7d0feb05eec792511d676907f6f
3
- size 48034456
 
 
 
 
src/requirements.txt DELETED
@@ -1,4 +0,0 @@
1
- streamlit==1.41.1
2
- pandas
3
- numpy
4
- matplotlib
 
 
 
 
 
src/streamlit_app.py DELETED
@@ -1,40 +0,0 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
# Two sliders choose how many points are drawn and how many turns the spiral
# makes.
point_count = st.slider("Number of points in spiral", 1, 10000, 1100)
turn_count = st.slider("Number of turns in spiral", 1, 300, 31)

# Evenly spaced values from 0 to 1: used as the radius of each point, scaled
# into its angle, and stored as the colour value.
fraction = np.linspace(0, 1, point_count)
angle = 2 * np.pi * turn_count * fraction

# Polar to Cartesian conversion, plus a random value per point that drives
# the marker size.
spiral = pd.DataFrame({
    "x": fraction * np.cos(angle),
    "y": fraction * np.sin(angle),
    "idx": fraction,
    "rand": np.random.randn(point_count),
})

# Axes and legends are hidden so that only the spiral itself is drawn.
point_marks = alt.Chart(spiral, height=700, width=700).mark_point(filled=True)
spiral_chart = point_marks.encode(
    x=alt.X("x", axis=None),
    y=alt.Y("y", axis=None),
    color=alt.Color("idx", legend=None, scale=alt.Scale()),
    size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
)
st.altair_chart(spiral_chart)